what exactly is returned when `return_as="generator"` is in `joblib.Parallel`?

52 Views Asked by At

What exactly is returned when we set return_as="generator" in joblib.Parallel?

Below, you can see that passing the generator object returned by joblib.Parallel into dict() takes less time(for most of the cases) than passing the generator object(from the sequential implementation) into dict(), so it's not a usual generator, I think.

import time
import random
import networkx as nx
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from joblib import Parallel, delayed
from networkx.algorithms.shortest_paths.weighted import single_source_bellman_ford_path


def all_pairs_bellman_ford_path(G, weight="weight"):
    path = single_source_bellman_ford_path
    for n in G:
        yield (n, path(G, n, weight=weight))

def all_pairs_bellman_ford_path_parallel(G, weight="weight"):
    def _calculate_shortest_paths_subset(source):
        return (source, single_source_bellman_ford_path(G, source, weight=weight))

    nodes = G.nodes

    paths = Parallel(n_jobs=-1, return_as="generator")(
        delayed(_calculate_shortest_paths_subset)(source) for source in nodes
    )
    return paths


heatmapDF = pd.DataFrame()
number_of_nodes_list = [10, 50, 100, 300, 500]
pList = [1, 0.8, 0.6, 0.4, 0.2]
for p in pList:
    for num in number_of_nodes_list:
        G = nx.fast_gnp_random_graph(num, p, seed=42, directed=False)

        random.seed(42)
        for u, v in G.edges():
            G[u][v]["weight"] = random.random()

        c = all_pairs_bellman_ford_path_parallel(G)
        t1 = time.time()
        dict(c)
        t2 = time.time()
        parallelTime = t2 - t1
        c = all_pairs_bellman_ford_path(G)
        t1 = time.time()
        dict(c)
        t2 = time.time()
        stdTime = t2 - t1
        timesFaster = stdTime / parallelTime
        heatmapDF.at[num, p] = timesFaster
        print("Finished " + str(num) + " " + str(p))

plt.figure(figsize=(20, 4))
hm = sns.heatmap(data=heatmapDF.T, annot=True, cmap="Greens", cbar=True)
hm.set_yticklabels(pList)
hm.set_xticklabels(number_of_nodes_list)
plt.xticks(rotation=45)
plt.yticks(rotation=20)
plt.title("Times Speedups of all_pairs_bellman_ford_path")
plt.xlabel("Number of Vertices")
plt.ylabel("Edge Probability")
plt.tight_layout()
plt.savefig("timing/heatmap_all_pairs_bellman_ford_path_timing.png")

output:

speedup heatmap

0

There are 0 best solutions below