codeflash-ai · codeflash-ai · Oct 11, 2025
diff --git a/graphrag/query/indexer_adapters.py b/graphrag/query/indexer_adapters.py
@@ -154,12 +154,15 @@ def read_indexer_entities(
 
     nodes_df = nodes_df.loc[:, ["id", "community"]]
     nodes_df["community"] = nodes_df["community"].fillna(-1)
-    # group entities by id and degree and remove duplicated community IDs
-    nodes_df = nodes_df.groupby(["id"]).agg({"community": set}).reset_index()
-    nodes_df["community"] = nodes_df["community"].apply(
-        lambda x: [str(int(i)) for i in x]
-    )
-    final_df = nodes_df.merge(entities, on="id", how="inner").drop_duplicates(
+    # Group community IDs per entity id into sets, which drops duplicate community IDs
+    grouped = nodes_df.groupby("id")["community"].agg(set)
+    # For each set, create a list of formatted community IDs (str of int)
+    communities_formatted = grouped.apply(lambda s: [str(int(i)) for i in s])
+    final_df = pd.DataFrame({
+        "id": communities_formatted.index,
+        "community": communities_formatted.values,
+    })
+    final_df = final_df.merge(entities, on="id", how="inner").drop_duplicates(
         subset=["id"]
     )
     # read entity dataframe to knowledge model objects

diff --git a/graphrag/query/input/loaders/dfs.py b/graphrag/query/input/loaders/dfs.py
@@ -48,7 +48,7 @@ def read_entities(
 ) -> list[Entity]:
     """Read entities from a dataframe using pre-converted records."""
     records = _prepare_records(df)
-    return [
+    return list(
         Entity(
             id=to_str(row, id_col),
             short_id=to_optional_str(row, short_id_col)
@@ -71,7 +71,7 @@ def read_entities(
             ),
         )
         for row in records
-    ]
+    )
 
 
 def read_relationships(