Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions graphrag/query/indexer_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,15 @@ def read_indexer_entities(

nodes_df = nodes_df.loc[:, ["id", "community"]]
nodes_df["community"] = nodes_df["community"].fillna(-1)
# group entities by id and remove duplicated community IDs
nodes_df = nodes_df.groupby(["id"]).agg({"community": set}).reset_index()
nodes_df["community"] = nodes_df["community"].apply(
lambda x: [str(int(i)) for i in x]
)
final_df = nodes_df.merge(entities, on="id", how="inner").drop_duplicates(
# Collect each entity id's community IDs into a set (removes duplicates); the string formatting below still uses .apply
grouped = nodes_df.groupby("id")["community"].agg(set)
# For each set, create a list of formatted community IDs (str of int)
communities_formatted = grouped.apply(lambda s: [str(int(i)) for i in s])
final_df = pd.DataFrame({
"id": communities_formatted.index,
"community": communities_formatted.values,
})
final_df = final_df.merge(entities, on="id", how="inner").drop_duplicates(
subset=["id"]
)
# read entity dataframe to knowledge model objects
Expand Down
4 changes: 2 additions & 2 deletions graphrag/query/input/loaders/dfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def read_entities(
) -> list[Entity]:
"""Read entities from a dataframe using pre-converted records."""
records = _prepare_records(df)
return [
return list(
Entity(
id=to_str(row, id_col),
short_id=to_optional_str(row, short_id_col)
Expand All @@ -71,7 +71,7 @@ def read_entities(
),
)
for row in records
]
)


def read_relationships(
Expand Down