Skip to content

Commit 17ac0f8

Browse files
committed
perf: full_config_hash
1 parent 33959df commit 17ac0f8

File tree

2 files changed, with 26 additions and 22 deletions.

global_objects/etl_task.py

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, List, Optional, Dict, Union
1+
from typing import Any, List, Optional, Dict, Tuple, Union
22
from sqlalchemy.sql.expression import cast
33
from sqlalchemy.orm.attributes import flag_modified
44
from sqlalchemy.dialects.postgresql import UUID
@@ -15,6 +15,7 @@
1515
IntegrationSharepoint,
1616
)
1717
from submodules.model.util import prevent_sql_injection
18+
from submodules.model.etl_utils import get_hashed_string
1819

1920

2021
FINISHED_STATES = [
@@ -131,13 +132,10 @@ def get_or_create(
131132
priority: Optional[int] = -1,
132133
id: Optional[str] = None,
133134
with_commit: bool = True,
134-
):
135+
) -> Tuple[EtlTask, bool]:
135136
if id:
136137
return get_by_id(id)
137138

138-
file_reference_id = meta_data.get("file_reference_id") if meta_data else None
139-
integration_id = meta_data.get("integration_id") if meta_data else None
140-
markdown_file_id = meta_data.get("markdown_file_id") if meta_data else None
141139
query: EtlTask = session.query(EtlTask).filter(
142140
EtlTask.organization_id == org_id,
143141
EtlTask.original_file_name == original_file_name,
@@ -146,6 +144,10 @@ def get_or_create(
146144

147145
if file_path:
148146
query = query.filter(EtlTask.file_path == file_path)
147+
148+
file_reference_id = meta_data.get("file_reference_id") if meta_data else None
149+
integration_id = meta_data.get("integration_id") if meta_data else None
150+
markdown_file_id = meta_data.get("markdown_file_id") if meta_data else None
149151
if file_reference_id:
150152
query = query.filter(
151153
file_reference_id
@@ -161,25 +163,24 @@ def get_or_create(
161163
integration_id == cast(EtlTask.meta_data.op("->>")("integration_id"), UUID)
162164
)
163165

164-
# TODO: enhance
165-
if with_commit is False:
166-
return query.first()
167-
168166
if etl_task := query.first():
169-
return etl_task
167+
return etl_task, True
170168

171-
return create(
172-
org_id=org_id,
173-
user_id=user_id,
174-
original_file_name=original_file_name,
175-
file_size_bytes=file_size_bytes,
176-
tokenizer=tokenizer,
177-
full_config=full_config,
178-
meta_data=meta_data,
179-
priority=priority,
180-
file_path=file_path,
181-
id=id,
182-
with_commit=with_commit,
169+
return (
170+
create(
171+
org_id=org_id,
172+
user_id=user_id,
173+
original_file_name=original_file_name,
174+
file_size_bytes=file_size_bytes,
175+
tokenizer=tokenizer,
176+
full_config=full_config,
177+
meta_data=meta_data,
178+
priority=priority,
179+
file_path=file_path,
180+
id=id,
181+
with_commit=with_commit,
182+
),
183+
False,
183184
)
184185

185186

@@ -205,6 +206,7 @@ def create(
205206
file_size_bytes=file_size_bytes,
206207
tokenizer=tokenizer,
207208
full_config=full_config,
209+
full_config_hash=get_hashed_string(full_config),
208210
meta_data=meta_data,
209211
priority=priority,
210212
)

models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2614,6 +2614,8 @@ class EtlTask(Base):
26142614
error_message = Column(String)
26152615
meta_data = Column(JSON)
26162616

2617+
full_config_hash = Column(String, index=True)
2618+
26172619

26182620
class ConversationShare(Base):
26192621
__tablename__ = Tablenames.CONVERSATION_SHARE.value

0 commit comments

Comments
 (0)