1- from typing import Any , List , Optional , Dict , Union
1+ from typing import Any , List , Optional , Dict , Tuple , Union
22from sqlalchemy .sql .expression import cast
33from sqlalchemy .orm .attributes import flag_modified
44from sqlalchemy .dialects .postgresql import UUID
1515 IntegrationSharepoint ,
1616)
1717from submodules .model .util import prevent_sql_injection
18+ from submodules .model .etl_utils import get_hashed_string
1819
1920
2021FINISHED_STATES = [
@@ -131,13 +132,10 @@ def get_or_create(
131132 priority : Optional [int ] = - 1 ,
132133 id : Optional [str ] = None ,
133134 with_commit : bool = True ,
134- ):
135+ ) -> Tuple [ EtlTask , bool ] :
135136 if id :
136137 return get_by_id (id )
137138
138- file_reference_id = meta_data .get ("file_reference_id" ) if meta_data else None
139- integration_id = meta_data .get ("integration_id" ) if meta_data else None
140- markdown_file_id = meta_data .get ("markdown_file_id" ) if meta_data else None
141139 query : EtlTask = session .query (EtlTask ).filter (
142140 EtlTask .organization_id == org_id ,
143141 EtlTask .original_file_name == original_file_name ,
@@ -146,6 +144,10 @@ def get_or_create(
146144
147145 if file_path :
148146 query = query .filter (EtlTask .file_path == file_path )
147+
148+ file_reference_id = meta_data .get ("file_reference_id" ) if meta_data else None
149+ integration_id = meta_data .get ("integration_id" ) if meta_data else None
150+ markdown_file_id = meta_data .get ("markdown_file_id" ) if meta_data else None
149151 if file_reference_id :
150152 query = query .filter (
151153 file_reference_id
@@ -161,25 +163,24 @@ def get_or_create(
161163 integration_id == cast (EtlTask .meta_data .op ("->>" )("integration_id" ), UUID )
162164 )
163165
164- # TODO: enhance
165- if with_commit is False :
166- return query .first ()
167-
168166 if etl_task := query .first ():
169- return etl_task
167+ return etl_task , True
170168
171- return create (
172- org_id = org_id ,
173- user_id = user_id ,
174- original_file_name = original_file_name ,
175- file_size_bytes = file_size_bytes ,
176- tokenizer = tokenizer ,
177- full_config = full_config ,
178- meta_data = meta_data ,
179- priority = priority ,
180- file_path = file_path ,
181- id = id ,
182- with_commit = with_commit ,
169+ return (
170+ create (
171+ org_id = org_id ,
172+ user_id = user_id ,
173+ original_file_name = original_file_name ,
174+ file_size_bytes = file_size_bytes ,
175+ tokenizer = tokenizer ,
176+ full_config = full_config ,
177+ meta_data = meta_data ,
178+ priority = priority ,
179+ file_path = file_path ,
180+ id = id ,
181+ with_commit = with_commit ,
182+ ),
183+ False ,
183184 )
184185
185186
@@ -205,6 +206,7 @@ def create(
205206 file_size_bytes = file_size_bytes ,
206207 tokenizer = tokenizer ,
207208 full_config = full_config ,
209+ full_config_hash = get_hashed_string (full_config ),
208210 meta_data = meta_data ,
209211 priority = priority ,
210212 )
0 commit comments