1313import re
1414import toml
1515from pydantic import BaseModel
16- #from hermes.model import SoftwareMetadata
16+ # from hermes.model import SoftwareMetadata
1717from hermes .commands .harvest .base import HermesHarvestCommand , HermesHarvestPlugin
1818
19+
1920class TomlHarvestSettings (BaseModel ):
2021 """
2122 Settings class for this plugin
@@ -51,17 +52,17 @@ def __call__(self, command: HermesHarvestCommand):
5152 invoked when hermes harvest is run and this module is registered as a harvester
5253 """
5354
54- #set the working directory temporary to the correct location
55+ # set the working directory temporarily to the correct location
5556 old_dir = getcwd ()
5657 chdir (command .args .path )
5758
58- #harvesting the data from the .toml file specified in the Settings class
59- data = {}# SoftwareMetadata()
59+ # harvesting the data from the .toml file specified in the Settings class
60+ data = {} # SoftwareMetadata()
6061 self .read_from_toml (command .settings .toml .filename , data )
6162
6263 chdir (old_dir )
6364
64- #returning the harvested data and some metadata
65+ # returning the harvested data and some metadata
6566 return data , {"filename" : command .settings .toml .filename }
6667
6768 @classmethod
@@ -87,7 +88,7 @@ def read_from_toml(cls, file, data):
8788 Nothing
8889 """
8990
90- #load the toml file as a dictionary
91+ # load the toml file as a dictionary
9192 try :
9293 if not isinstance (toml_data := toml .load (file ), dict ):
9394 return
@@ -139,8 +140,7 @@ def handle_project_table(cls, table: dict, data):
139140 for key , dest_key in cls .easy_mappings .get ("project" ).items ():
140141 if (value := table .get (key , None )) is None :
141142 continue
142- if (isinstance (value , str ) or
143- isinstance (value , list ) and all (isinstance (val , str ) for val in value )):
143+ if (isinstance (value , str ) or isinstance (value , list ) and all (isinstance (val , str ) for val in value )):
144144 data [dest_key ] = value
145145
146146 # check authors
@@ -155,6 +155,10 @@ def handle_project_table(cls, table: dict, data):
155155 if not (urls := table .get ("urls" )) is None :
156156 cls .handle_urls (urls , data )
157157
158+ # check classifiers
159+ if not (classifiers := table .get ("classifiers" )) is None :
160+ cls .handle_pypi_classifieres (classifiers , data )
161+
158162 @classmethod
159163 def handle_poetry_table (cls , table : dict , data ):
160164 """
@@ -179,11 +183,10 @@ def handle_poetry_table(cls, table: dict, data):
179183 """
180184
181185 # handle all easy mappings
182- for key , dest_key in cls .easy_mappings .get ("poetry" ):
186+ for key , dest_key in cls .easy_mappings .get ("poetry" ). items () :
183187 if (value := table .get (key , None )) is None :
184188 continue
185- if (isinstance (value , str ) or
186- isinstance (value , list ) and all (isinstance (val , str ) for val in value )):
189+ if (isinstance (value , str ) or isinstance (value , list ) and all (isinstance (val , str ) for val in value )):
187190 data [dest_key ] = value
188191
189192 # check authors
@@ -198,6 +201,9 @@ def handle_poetry_table(cls, table: dict, data):
198201 if not (urls := table .get ("urls" )) is None :
199202 cls .handle_urls (urls , data )
200203
204+ # check classifiers
205+ if not (classifiers := table .get ("classifiers" )) is None :
206+ cls .handle_pypi_classifieres (classifiers , data )
201207
202208 @classmethod
203209 def handle_flit_table (cls , table : dict , data ):
@@ -223,11 +229,17 @@ def handle_flit_table(cls, table: dict, data):
223229 """
224230
225231 # handle all easy mappings
226- for key , dest_key in cls .easy_mappings .get ("flit" ):
232+ for key , dest_key in cls .easy_mappings .get ("flit" ). items () :
227233 if (value := table .get (key , None )) is None :
228234 continue
229- if (isinstance (value , str ) or
230- isinstance (value , list ) and all (isinstance (val , str ) for val in value )):
235+ if isinstance (value , str ) and value != "" :
236+ data [dest_key ] = value
237+ elif isinstance (value , list ) and len (value ) != 0 :
238+ value = list (set ([val for val in value if isinstance (val , str ) and val != "" ]))
239+ if len (value ) == 0 :
240+ continue
241+ if len (value ) == 1 :
242+ value = value [0 ]
231243 data [dest_key ] = value
232244
233245 # check author
@@ -239,6 +251,10 @@ def handle_flit_table(cls, table: dict, data):
239251 "email" : table .get ("maintainer-email" , "" )}
240252 cls .handle_person (possible_maintainer , "schema:maintainer" , data )
241253
254+ # check classifiers
255+ if not (classifiers := table .get ("classifiers" )) is None :
256+ cls .handle_pypi_classifieres (classifiers , data )
257+
242258 @classmethod
243259 def handle_person (cls , person_data , key : str , data ):
244260 """
@@ -262,6 +278,9 @@ def handle_person(cls, person_data, key: str, data):
262278 ------
263279 Nothing
264280 """
281+ if not isinstance (key , str ) or len (key ) == 0 :
282+ return
283+
265284 if isinstance (person_data , list ):
266285 # try to extract the name and email from all persons in the list
267286 # and add the resulting list as a list or a single item to the SoftwareMetadata object
@@ -307,7 +326,10 @@ def extract_personal_data(cls, person) -> dict[str, str]:
307326 return {}
308327 # retrieve the name and email from the string or dict
309328 if isinstance (person , str ):
310- [(name , email )] = getaddresses ([person ])
329+ if person .find ("@" ) != - 1 :
330+ [(name , email )] = getaddresses ([person ])
331+ else :
332+ name , email = (person , "" )
311333 else :
312334 name , email = person .get ("name" , "" ), person .get ("email" , "" )
313335 if not isinstance (name , str ):
@@ -318,14 +340,14 @@ def extract_personal_data(cls, person) -> dict[str, str]:
318340 # create an object with name, email and @type if name or email is not empty
319341 person = {}
320342 if name != "" :
321- person ["name" ] = name
343+ person ["schema: name" ] = name
322344 # try to validate the email address
323- if re .fullmatch ("([a-z]|[A-Z]|[0-9])+(.([a-z]|[A-Z]|[0-9])+)*@([a-z]|[A-Z]|[0-9])+." \
324- " ([a-z]|[A-Z]|[0-9])+(.([a-z]|[A-Z]|[0-9])+)*" , email ):
325- person ["email" ] = email
345+ if re .fullmatch (r "([a-z]|[A-Z]|[0-9])+(\ .([a-z]|[A-Z]|[0-9])+)*@([a-z]|[A-Z]|[0-9])+"
346+ r"\. ([a-z]|[A-Z]|[0-9])+(\ .([a-z]|[A-Z]|[0-9])+)*" , email ):
347+ person ["schema: email" ] = email
326348 if not person :
327349 return {}
328- person ["@type" ] = "https:// schema.org/ Person"
350+ person ["@type" ] = "schema: Person"
329351 return person
330352
331353 @classmethod
@@ -368,15 +390,13 @@ def handle_pypi_classifieres(cls, classifiers: str | list[str], data):
368390 classifier = classifier .split (" :: " )
369391 if len (classifier ) < 2 :
370392 continue
371- if (classifier [0 ] == "Operating System" and
372- not (len (classifier ) == 2 and classifier [1 ] == "Microsoft" )):
393+ if (classifier [0 ] == "Operating System" and not (len (classifier ) == 2 and classifier [1 ] == "Microsoft" )):
373394 temp = {"@type" : "schema:SoftwareApplication" , "schema:name" : classifier [- 1 ]}
374395 sorted_classifiers ["schema:targetProduct" ].append (temp )
375396 elif classifier [0 ] == "Intended Audience" :
376397 temp = {"@type" : "schema:Audience" , "schema:name" : classifier [- 1 ]}
377398 sorted_classifiers ["schema:audience" ].append (temp )
378- elif (classifier [0 ] == "License" and
379- not (classifier [1 ] == "OSI Approved" and len (classifier ) == 2 )):
399+ elif (classifier [0 ] == "License" and not (classifier [1 ] == "OSI Approved" and len (classifier ) == 2 )):
380400 temp = {"@type" : "schema:CreativeWork" , "schema:name" : classifier [- 1 ]}
381401 sorted_classifiers ["schema:license" ].append (temp )
382402 elif classifier [0 ] == "Natural Language" :
0 commit comments