@@ -2142,6 +2142,113 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_multi_dict__, file
21422142 fp .close ()
21432143 return filetype
21442144
# Size-unit suffixes, ordered from smallest to largest.  Each entry carries
# its own leading space so it can be appended directly to a formatted number.
_SI_UNITS = (
    " B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB", " YB",
)
_IEC_UNITS = (
    " B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB",
)

# Clean-up patterns, compiled once at import time instead of on every call.
# A run of zeros that is immediately followed by " <letters>".
_RE_ZERO_SPACE_UNIT = re.compile(r"([0]+) ([A-Za-z]+)")
# A "." that is immediately followed by " <letters>".
_RE_DOT_SPACE_UNIT = re.compile(r"\. ([A-Za-z]+)")
2152+
2153+
def _format_readable(value, suffix, precision):
    """
    Format value with precision decimal places and append suffix.

    The number is rendered with the "%3.<precision>f" format.  Trailing
    zeros of the fractional part are then dropped, together with the
    decimal point when nothing is left after it, so 1.50 becomes "1.5"
    and 2.00 becomes "2".

    Only digits after a decimal point are trimmed.  With precision=0 the
    rendered number has no decimal point and is left untouched: trimming
    zeros there would change its magnitude ("100" -> "1") or erase it
    entirely ("0" -> "").

    value: number to render.
    suffix: text appended verbatim after the number (for example " KiB").
    precision: number of digits requested after the decimal point.

    Returns the formatted string.  Because of the minimum field width of
    3, numbers shorter than three characters keep leading padding spaces;
    this can only happen when precision is 0.
    """
    number = ("%3." + str(precision) + "f") % value
    # Guard on the decimal point so integer digits are never stripped.
    if "." in number:
        number = number.rstrip("0").rstrip(".")
    return "%s%s" % (number, suffix)
2160+
2161+
def get_readable_size(num_bytes, precision=1, unit="IEC"):
    """
    Convert a byte count into a human-readable size description.

    num_bytes: size to describe; it is converted with float() for scaling.
    precision: number of decimal places handed to _format_readable().
    unit: "SI" (compared case-insensitively) selects steps of 1000 and the
        SI suffixes; any other value, including None or an empty string,
        selects steps of 1024 and the IEC suffixes.

    Returns a dict with the keys "Bytes" (num_bytes exactly as passed in),
    "ReadableWithSuffix" (the formatted text), and "ReadableWithoutSuffix"
    / "ReadableSuffix" (the first and second whitespace-separated fields
    of that text).
    """
    if (unit or "IEC").upper() == "SI":
        step, suffixes = 1000.0, _SI_UNITS
    else:
        step, suffixes = 1024.0, _IEC_UNITS

    scaled = float(num_bytes)

    # Start from the largest suffix and replace it with the first smaller
    # one whose range contains the value; if none does, the value has been
    # divided once per smaller suffix and the largest suffix is kept.
    chosen = suffixes[-1]
    for candidate in suffixes[:-1]:
        if abs(scaled) < step:
            chosen = candidate
            break
        scaled /= step

    text = _format_readable(scaled, chosen, precision)
    fields = text.split()
    return {
        "Bytes": num_bytes,
        "ReadableWithSuffix": text,
        "ReadableWithoutSuffix": fields[0],
        "ReadableSuffix": fields[1],
    }
2197+
2198+
def _normalize_hash_types(usehashtypes):
    """
    Turn a comma-separated string of hash names into a clean list.

    usehashtypes: string such as "md5,sha1"; a falsy value (None, "")
        yields an empty list.

    Returns the names in their original order, each stripped of
    surrounding whitespace and lower-cased.  Entries that are empty after
    stripping are dropped.
    """
    names = []
    if usehashtypes:
        for piece in usehashtypes.split(","):
            cleaned = piece.strip()
            if cleaned:
                names.append(cleaned.lower())
    return names
2204+
2205+
def get_readable_size_from_file(infile, precision=1, unit="IEC",
                                usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the size of a file on disk, optionally with content digests.

    infile: path passed to os.path.getsize() and, when hashing, to open().
    precision, unit: forwarded unchanged to get_readable_size().
    usehashes: when true, digests of the file content are added.
    usehashtypes: comma-separated hash names; each normalized name is
        passed to hashlib.new().

    Returns the dict produced by get_readable_size().  When hashing is
    requested and at least one name remains after normalization, the dict
    gains one entry per hash, keyed by the upper-cased name and holding
    the hex digest.
    """
    result = get_readable_size(os.path.getsize(infile), precision, unit)
    if not usehashes:
        return result

    wanted = _normalize_hash_types(usehashtypes)
    if not wanted:
        return result

    # All hash objects are created before the file is opened.
    digests = {}
    for algo in wanted:
        digests[algo.upper()] = hashlib.new(algo)

    # Read the file once in 1 MiB pieces and feed every hash object from
    # the same piece, so the content is never held in memory as a whole.
    piece_size = 1024 * 1024
    with open(infile, "rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                break
            for digest in digests.values():
                digest.update(piece)

    for label in digests:
        result[label] = digests[label].hexdigest()
    return result
2229+
2230+
def get_readable_size_from_string(instring, precision=1, unit="IEC",
                                  usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the size of an in-memory string, optionally with digests.

    instring: bytes or text.  The reported size is len(instring), so for
        text it is a count of characters, while the digests are computed
        over the UTF-8 encoding of that text.
    precision, unit: forwarded unchanged to get_readable_size().
    usehashes: when true, digests of the data are added.
    usehashtypes: comma-separated hash names; each normalized name is
        passed to hashlib.new().

    Returns the dict produced by get_readable_size().  When hashing is
    requested and at least one name remains after normalization, the dict
    gains one entry per hash, keyed by the upper-cased name and holding
    the hex digest.
    """
    result = get_readable_size(len(instring), precision, unit)

    wanted = _normalize_hash_types(usehashtypes) if usehashes else []
    if wanted:
        # Hash functions need bytes; anything that is not already bytes is
        # encoded as UTF-8, and only when a digest is actually requested.
        if isinstance(instring, bytes):
            payload = instring
        else:
            payload = instring.encode("utf-8")

        for algo in wanted:
            result[algo.upper()] = hashlib.new(algo, payload).hexdigest()

    return result
2251+
21452252def _advance (fp , base , n ):
21462253 """
21472254 Move file position to right after the BOM/signature.
@@ -4841,7 +4948,9 @@ def ReadFileHeaderDataWithContentToArray(fp, listonly=False, contentasfile=True,
48414948 fcontents .seek (0 , 0 )
48424949 if (not contentasfile ):
48434950 fcontents = fcontents .read ()
4844- outlist = {'fheadersize' : fheadsize , 'fhstart' : fheaderstart , 'fhend' : fhend , 'ftype' : ftype , 'fencoding' : fencoding , 'fcencoding' : fcencoding , 'fname' : fname , 'fbasedir' : fbasedir , 'flinkname' : flinkname , 'fsize' : fsize , 'fblksize' : fblksize , 'fblocks' : fblocks , 'fflags' : fflags , 'fatime' : divmod (int (fatime ), 10 ** 9 )[0 ], 'fmtime' : divmod (int (fmtime ), 10 ** 9 )[0 ], 'fctime' : divmod (int (fctime ), 10 ** 9 )[0 ], 'fbtime' : divmod (int (fbtime ), 10 ** 9 )[0 ], 'fatime_ns' : fatime , 'fmtime_ns' : fmtime , 'fctime_ns' : fctime , 'fbtime_ns' : fbtime , 'fmode' : fmode , 'fchmode' : fchmode , 'fstrmode' : PrintPermissionString (fmode , ftype ), 'ftypemod' : ftypemod , 'fwinattributes' : fwinattributes , 'fcompression' : fcompression , 'fcsize' : fcsize , 'fuid' : fuid , 'funame' : funame , 'fgid' : fgid , 'fgname' : fgname , 'finode' : finode , 'flinkcount' : flinkcount ,
4951+ iecsize = get_readable_size (fsize , unit = "IEC" )
4952+ sisize = get_readable_size (fsize , unit = "SI" )
4953+ outlist = {'fheadersize' : fheadsize , 'fhstart' : fheaderstart , 'fhend' : fhend , 'ftype' : ftype , 'fencoding' : fencoding , 'fcencoding' : fcencoding , 'fname' : fname , 'fbasedir' : fbasedir , 'flinkname' : flinkname , 'fsize' : fsize , 'fsize_si' : sisize , 'fsize_iec' : iecsize , 'fblksize' : fblksize , 'fblocks' : fblocks , 'fflags' : fflags , 'fatime' : divmod (int (fatime ), 10 ** 9 )[0 ], 'fmtime' : divmod (int (fmtime ), 10 ** 9 )[0 ], 'fctime' : divmod (int (fctime ), 10 ** 9 )[0 ], 'fbtime' : divmod (int (fbtime ), 10 ** 9 )[0 ], 'fatime_ns' : fatime , 'fmtime_ns' : fmtime , 'fctime_ns' : fctime , 'fbtime_ns' : fbtime , 'fmode' : fmode , 'fchmode' : fchmode , 'fstrmode' : PrintPermissionString (fmode , ftype ), 'ftypemod' : ftypemod , 'fwinattributes' : fwinattributes , 'fcompression' : fcompression , 'fcsize' : fcsize , 'fuid' : fuid , 'funame' : funame , 'fgid' : fgid , 'fgname' : fgname , 'finode' : finode , 'flinkcount' : flinkcount ,
48454954 'fdev' : fdev , 'frdev' : frdev , 'fseektojson' : fseektojson , 'fseektocontent' : fseektocontent , 'fseeknextfile' : fseeknextfile , 'fheaderchecksumtype' : HeaderOut [- 4 ], 'fjsonchecksumtype' : fjsonchecksumtype , 'fcontentchecksumtype' : HeaderOut [- 3 ], 'fnumfields' : fnumfields + 2 , 'frawheader' : HeaderOut , 'fvendorfields' : fvendorfields , 'fvendordata' : fvendorfieldslist , 'fextrafields' : fextrafields , 'fextrafieldsize' : fextrasize , 'fextradata' : fextrafieldslist , 'fjsontype' : fjsontype , 'fjsonlen' : fjsonlen , 'fjsonsize' : fjsonsize , 'fjsonrawdata' : fjsonrawcontent , 'fjsondata' : fjsoncontent , 'fjstart' : fjstart , 'fjend' : fjend , 'fheaderchecksum' : fcs , 'fjsonchecksum' : fjsonchecksum , 'fcontentchecksum' : fccs , 'fhascontents' : pyhascontents , 'fcontentstart' : fcontentstart , 'fcontentend' : fcontentend , 'fcontentasfile' : contentasfile , 'fcontents' : fcontents }
48464955 return outlist
48474956
@@ -5478,7 +5587,9 @@ def ReadFileDataWithContentToArray(fp, filestart=0, seekstart=0, seekend=0, list
54785587 realidnum = realidnum + 1
54795588 CatSize = fp .tell ()
54805589 CatSizeEnd = CatSize
5481- outlist .update ({'fp' : fp , 'fsize' : CatSizeEnd })
5590+ iecsize = get_readable_size (CatSizeEnd , unit = "IEC" )
5591+ sisize = get_readable_size (CatSizeEnd , unit = "SI" )
5592+ outlist .update ({'fp' : fp , 'fsize' : CatSizeEnd , 'fsize_si' : sisize , 'fsize_iec' : iecsize })
54825593 return outlist
54835594
54845595
0 commit comments