Skip to content

Commit fefd7a2

Browse files
committed
Small update
1 parent b505490 commit fefd7a2

File tree

1 file changed

+113
-2
lines changed

1 file changed

+113
-2
lines changed

pyneofile/pyneofile.py

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2142,6 +2142,113 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_multi_dict__, file
21422142
fp.close()
21432143
return filetype
21442144

2145+
# Precompiled regexes (faster than compiling each call)
2146+
_RE_ZERO_SPACE_UNIT = re.compile(r"([0]+) ([A-Za-z]+)")
2147+
_RE_DOT_SPACE_UNIT = re.compile(r"\. ([A-Za-z]+)")
2148+
2149+
# Unit tables
2150+
_IEC_UNITS = (" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB", " YiB")
2151+
_SI_UNITS = (" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB", " YB")
2152+
2153+
2154+
def _format_readable(value, suffix, precision):
2155+
# Keep behavior close to original: format with width 3 and precision, then clean up
2156+
s = ("%3." + str(precision) + "f%s") % (value, suffix)
2157+
s = _RE_ZERO_SPACE_UNIT.sub(r" \2", s)
2158+
s = _RE_DOT_SPACE_UNIT.sub(r" \1", s)
2159+
return s
2160+
2161+
2162+
def get_readable_size(num_bytes, precision=1, unit="IEC"):
    """
    Convert a byte count into a human-readable size description.

    *unit* selects the unit system case-insensitively: "SI" divides by
    1000 and uses the SI suffix table; any other value (including an empty
    or None *unit*) divides by 1024 and uses the IEC suffix table.

    Returns a dict with the keys "Bytes" (the argument as passed in),
    "ReadableWithSuffix" (formatted text), "ReadableWithoutSuffix" (the
    numeric part of that text) and "ReadableSuffix" (the unit part).
    """
    system = (unit or "IEC").upper()
    if system == "SI":
        step, table = 1000.0, _SI_UNITS
    else:
        step, table = 1024.0, _IEC_UNITS

    scaled = float(num_bytes)

    # Start from the largest unit and replace it with the first smaller
    # unit whose scaled magnitude stays below one step.
    chosen = table[-1]
    for candidate in table[:-1]:
        if abs(scaled) < step:
            chosen = candidate
            break
        scaled /= step

    text = _format_readable(scaled, chosen, precision)
    pieces = text.split()
    return {
        "Bytes": num_bytes,
        "ReadableWithSuffix": text,
        "ReadableWithoutSuffix": pieces[0],
        "ReadableSuffix": pieces[1],
    }
2197+
2198+
2199+
def _normalize_hash_types(usehashtypes):
2200+
# Returns list like ["md5", "sha1"] with empties removed
2201+
if not usehashtypes:
2202+
return []
2203+
return [h.strip().lower() for h in usehashtypes.split(",") if h.strip()]
2204+
2205+
2206+
def get_readable_size_from_file(infile, precision=1, unit="IEC",
                                usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the size of the file at *infile*, optionally with digests.

    The size comes from os.path.getsize() and is formatted by
    get_readable_size(). When *usehashes* is true and *usehashtypes* names
    at least one algorithm, the file is read once in 1 MiB chunks and each
    digest is added to the result under the upper-cased algorithm name
    (for example "MD5").

    Returns the dict produced by get_readable_size(), plus any digests.
    """
    result = get_readable_size(os.path.getsize(infile), precision, unit)
    if not usehashes:
        return result

    names = _normalize_hash_types(usehashtypes)
    if not names:
        return result

    # hashlib.new takes the lower-case algorithm name ("md5", "sha1", ...);
    # the result dict is keyed by the upper-case form.
    digests = {}
    for name in names:
        digests[name.upper()] = hashlib.new(name)

    # One pass over the file feeds every hasher, so the content is never
    # held in memory as a whole.
    with open(infile, "rb") as handle:
        while True:
            piece = handle.read(1024 * 1024)
            if piece == b"":
                break
            for engine in digests.values():
                engine.update(piece)

    for label, engine in digests.items():
        result[label] = engine.hexdigest()

    return result
2229+
2230+
2231+
def get_readable_size_from_string(instring, precision=1, unit="IEC",
                                  usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the length of *instring*, optionally with digests.

    The size is len(instring): for bytes that is the byte count, for a
    Python 3 text string it is the character count (kept for
    compatibility with existing callers). When *usehashes* is true and
    *usehashtypes* names at least one algorithm, each digest is added to
    the result under the upper-cased algorithm name (for example "MD5").
    Text input is hashed as its UTF-8 encoding; bytes and bytearray input
    is hashed as-is.

    Returns the dict produced by get_readable_size(), plus any digests.
    """
    size = len(instring)
    out = get_readable_size(size, precision, unit)

    if usehashes:
        hash_types = _normalize_hash_types(usehashtypes)
        if hash_types:
            # bytearray has no .encode(); treat it as binary data like
            # bytes instead of sending it down the text branch.
            if isinstance(instring, (bytes, bytearray)):
                data = instring
            else:
                data = instring.encode("utf-8")

            for h in hash_types:
                hasher = hashlib.new(h)
                hasher.update(data)
                out[h.upper()] = hasher.hexdigest()

    return out
2251+
21452252
def _advance(fp, base, n):
21462253
"""
21472254
Move file position to right after the BOM/signature.
@@ -4841,7 +4948,9 @@ def ReadFileHeaderDataWithContentToArray(fp, listonly=False, contentasfile=True,
48414948
fcontents.seek(0, 0)
48424949
if(not contentasfile):
48434950
fcontents = fcontents.read()
4844-
outlist = {'fheadersize': fheadsize, 'fhstart': fheaderstart, 'fhend': fhend, 'ftype': ftype, 'fencoding': fencoding, 'fcencoding': fcencoding, 'fname': fname, 'fbasedir': fbasedir, 'flinkname': flinkname, 'fsize': fsize, 'fblksize': fblksize, 'fblocks': fblocks, 'fflags': fflags, 'fatime': divmod(int(fatime), 10**9)[0], 'fmtime': divmod(int(fmtime), 10**9)[0], 'fctime': divmod(int(fctime), 10**9)[0], 'fbtime': divmod(int(fbtime), 10**9)[0], 'fatime_ns': fatime, 'fmtime_ns': fmtime, 'fctime_ns': fctime, 'fbtime_ns': fbtime, 'fmode': fmode, 'fchmode': fchmode, 'fstrmode': PrintPermissionString(fmode, ftype), 'ftypemod': ftypemod, 'fwinattributes': fwinattributes, 'fcompression': fcompression, 'fcsize': fcsize, 'fuid': fuid, 'funame': funame, 'fgid': fgid, 'fgname': fgname, 'finode': finode, 'flinkcount': flinkcount,
4951+
iecsize = get_readable_size(fsize, unit="IEC")
4952+
sisize = get_readable_size(fsize, unit="SI")
4953+
outlist = {'fheadersize': fheadsize, 'fhstart': fheaderstart, 'fhend': fhend, 'ftype': ftype, 'fencoding': fencoding, 'fcencoding': fcencoding, 'fname': fname, 'fbasedir': fbasedir, 'flinkname': flinkname, 'fsize': fsize, 'fsize_si': sisize, 'fsize_iec': iecsize, 'fblksize': fblksize, 'fblocks': fblocks, 'fflags': fflags, 'fatime': divmod(int(fatime), 10**9)[0], 'fmtime': divmod(int(fmtime), 10**9)[0], 'fctime': divmod(int(fctime), 10**9)[0], 'fbtime': divmod(int(fbtime), 10**9)[0], 'fatime_ns': fatime, 'fmtime_ns': fmtime, 'fctime_ns': fctime, 'fbtime_ns': fbtime, 'fmode': fmode, 'fchmode': fchmode, 'fstrmode': PrintPermissionString(fmode, ftype), 'ftypemod': ftypemod, 'fwinattributes': fwinattributes, 'fcompression': fcompression, 'fcsize': fcsize, 'fuid': fuid, 'funame': funame, 'fgid': fgid, 'fgname': fgname, 'finode': finode, 'flinkcount': flinkcount,
48454954
'fdev': fdev, 'frdev': frdev, 'fseektojson': fseektojson, 'fseektocontent': fseektocontent, 'fseeknextfile': fseeknextfile, 'fheaderchecksumtype': HeaderOut[-4], 'fjsonchecksumtype': fjsonchecksumtype, 'fcontentchecksumtype': HeaderOut[-3], 'fnumfields': fnumfields + 2, 'frawheader': HeaderOut, 'fvendorfields': fvendorfields, 'fvendordata': fvendorfieldslist, 'fextrafields': fextrafields, 'fextrafieldsize': fextrasize, 'fextradata': fextrafieldslist, 'fjsontype': fjsontype, 'fjsonlen': fjsonlen, 'fjsonsize': fjsonsize, 'fjsonrawdata': fjsonrawcontent, 'fjsondata': fjsoncontent, 'fjstart': fjstart, 'fjend': fjend, 'fheaderchecksum': fcs, 'fjsonchecksum': fjsonchecksum, 'fcontentchecksum': fccs, 'fhascontents': pyhascontents, 'fcontentstart': fcontentstart, 'fcontentend': fcontentend, 'fcontentasfile': contentasfile, 'fcontents': fcontents}
48464955
return outlist
48474956

@@ -5478,7 +5587,9 @@ def ReadFileDataWithContentToArray(fp, filestart=0, seekstart=0, seekend=0, list
54785587
realidnum = realidnum + 1
54795588
CatSize = fp.tell()
54805589
CatSizeEnd = CatSize
5481-
outlist.update({'fp': fp, 'fsize': CatSizeEnd})
5590+
iecsize = get_readable_size(CatSizeEnd, unit="IEC")
5591+
sisize = get_readable_size(CatSizeEnd, unit="SI")
5592+
outlist.update({'fp': fp, 'fsize': CatSizeEnd, 'fsize_si': sisize, 'fsize_iec': iecsize})
54825593
return outlist
54835594

54845595

0 commit comments

Comments
 (0)