Skip to content

Commit 4e61054

Browse files
committed
Small update
1 parent 9ee91cf commit 4e61054

File tree

1 file changed

+347
-0
lines changed

1 file changed

+347
-0
lines changed

pyneofile/pyfile.py

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3165,6 +3165,353 @@ def GetTotalSize(file_list):
31653165
PY_STDERR_TEXT.write("Error accessing file {}: {}\n".format(item, e))
31663166
return total_size
31673167

3168+
def MajorMinorToDev(major, minor):
    """
    Pack a (major, minor) pair into a single legacy 8-bit device number.

    Compatible with Python 2 and 3.
    """
    packed = major << 8
    packed |= minor
    return packed
3174+
3175+
def DevToMajorMinor(dev):
    """
    Split a legacy 8-bit device number back into its (major, minor) pair.

    Compatible with Python 2 and 3.
    """
    return ((dev >> 8) & 0xFF, dev & 0xFF)
3183+
3184+
3185+
def GetDataFromArray(data, path, default=None):
    """
    Walk a nested container along *path* and return the value found there.

    *path* is a sequence of keys/indices applied in order.  Returns
    *default* when any step is missing or the container is not indexable.
    """
    current = data
    try:
        for step in path:
            current = current[step]
    except (KeyError, TypeError, IndexError):
        return default
    return current
3193+
3194+
3195+
def GetDataFromArrayAlt(structure, path, default=None):
    """
    Walk a nested dict/list structure along *path* without raising.

    Each step must be an existing dict key, or an in-range int index for a
    list; otherwise *default* is returned immediately.
    """
    node = structure
    for step in path:
        if isinstance(node, dict):
            if step not in node:
                return default
            node = node[step]
            continue
        if isinstance(node, list) and isinstance(step, int):
            if not (-len(node) <= step < len(node)):
                return default
            node = node[step]
            continue
        return default
    return node
3205+
3206+
# ========= pushback-aware delimiter reader =========
3207+
class _DelimiterReader:
3208+
"""
3209+
Chunked reader that consumes up to N occurrences of a byte delimiter.
3210+
- Works with non-seekable streams by stashing over-read bytes on fp._read_until_delim_pushback
3211+
- For seekable streams, rewinds over-read via seek(-n, SEEK_CUR)
3212+
"""
3213+
_PB_ATTR = "_read_until_delim_pushback"
3214+
3215+
def __init__(self, fp, delimiter, chunk_size=8192, max_read=64 * 1024 * 1024):
3216+
if not hasattr(fp, "read"):
3217+
raise ValueError("fp must be a readable file-like object")
3218+
3219+
# normalize delimiter -> bytes
3220+
if delimiter is None:
3221+
delimiter = "\0"
3222+
if isinstance(delimiter, str):
3223+
delimiter_b = delimiter.encode("utf-8")
3224+
else:
3225+
delimiter_b = bytes(delimiter)
3226+
if not delimiter_b:
3227+
raise ValueError("delimiter must not be empty")
3228+
3229+
self.fp = fp
3230+
self.delim = delimiter_b
3231+
self.dlen = len(delimiter_b)
3232+
self.chunk = int(chunk_size)
3233+
self.max_read = int(max_read)
3234+
3235+
self._buf = bytearray()
3236+
self._total = 0
3237+
3238+
# detect seekability (best-effort)
3239+
seekable = getattr(fp, "seekable", None)
3240+
if callable(seekable):
3241+
self._seekable = bool(seekable())
3242+
else:
3243+
self._seekable = hasattr(fp, "seek") and hasattr(fp, "tell")
3244+
3245+
# Preload any pushback from previous reads on this fp
3246+
pb = getattr(fp, self._PB_ATTR, None)
3247+
if pb:
3248+
self._buf.extend(pb)
3249+
setattr(fp, self._PB_ATTR, bytearray()) # consume
3250+
3251+
def _read_more(self):
3252+
data = self.fp.read(self.chunk)
3253+
if not data:
3254+
return False
3255+
if not isinstance(data, (bytes, bytearray, memoryview)):
3256+
raise TypeError("fp.read() must return bytes-like")
3257+
if isinstance(data, memoryview):
3258+
data = data.tobytes()
3259+
self._buf.extend(data)
3260+
self._total += len(data)
3261+
if self._total > self.max_read:
3262+
raise ValueError("Maximum read limit reached without finding the delimiter")
3263+
return True
3264+
3265+
def _pushback(self, over_bytes):
3266+
"""Return extra bytes to the stream (seek back) or stash on the fp."""
3267+
if not over_bytes:
3268+
return
3269+
if self._seekable:
3270+
try:
3271+
self.fp.seek(-len(over_bytes), io.SEEK_CUR)
3272+
return
3273+
except Exception:
3274+
pass
3275+
# Non-seekable: stash for next call on this fp
3276+
pb = getattr(self.fp, self._PB_ATTR, None)
3277+
if pb is None:
3278+
setattr(self.fp, self._PB_ATTR, bytearray(over_bytes))
3279+
else:
3280+
pb.extend(over_bytes)
3281+
3282+
def read_one_piece(self):
3283+
"""
3284+
Read bytes up to (but not including) the next delimiter.
3285+
Returns (piece_bytes, found_delimiter_bool).
3286+
"""
3287+
out = bytearray()
3288+
while True:
3289+
idx = self._buf.find(self.delim)
3290+
if idx != -1:
3291+
out.extend(self._buf[:idx])
3292+
over = self._buf[idx + self.dlen:]
3293+
self._buf[:] = b""
3294+
self._pushback(over)
3295+
return bytes(out), True
3296+
3297+
# No delimiter present: emit buffer and read more
3298+
if self._buf:
3299+
out.extend(self._buf)
3300+
self._buf[:] = b""
3301+
3302+
if not self._read_more():
3303+
# EOF: return whatever we have (possibly empty), no delimiter
3304+
return bytes(out), False
3305+
3306+
def read_n_pieces(self, n, pad_to_n=False):
3307+
"""
3308+
Read up to n pieces (n delimiters). Returns list of bytes; len <= n.
3309+
If pad_to_n=True, pads with b"" until length == n (avoids downstream IndexError).
3310+
"""
3311+
n = int(n)
3312+
parts = []
3313+
while len(parts) < n:
3314+
piece, found = self.read_one_piece()
3315+
if not found and piece == b"":
3316+
break # true EOF with nothing more
3317+
parts.append(piece)
3318+
if not found:
3319+
break # EOF after a final unterminated piece
3320+
if pad_to_n and len(parts) < n:
3321+
parts.extend([b""] * (n - len(parts)))
3322+
return parts
3323+
3324+
3325+
# ========= helpers =========
3326+
def _default_delim(delimiter):
3327+
# Try your global spec if present; else default to NUL
3328+
try:
3329+
if delimiter is None:
3330+
delimiter = __file_format_dict__["format_delimiter"]
3331+
except Exception:
3332+
pass
3333+
return delimiter if delimiter is not None else "\0"
3334+
3335+
3336+
def _decode_text(b, errors):
3337+
return b.decode("utf-8", errors=errors)
3338+
3339+
3340+
def _read_exact(fp, n):
3341+
"""Read exactly n bytes or raise EOFError on premature EOF."""
3342+
want = int(n)
3343+
out = bytearray()
3344+
while len(out) < want:
3345+
chunk = fp.read(want - len(out))
3346+
if not chunk:
3347+
raise EOFError("Unexpected EOF: wanted {} more bytes".format(want - len(out)))
3348+
if isinstance(chunk, memoryview):
3349+
chunk = chunk.tobytes()
3350+
out.extend(chunk)
3351+
return bytes(out)
3352+
3353+
3354+
def _expect_delimiter(fp, delimiter):
    """Read exactly len(delimiter) bytes and require an exact match (no seeking)."""
    delim = _default_delim(delimiter)
    delim_b = delim.encode("utf-8") if isinstance(delim, str) else bytes(delim)
    got = _read_exact(fp, len(delim_b))
    if got == delim_b:
        return
    raise ValueError("Delimiter mismatch: expected {!r}, got {!r}".format(delim_b, got))
3364+
3365+
3366+
# ========= unified public API (bytes/text control) =========
3367+
def read_until_delimiter(
    fp,
    delimiter=b"\0",
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
):
    """
    Read until the first occurrence of 'delimiter'. Strips the delimiter.
    - Returns text (UTF-8) when decode=True; bytes when decode=False.
    - Non-seekable streams are supported via pushback on the file object.
    """
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=64 * 1024 * 1024 if max_read is None else max_read,
    )
    piece, _ = reader.read_one_piece()
    if not decode:
        return piece
    return _decode_text(piece, "strict" if errors is None else errors)
3395+
3396+
3397+
def read_until_n_delimiters(
    fp,
    delimiter=b"\0",
    num_delimiters=1,
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
    pad_to_n=False,
):
    """
    Read up to 'num_delimiters' occurrences. Returns list of pieces (len <= N).
    If pad_to_n=True, pads with empty pieces to length N (useful for rigid parsers).
    """
    effective_errors = "strict" if errors is None else errors
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=64 * 1024 * 1024 if max_read is None else max_read,
    )
    pieces = reader.read_n_pieces(num_delimiters, pad_to_n=pad_to_n)
    if not decode:
        return pieces
    return [_decode_text(piece, effective_errors) for piece in pieces]
3428+
3429+
3430+
# ========= back-compat wrappers (your original names) =========
3431+
def ReadTillNullByteOld(fp, delimiter=None):
    """
    Back-compat wrapper: read one delimited piece byte-by-byte
    (chunk_size=1), decoding with 'replace' like the original Alt variant.

    The default delimiter is now None, resolved at call time by
    read_until_delimiter via _default_delim, instead of being frozen at
    import time — so later changes to the global format spec are honoured.
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=64 * 1024 * 1024,
        chunk_size=1,
        decode=True,
        errors="replace",
    )
3441+
3442+
3443+
def ReadUntilNullByteOld(fp, delimiter=None):
    """
    Alias for ReadTillNullByteOld.

    Default delimiter is None (resolved at call time inside the read
    pipeline) rather than a value frozen at import time.
    """
    return ReadTillNullByteOld(fp, delimiter)
3445+
3446+
3447+
def ReadTillNullByteAlt(fp, delimiter=None, chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Back-compat wrapper: read one delimited piece in chunks, decoding with
    the 'replace' error handler.

    Default delimiter is None, resolved at call time by
    read_until_delimiter (was frozen at import time).
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
    )
3456+
3457+
3458+
def ReadUntilNullByteAlt(fp, delimiter=None, chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Alias for ReadTillNullByteAlt.

    Default delimiter is None (resolved at call time) rather than a value
    frozen at import time.
    """
    return ReadTillNullByteAlt(fp, delimiter, chunk_size, max_read)
3460+
3461+
3462+
def ReadTillNullByte(fp, delimiter=None, max_read=64 * 1024 * 1024):
    """
    Back-compat wrapper: read one delimited piece with default chunking and
    strict UTF-8 decoding.

    Default delimiter is None, resolved at call time by
    read_until_delimiter (was frozen at import time).
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=max_read,
        chunk_size=8192,
        decode=True,
        errors="strict",
    )
3471+
3472+
3473+
def ReadUntilNullByte(fp, delimiter=None, max_read=64 * 1024 * 1024):
    """
    Alias for ReadTillNullByte.

    Default delimiter is None (resolved at call time) rather than a value
    frozen at import time.
    """
    return ReadTillNullByte(fp, delimiter, max_read)
3475+
3476+
3477+
def ReadTillNullByteByNum(
    fp,
    delimiter=None,
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Back-compat wrapper: read up to num_delimiters pieces as text, padded
    to exactly N entries (avoids IndexError in rigid parsers); decodes with
    the 'replace' error handler.

    Default delimiter is None, resolved at call time by
    read_until_n_delimiters (was frozen at import time).
    """
    return read_until_n_delimiters(
        fp,
        delimiter,
        num_delimiters,
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
        pad_to_n=True,
    )
3495+
3496+
3497+
def ReadUntilNullByteByNum(
    fp,
    delimiter=None,
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Alias for ReadTillNullByteByNum.

    Default delimiter is None (resolved at call time) rather than a value
    frozen at import time.
    """
    return ReadTillNullByteByNum(fp, delimiter, num_delimiters, chunk_size, max_read)
3505+
3506+
def SeekToEndOfFile(fp):
    """
    Position *fp* at end-of-file and return True.

    The previous implementation advanced one byte at a time with
    seek(1, SEEK_CUR) until tell() stopped changing; since seeking past
    EOF is permitted on regular files and BytesIO, tell() keeps growing
    and that loop never terminates.  A single relative-to-end seek is
    both correct and O(1).
    """
    fp.seek(0, 2)  # 2 == os.SEEK_END
    return True
3514+
31683515
def ReadFileHeaderData(fp, skipchecksum=False, formatspecs=None, saltkey=None):
31693516
if(formatspecs is None):
31703517
formatspecs = __file_format_multi_dict__

0 commit comments

Comments
 (0)