@@ -3165,6 +3165,353 @@ def GetTotalSize(file_list):
31653165 PY_STDERR_TEXT .write ("Error accessing file {}: {}\n " .format (item , e ))
31663166 return total_size
31673167
def MajorMinorToDev(major, minor):
    """
    Pack a major/minor pair into a single device number.

    The major number is shifted left by 8 bits and OR-ed with the minor
    number. Neither value is masked, so a minor above 0xFF overlaps the
    major bits.
    Compatible with Python 2 and 3.
    """
    high_bits = major << 8
    return high_bits | minor
3174+
def DevToMajorMinor(dev):
    """
    Split a device number into its (major, minor) pair.

    The minor is the low 8 bits of ``dev`` and the major is the next 8 bits;
    any higher bits are discarded.
    Compatible with Python 2 and 3.
    """
    return ((dev >> 8) & 0xFF, dev & 0xFF)
3183+
3184+
def GetDataFromArray(data, path, default=None):
    """
    Walk ``data`` by subscripting it with each key in ``path`` in turn.

    Returns the element reached, or ``default`` when any step (including
    iterating ``path`` itself) raises KeyError, TypeError or IndexError.
    """
    try:
        node = data
        for step in path:
            node = node[step]
    except (KeyError, TypeError, IndexError):
        return default
    else:
        return node
3193+
3194+
def GetDataFromArrayAlt(structure, path, default=None):
    """
    Walk ``structure`` along ``path`` using explicit checks, not exceptions.

    A step is followed only when the current element is a dict containing
    the key, or a list and the key is an int within range (negative indexes
    allowed). Any other situation returns ``default``.
    """
    cursor = structure
    for step in path:
        if isinstance(cursor, dict) and step in cursor:
            cursor = cursor[step]
            continue
        is_valid_index = (
            isinstance(cursor, list)
            and isinstance(step, int)
            and -len(cursor) <= step < len(cursor)
        )
        if not is_valid_index:
            return default
        cursor = cursor[step]
    return cursor
3205+
3206+ # ========= pushback-aware delimiter reader =========
3207+ class _DelimiterReader :
3208+ """
3209+ Chunked reader that consumes up to N occurrences of a byte delimiter.
3210+ - Works with non-seekable streams by stashing over-read bytes on fp._read_until_delim_pushback
3211+ - For seekable streams, rewinds over-read via seek(-n, SEEK_CUR)
3212+ """
3213+ _PB_ATTR = "_read_until_delim_pushback"
3214+
3215+ def __init__ (self , fp , delimiter , chunk_size = 8192 , max_read = 64 * 1024 * 1024 ):
3216+ if not hasattr (fp , "read" ):
3217+ raise ValueError ("fp must be a readable file-like object" )
3218+
3219+ # normalize delimiter -> bytes
3220+ if delimiter is None :
3221+ delimiter = "\0 "
3222+ if isinstance (delimiter , str ):
3223+ delimiter_b = delimiter .encode ("utf-8" )
3224+ else :
3225+ delimiter_b = bytes (delimiter )
3226+ if not delimiter_b :
3227+ raise ValueError ("delimiter must not be empty" )
3228+
3229+ self .fp = fp
3230+ self .delim = delimiter_b
3231+ self .dlen = len (delimiter_b )
3232+ self .chunk = int (chunk_size )
3233+ self .max_read = int (max_read )
3234+
3235+ self ._buf = bytearray ()
3236+ self ._total = 0
3237+
3238+ # detect seekability (best-effort)
3239+ seekable = getattr (fp , "seekable" , None )
3240+ if callable (seekable ):
3241+ self ._seekable = bool (seekable ())
3242+ else :
3243+ self ._seekable = hasattr (fp , "seek" ) and hasattr (fp , "tell" )
3244+
3245+ # Preload any pushback from previous reads on this fp
3246+ pb = getattr (fp , self ._PB_ATTR , None )
3247+ if pb :
3248+ self ._buf .extend (pb )
3249+ setattr (fp , self ._PB_ATTR , bytearray ()) # consume
3250+
3251+ def _read_more (self ):
3252+ data = self .fp .read (self .chunk )
3253+ if not data :
3254+ return False
3255+ if not isinstance (data , (bytes , bytearray , memoryview )):
3256+ raise TypeError ("fp.read() must return bytes-like" )
3257+ if isinstance (data , memoryview ):
3258+ data = data .tobytes ()
3259+ self ._buf .extend (data )
3260+ self ._total += len (data )
3261+ if self ._total > self .max_read :
3262+ raise ValueError ("Maximum read limit reached without finding the delimiter" )
3263+ return True
3264+
3265+ def _pushback (self , over_bytes ):
3266+ """Return extra bytes to the stream (seek back) or stash on the fp."""
3267+ if not over_bytes :
3268+ return
3269+ if self ._seekable :
3270+ try :
3271+ self .fp .seek (- len (over_bytes ), io .SEEK_CUR )
3272+ return
3273+ except Exception :
3274+ pass
3275+ # Non-seekable: stash for next call on this fp
3276+ pb = getattr (self .fp , self ._PB_ATTR , None )
3277+ if pb is None :
3278+ setattr (self .fp , self ._PB_ATTR , bytearray (over_bytes ))
3279+ else :
3280+ pb .extend (over_bytes )
3281+
3282+ def read_one_piece (self ):
3283+ """
3284+ Read bytes up to (but not including) the next delimiter.
3285+ Returns (piece_bytes, found_delimiter_bool).
3286+ """
3287+ out = bytearray ()
3288+ while True :
3289+ idx = self ._buf .find (self .delim )
3290+ if idx != - 1 :
3291+ out .extend (self ._buf [:idx ])
3292+ over = self ._buf [idx + self .dlen :]
3293+ self ._buf [:] = b""
3294+ self ._pushback (over )
3295+ return bytes (out ), True
3296+
3297+ # No delimiter present: emit buffer and read more
3298+ if self ._buf :
3299+ out .extend (self ._buf )
3300+ self ._buf [:] = b""
3301+
3302+ if not self ._read_more ():
3303+ # EOF: return whatever we have (possibly empty), no delimiter
3304+ return bytes (out ), False
3305+
3306+ def read_n_pieces (self , n , pad_to_n = False ):
3307+ """
3308+ Read up to n pieces (n delimiters). Returns list of bytes; len <= n.
3309+ If pad_to_n=True, pads with b"" until length == n (avoids downstream IndexError).
3310+ """
3311+ n = int (n )
3312+ parts = []
3313+ while len (parts ) < n :
3314+ piece , found = self .read_one_piece ()
3315+ if not found and piece == b"" :
3316+ break # true EOF with nothing more
3317+ parts .append (piece )
3318+ if not found :
3319+ break # EOF after a final unterminated piece
3320+ if pad_to_n and len (parts ) < n :
3321+ parts .extend ([b"" ] * (n - len (parts )))
3322+ return parts
3323+
3324+
3325+ # ========= helpers =========
3326+ def _default_delim (delimiter ):
3327+ # Try your global spec if present; else default to NUL
3328+ try :
3329+ if delimiter is None :
3330+ delimiter = __file_format_dict__ ["format_delimiter" ]
3331+ except Exception :
3332+ pass
3333+ return delimiter if delimiter is not None else "\0 "
3334+
3335+
3336+ def _decode_text (b , errors ):
3337+ return b .decode ("utf-8" , errors = errors )
3338+
3339+
3340+ def _read_exact (fp , n ):
3341+ """Read exactly n bytes or raise EOFError on premature EOF."""
3342+ want = int (n )
3343+ out = bytearray ()
3344+ while len (out ) < want :
3345+ chunk = fp .read (want - len (out ))
3346+ if not chunk :
3347+ raise EOFError ("Unexpected EOF: wanted {} more bytes" .format (want - len (out )))
3348+ if isinstance (chunk , memoryview ):
3349+ chunk = chunk .tobytes ()
3350+ out .extend (chunk )
3351+ return bytes (out )
3352+
3353+
def _expect_delimiter(fp, delimiter):
    """
    Consume len(delimiter) bytes from ``fp`` and require them to equal the
    delimiter (no seeking).

    ``delimiter`` is resolved through ``_default_delim`` and, when it is a
    str, encoded as UTF-8. Raises ValueError on a mismatch; EOFError from
    ``_read_exact`` propagates.
    """
    resolved = _default_delim(delimiter)
    expected = resolved.encode("utf-8") if isinstance(resolved, str) else bytes(resolved)
    actual = _read_exact(fp, len(expected))
    if actual == expected:
        return
    raise ValueError("Delimiter mismatch: expected {!r}, got {!r}".format(expected, actual))
3364+
3365+
3366+ # ========= unified public API (bytes/text control) =========
def read_until_delimiter(
    fp,
    delimiter=b"\0",
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
):
    """
    Read until the first occurrence of 'delimiter'. Strips the delimiter.
    - Returns text (UTF-8) when decode=True; bytes when decode=False.
    - Non-seekable streams are supported via pushback on the file object.

    delimiter:  str or bytes-like; None is resolved by _default_delim().
                Defaults to a single NUL byte.
    max_read:   byte limit handed to the reader (None -> 64 MiB).
    chunk_size: bytes requested per read (None -> 8192).
    errors:     decode error policy (None -> "strict").

    If EOF is reached before a delimiter, the bytes read so far are returned.
    """
    if max_read is None:
        max_read = 64 * 1024 * 1024
    if chunk_size is None:
        chunk_size = 8192
    if errors is None:
        errors = "strict"

    r = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=chunk_size,
        max_read=max_read,
    )
    # The found flag is not needed here: an unterminated final piece is
    # returned the same way as a terminated one.
    piece, _found = r.read_one_piece()
    return _decode_text(piece, errors) if decode else piece
3395+
3396+
def read_until_n_delimiters(
    fp,
    delimiter=b"\0",
    num_delimiters=1,
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
    pad_to_n=False,
):
    """
    Read up to 'num_delimiters' occurrences. Returns list of pieces (len <= N).
    If pad_to_n=True, pads with empty pieces to length N (useful for rigid parsers).

    delimiter:  str or bytes-like; None is resolved by _default_delim().
                Defaults to a single NUL byte.
    max_read:   byte limit handed to the reader (None -> 64 MiB).
    chunk_size: bytes requested per read (None -> 8192).
    decode:     return UTF-8 text pieces when True, bytes when False.
    errors:     decode error policy (None -> "strict").
    """
    if max_read is None:
        max_read = 64 * 1024 * 1024
    if chunk_size is None:
        chunk_size = 8192
    if errors is None:
        errors = "strict"

    r = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=chunk_size,
        max_read=max_read,
    )
    parts = r.read_n_pieces(num_delimiters, pad_to_n=pad_to_n)
    if decode:
        return [_decode_text(p, errors) for p in parts]
    return parts
3428+
3429+
3430+ # ========= back-compat wrappers (your original names) =========
def ReadTillNullByteOld(fp, delimiter=_default_delim(None)):
    """
    Read one delimiter-terminated piece from ``fp`` and return it as text.

    Reads with chunk_size=1 to emulate a byte-by-byte reader and decodes as
    UTF-8 with errors="replace". The default delimiter is resolved once, when
    this function is defined.
    """
    options = dict(
        max_read=64 * 1024 * 1024,
        chunk_size=1,
        decode=True,
        errors="replace",
    )
    return read_until_delimiter(fp, delimiter, **options)
3441+
3442+
def ReadUntilNullByteOld(fp, delimiter=_default_delim(None)):
    """Alias of ReadTillNullByteOld; forwards both arguments unchanged."""
    piece = ReadTillNullByteOld(fp, delimiter)
    return piece
3445+
3446+
def ReadTillNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Read one delimiter-terminated piece from ``fp`` and return it as text.

    Uses the caller's chunk_size and max_read and decodes as UTF-8 with
    errors="replace".
    """
    options = dict(
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
    )
    return read_until_delimiter(fp, delimiter, **options)
3456+
3457+
def ReadUntilNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """Alias of ReadTillNullByteAlt; forwards all arguments unchanged."""
    piece = ReadTillNullByteAlt(fp, delimiter, chunk_size, max_read)
    return piece
3460+
3461+
def ReadTillNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """
    Read one delimiter-terminated piece from ``fp`` and return it as text.

    Reads in 8192-byte chunks and decodes as UTF-8 with errors="strict", so
    invalid UTF-8 raises instead of being replaced.
    """
    options = dict(
        max_read=max_read,
        chunk_size=8192,
        decode=True,
        errors="strict",
    )
    return read_until_delimiter(fp, delimiter, **options)
3471+
3472+
def ReadUntilNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """Alias of ReadTillNullByte; forwards all arguments unchanged."""
    piece = ReadTillNullByte(fp, delimiter, max_read)
    return piece
3475+
3476+
def ReadTillNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Read ``num_delimiters`` delimiter-terminated pieces and return them as a
    list of text parts.

    Pieces are decoded as UTF-8 with errors="replace". The list is padded
    with empty parts to exactly ``num_delimiters`` entries so that rigid
    parsers indexing into it do not hit IndexError.
    """
    options = dict(
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
        pad_to_n=True,
    )
    return read_until_n_delimiters(fp, delimiter, num_delimiters, **options)
3495+
3496+
def ReadUntilNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """Alias of ReadTillNullByteByNum; forwards all arguments unchanged."""
    parts = ReadTillNullByteByNum(fp, delimiter, num_delimiters, chunk_size, max_read)
    return parts
3505+
def SeekToEndOfFile(fp):
    """
    Move the position of ``fp`` to the end of the stream and return True.

    An end-relative seek is tried first. Stepping forward one byte at a time
    never terminates on streams that allow seeking past EOF (regular files,
    io.BytesIO), because tell() keeps growing. The stepping loop is kept only
    as a fallback for file objects that reject end-relative seeks; it stops
    once tell() no longer advances.
    """
    try:
        fp.seek(0, 2)  # whence 2 == SEEK_END
        return True
    except (ValueError, IOError, OSError):
        # End-relative seek rejected by this file object; fall back to
        # stepping forward below.
        pass

    lasttell = 0
    while True:
        fp.seek(1, 1)  # whence 1 == SEEK_CUR
        current = fp.tell()
        if lasttell == current:
            break
        lasttell = current
    return True
3514+
31683515def ReadFileHeaderData (fp , skipchecksum = False , formatspecs = None , saltkey = None ):
31693516 if (formatspecs is None ):
31703517 formatspecs = __file_format_multi_dict__
0 commit comments