@@ -399,6 +399,59 @@ def readline(self, size=-1):
399399 return self ._buffer .readline (size )
400400
401401
def _read_exact(fp, n):
    '''Read exactly *n* bytes from `fp`.

    This helper is required because `fp` may be unbuffered, i.e. a single
    ``read()`` call may return fewer bytes than requested (a short read).

    Returns the bytes read.  Raises EOFError if `fp` stops returning data
    before *n* bytes have been collected.
    '''
    data = fp.read(n)
    if len(data) >= n:
        # Common case: the first read already satisfied the request.
        return data

    # Short read: keep asking for the remainder and join once at the end
    # instead of re-allocating a growing bytes object on every iteration.
    chunks = [data]
    missing = n - len(data)
    while missing > 0:
        chunk = fp.read(missing)
        if not chunk:
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        chunks.append(chunk)
        missing -= len(chunk)
    return b"".join(chunks)
416+
417+
def _read_gzip_header(fp):
    '''Read a gzip header from `fp` and progress to the end of the header.

    Returns last mtime if header was present or None otherwise (i.e. when
    the very first read from `fp` yields no data).

    Raises BadGzipFile if the magic bytes or the compression method are not
    the expected ones, and EOFError (via `_read_exact`) if one of the
    fixed-size header fields is truncated.
    '''
    magic = fp.read(2)
    if magic == b'':
        return None

    if magic != b'\037\213':
        raise BadGzipFile('Not a gzipped file (%r)' % magic)

    # Method, flags and mtime; the two pad bytes skip the XFL and OS fields.
    (method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
    if method != 8:
        raise BadGzipFile('Unknown compression method')

    if flag & FEXTRA:
        # Read & discard the extra field, if present
        extra_len, = struct.unpack("<H", _read_exact(fp, 2))
        _read_exact(fp, extra_len)
    for string_flag in (FNAME, FCOMMENT):
        if flag & string_flag:
            # Read and discard a null-terminated string (the filename,
            # then the comment).  Running out of data also ends the scan.
            while True:
                s = fp.read(1)
                if not s or s == b'\000':
                    break
    if flag & FHCRC:
        _read_exact(fp, 2)     # Read & discard the 16-bit header CRC
    return last_mtime
453+
454+
402455class _GzipReader (_compression .DecompressReader ):
403456 def __init__ (self , fp ):
404457 super ().__init__ (_PaddedFile (fp ), zlib .decompressobj ,
@@ -411,53 +464,11 @@ def _init_read(self):
411464 self ._crc = zlib .crc32 (b"" )
412465 self ._stream_size = 0 # Decompressed size of unconcatenated stream
413466
414- def _read_exact (self , n ):
415- '''Read exactly *n* bytes from `self._fp`
416-
417- This method is required because self._fp may be unbuffered,
418- i.e. return short reads.
419- '''
420-
421- data = self ._fp .read (n )
422- while len (data ) < n :
423- b = self ._fp .read (n - len (data ))
424- if not b :
425- raise EOFError ("Compressed file ended before the "
426- "end-of-stream marker was reached" )
427- data += b
428- return data
429-
430467 def _read_gzip_header (self ):
431- magic = self ._fp . read ( 2 )
432- if magic == b'' :
468+ last_mtime = _read_gzip_header ( self ._fp )
469+ if last_mtime is None :
433470 return False
434-
435- if magic != b'\037 \213 ' :
436- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
437-
438- (method , flag ,
439- self ._last_mtime ) = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
440- if method != 8 :
441- raise BadGzipFile ('Unknown compression method' )
442-
443- if flag & FEXTRA :
444- # Read & discard the extra field, if present
445- extra_len , = struct .unpack ("<H" , self ._read_exact (2 ))
446- self ._read_exact (extra_len )
447- if flag & FNAME :
448- # Read and discard a null-terminated string containing the filename
449- while True :
450- s = self ._fp .read (1 )
451- if not s or s == b'\000 ' :
452- break
453- if flag & FCOMMENT :
454- # Read and discard a null-terminated string containing a comment
455- while True :
456- s = self ._fp .read (1 )
457- if not s or s == b'\000 ' :
458- break
459- if flag & FHCRC :
460- self ._read_exact (2 ) # Read & discard the 16-bit header CRC
471+ self ._last_mtime = last_mtime
461472 return True
462473
463474 def read (self , size = - 1 ):
@@ -520,7 +531,7 @@ def _read_eof(self):
520531 # We check that the computed CRC and size of the
521532 # uncompressed data matches the stored values. Note that the size
522533 # stored is the true file size mod 2**32.
523- crc32 , isize = struct .unpack ("<II" , self . _read_exact (8 ))
534+ crc32 , isize = struct .unpack ("<II" , _read_exact (self . _fp , 8 ))
524535 if crc32 != self ._crc :
525536 raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
526537 hex (self ._crc )))
@@ -540,21 +551,69 @@ def _rewind(self):
540551 super ()._rewind ()
541552 self ._new_member = True
542553
554+
def _create_simple_gzip_header(compresslevel: int,
                               mtime=None) -> bytes:
    """
    Create a simple gzip header with no extra fields.
    :param compresslevel: Compresslevel used to determine the xfl bytes.
    :param mtime: The mtime (must support conversion to a 32-bit integer).
                  Defaults to the current time when None.
    :return: A bytes object representing the gzip header.
    """
    if mtime is None:
        mtime = time.time()
    if compresslevel == _COMPRESS_LEVEL_BEST:
        xfl = 2
    elif compresslevel == _COMPRESS_LEVEL_FAST:
        xfl = 4
    else:
        xfl = 0
    # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
    # fields added to header), mtime, xfl and os (255 for unknown OS).
    return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
574+
575+
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default.
    """
    # wbits=31 makes zlib emit a complete gzip member: a basic 10-byte
    # header, the deflate stream and the CRC32/ISIZE trailer.
    gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
    if mtime is None:
        mtime = time.time()
    # Reuse the header created by zlib (magic, method, flags and the XFL
    # byte at offset 8), but write the requested mtime and the "unknown" OS
    # code (255).  This keeps the output identical for every mtime value
    # and independent of the platform zlib was built for.
    header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
    return header + gzip_data[10:]
592+
551593
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    Concatenated gzip members are decompressed one after another and their
    contents joined.  Raises EOFError if a member is truncated, and
    BadGzipFile if a member's stored CRC or length does not match the
    decompressed data.
    """
    decompressed_members = []
    while True:
        fp = io.BytesIO(data)
        if _read_gzip_header(fp) is None:
            # No further member: everything has been consumed.
            return b"".join(decompressed_members)
        # Use a zlib raw deflate decompressor
        do = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
        # Read all the data except the header
        decompressed = do.decompress(data[fp.tell():])
        remainder = do.unused_data
        # The 8-byte trailer (CRC32 + ISIZE) must follow the deflate stream.
        if not do.eof or len(remainder) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        crc, length = struct.unpack("<II", remainder[:8])
        if crc != zlib.crc32(decompressed):
            raise BadGzipFile("CRC check failed")
        # The stored size is the true size mod 2**32.
        if length != (len(decompressed) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")
        decompressed_members.append(decompressed)
        # Skip any zero padding before the next member's header.
        data = remainder[8:].lstrip(b"\x00")
558617
559618
560619def main ():
0 commit comments