@@ -86,11 +86,8 @@ def unpack(stream, **kwargs):
8686 Raises `ExtraData` when `stream` contains extra bytes.
8787 See :class:`Unpacker` for options.
8888 """
89- unpacker = Unpacker (stream , ** kwargs )
90- ret = unpacker ._fb_unpack ()
91- if unpacker ._fb_got_extradata ():
92- raise ExtraData (ret , unpacker ._fb_get_extradata ())
93- return ret
89+ data = stream .read ()
90+ return unpackb (data , ** kwargs )
9491
9592
9693def unpackb (packed , ** kwargs ):
@@ -121,7 +118,7 @@ class Unpacker(object):
121118 If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
122119
123120 :param int read_size:
124- Used as `file_like.read(read_size)`. (default: `min(1024**2 , max_buffer_size)`)
121+ Used as `file_like.read(read_size)`. (default: `min(16*1024 , max_buffer_size)`)
125122
126123 :param bool use_list:
127124 If true, unpack msgpack array to Python list.
@@ -199,27 +196,23 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
199196 self ._fb_feeding = False
200197
201198 #: Buffer of bytes fed.
202- self ._fb_buffers = []
203- #: Which buffer we currently reads
204- self ._fb_buf_i = 0
199+ self ._buffer = b""
205200 #: Position in the buffer we are currently reading from
206- self ._fb_buf_o = 0
207- #: Total size of _fb_bufferes
208- self ._fb_buf_n = 0
201+ self ._buff_i = 0
209202
210203 # When Unpacker is used as an iterable, between the calls to next(),
211204 # the buffer is not "consumed" completely, for efficiency sake.
212205 # Instead, it is done sloppily. To make sure we raise BufferFull at
213206 # the correct moments, we have to keep track of how sloppy we were.
214207 # Furthermore, when the buffer is incomplete (that is: in the case
215208 # we raise an OutOfData) we need to rollback the buffer to the correct
216- # state, which _fb_slopiness records.
217- self ._fb_sloppiness = 0
209+ # state, which _buf_checkpoint records.
210+ self ._buf_checkpoint = 0
218211
219212 self ._max_buffer_size = max_buffer_size or 2 ** 31 - 1
220213 if read_size > self ._max_buffer_size :
221214 raise ValueError ("read_size must be smaller than max_buffer_size" )
222- self ._read_size = read_size or min (self ._max_buffer_size , 4096 )
215+ self ._read_size = read_size or min (self ._max_buffer_size , 16 * 1024 )
223216 self ._encoding = encoding
224217 self ._unicode_errors = unicode_errors
225218 self ._use_list = use_list
@@ -248,103 +241,75 @@ def __init__(self, file_like=None, read_size=0, use_list=True,
def feed(self, next_bytes):
    """Append another chunk of serialized data to the internal buffer.

    Only usable when the unpacker is in feed mode (no ``file_like``).

    :param next_bytes:
        ``bytes``, ``bytearray`` or ``array.array`` holding the next chunk
        of packed data.
    :raises TypeError: if ``next_bytes`` is of any other type.
    :raises BufferFull: if the unread data plus ``next_bytes`` would exceed
        ``max_buffer_size``.
    """
    if isinstance(next_bytes, array.array):
        # array.tostring() was removed in Python 3.9; prefer tobytes() and
        # fall back to tostring() only on interpreters that lack it.
        as_bytes = getattr(next_bytes, "tobytes", None) or next_bytes.tostring
        next_bytes = as_bytes()
    if not isinstance(next_bytes, (bytes, bytearray)):
        raise TypeError("next_bytes should be bytes, bytearray or array.array")
    # Kept as an assert so callers still see AssertionError on misuse.
    assert self._fb_feeding

    # Everything before the checkpoint belongs to objects that were already
    # unpacked completely and is never needed for a rollback. Drop it here,
    # otherwise the buffer grows without bound in feed mode.
    checkpoint = self._buf_checkpoint
    if checkpoint > 0:
        self._buffer = self._buffer[checkpoint:]
        self._buff_i -= checkpoint
        self._buf_checkpoint = 0

    unread = len(self._buffer) - self._buff_i
    if unread + len(next_bytes) > self._max_buffer_size:
        raise BufferFull
    # Concatenating with a bytearray could turn the buffer into a bytearray,
    # so cast to bytes before appending.
    self._buffer += bytes(next_bytes)
272254 def _fb_consume (self ):
273255 """ Marks everything read so far as consumed by moving the rollback checkpoint. """
274- if self ._fb_buf_i :
275- for i in xrange (self ._fb_buf_i ):
276- self ._fb_buf_n -= len (self ._fb_buffers [i ])
277- self ._fb_buffers = self ._fb_buffers [self ._fb_buf_i :]
278- self ._fb_buf_i = 0
279- if self ._fb_buffers :
280- self ._fb_buffers [0 ] = self ._fb_buffers [0 ][self ._fb_buf_o :]
281- self ._fb_buf_n -= self ._fb_buf_o
282- else :
283- self ._fb_buf_n = 0
284- self ._fb_buf_o = 0
285- self ._fb_sloppiness = 0
256+ self ._buf_checkpoint = self ._buff_i
286257
287258 def _fb_got_extradata (self ):
288- if self ._fb_buf_i != len (self ._fb_buffers ):
289- return True
290- if self ._fb_feeding :
291- return False
292- if not self .file_like :
293- return False
294- if self .file_like .read (1 ):
295- return True
296- return False
259+ return self ._buff_i < len (self ._buffer )
297260
298- def __iter__ (self ):
299- return self
261+ def _fb_get_extradata (self ):
262+ return self . _buffer [ self . _buff_i :]
300263
301264 def read_bytes (self , n ):
302265 return self ._fb_read (n )
303266
304- def _fb_rollback (self ):
305- self ._fb_buf_i = 0
306- self ._fb_buf_o = self ._fb_sloppiness
def _fb_read(self, n, write_bytes=None):
    """Return the next ``n`` bytes and advance the read position.

    The bytes are served from the internal buffer when it already holds
    enough data. Otherwise, when reading from ``file_like``, more data is
    pulled in first.

    :param int n: number of bytes to return.
    :param write_bytes:
        optional callable that is invoked with the returned bytes.
    :raises OutOfData:
        if fewer than ``n`` bytes are available. The read position is
        rolled back to the last checkpoint before raising.
    """
    start = self._buff_i
    # Number of bytes still missing from the buffer (<= 0 means none).
    shortfall = n - (len(self._buffer) - start)

    if shortfall > 0:
        if self._fb_feeding:
            # In feed mode there is nothing to read from: rewind so the
            # same object can be retried once more data has been fed.
            self._buff_i = self._buf_checkpoint
            raise OutOfData

        # Discard bytes before the checkpoint before growing the buffer.
        checkpoint = self._buf_checkpoint
        if checkpoint > 0:
            self._buffer = self._buffer[checkpoint:]
            self._buff_i -= checkpoint
            self._buf_checkpoint = 0

        while shortfall > 0:
            chunk = self.file_like.read(max(self._read_size, shortfall))
            if not chunk:
                break
            assert isinstance(chunk, bytes)
            self._buffer += chunk
            shortfall -= len(chunk)

        if shortfall > 0:
            # The checkpoint is 0 after the trim above, so this is the rollback.
            self._buff_i = 0
            raise OutOfData
        start = self._buff_i

    # Always slice and keep the buffer intact. Emptying it here would lose
    # bytes that a later rollback (OutOfData further into the same object)
    # still needs; consumed data is discarded by the checkpoint trim instead.
    ret = self._buffer[start:start + n]
    self._buff_i = start + n

    if write_bytes is not None:
        write_bytes(ret)
    return ret
@@ -562,15 +527,19 @@ def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None):
562527 assert typ == TYPE_IMMEDIATE
563528 return obj
564529
565- def next (self ):
530+ def __iter__ (self ):
531+ return self
532+
533+ def __next__ (self ):
566534 try :
567535 ret = self ._fb_unpack (EX_CONSTRUCT , None )
568- self ._fb_sloppy_consume ()
536+ self ._fb_consume ()
569537 return ret
570538 except OutOfData :
571539 self ._fb_consume ()
572540 raise StopIteration
573- __next__ = next
541+
542+ next = __next__
574543
575544 def skip (self , write_bytes = None ):
576545 self ._fb_unpack (EX_SKIP , write_bytes )
0 commit comments