11/*-
22 * Copyright 2003-2005 Colin Percival
33 * Copyright 2012 Matthew Endsley
4- * Copyright 2024 Erick Ortiz
54 * All rights reserved
65 *
76 * Redistribution and use in source and binary forms, with or without
8- * modification, are permitted providing that the following conditions
7+ * modification, are permitted providing that the following conditions
98 * are met:
109 * 1. Redistributions of source code must retain the above copyright
1110 * notice, this list of conditions and the following disclaimer.
3433
3534#define MIN(x ,y ) (((x)<(y)) ? (x) : (y))
3635
36+ static int64_t median3 (int64_t a , int64_t b , int64_t c ) {
37+ return a < b ? (b < c ? b : a < c ? c : a ) : b > c ? b : a > c ? c : a ;
38+ }
39+
3740static void split (int64_t * indices , int64_t * values , int64_t start , int64_t length , int64_t offset ) {
3841 int64_t i , j , k , pivotValue , tmp , rangeStart , rangeEnd ;
42+ int64_t pivotStartValue , pivotEndValue ;
3943 if (length < 16 ) {
4044 for (k = start ; k < start + length ; k += j ) {
4145 j = 1 ;
@@ -57,7 +61,24 @@ static void split(int64_t* indices, int64_t* values, int64_t start, int64_t leng
5761 }
5862 return ;
5963 }
60- pivotValue = values [indices [start + length / 2 ] + offset ];
64+
65+ /* Select pivot, algorithm by Bentley & McIlroy */
66+ j = start + length / 2 ;
67+ k = start + length - 1 ;
68+ pivotValue = values [indices [j ] + offset ];
69+ pivotStartValue = values [indices [start ] + offset ];
70+ pivotEndValue = values [indices [k ] + offset ];
71+ if (length > 40 ) {
72+ /* Big array: Pseudomedian of 9 */
73+ tmp = length / 8 ;
74+ pivotValue = median3 (pivotValue , values [indices [j - tmp ] + offset ], values [indices [j + tmp ] + offset ]);
75+ pivotStartValue = median3 (pivotStartValue , values [indices [start + tmp ] + offset ],
76+ values [indices [start + tmp + tmp ] + offset ]);
77+ pivotEndValue = median3 (pivotEndValue , values [indices [k - tmp ] + offset ],
78+ values [indices [k - tmp - tmp ] + offset ]);
79+ } /* Else medium array: Pseudomedian of 3 */
80+ pivotValue = median3 (pivotValue , pivotStartValue , pivotEndValue );
81+
6182 rangeStart = 0 ;
6283 rangeEnd = 0 ;
6384 for (i = start ; i < start + length ; i ++ ) {
@@ -115,7 +136,6 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
115136 charFreq [0 ] = 0 ;
116137 for (i = 0 ; i < inputSize ; i ++ )
117138 suffixArray [++ charFreq [inputString [i ]]] = i ;
118- suffixArray [0 ] = inputSize ;
119139 for (i = 0 ; i < inputSize ; i ++ )
120140 sortedGroup [i ] = charFreq [inputString [i ]];
121141 sortedGroup [inputSize ] = 0 ;
@@ -130,7 +150,8 @@ static void quickSuffixSort(int64_t* suffixArray, int64_t* sortedGroup, const ui
130150 groupLen -= suffixArray [i ];
131151 i -= suffixArray [i ];
132152 } else {
133- if (groupLen ) suffixArray [i - groupLen ] = - groupLen ;
153+ if (groupLen )
154+ suffixArray [i - groupLen ] = - groupLen ;
134155 groupLen = sortedGroup [suffixArray [i ]] + 1 - i ;
135156 split (suffixArray , sortedGroup , i , groupLen , height );
136157 i += groupLen ;
@@ -156,22 +177,26 @@ static int64_t calcMatchingLength(const uint8_t* oldData, int64_t oldDataSize, c
156177static int64_t binSearchSuffixArray (const int64_t * suffixArray , const uint8_t * oldData , int64_t oldDataSize ,
157178 const uint8_t * newData , int64_t newDataSize , int64_t start , int64_t end ,
158179 int64_t * bestMatchPosition ) {
159- int64_t x ;
180+ int64_t matchLengthStart , matchLengthEnd , midIndex , cmpsize ;
181+ int32_t res ;
160182 if (end - start < 2 ) {
161- int64_t y ;
162- x = calcMatchingLength (oldData + suffixArray [start ], oldDataSize - suffixArray [start ], newData , newDataSize );
163- y = calcMatchingLength (oldData + suffixArray [end ], oldDataSize - suffixArray [end ], newData , newDataSize );
164- if (x > y ) {
183+ matchLengthStart = calcMatchingLength (oldData + suffixArray [start ], oldDataSize - suffixArray [start ], newData , newDataSize );
184+ matchLengthEnd = calcMatchingLength (oldData + suffixArray [end ], oldDataSize - suffixArray [end ], newData , newDataSize );
185+ if (matchLengthStart > matchLengthEnd ) {
165186 * bestMatchPosition = suffixArray [start ];
166- return x ;
187+ return matchLengthStart ;
167188 }
168189 * bestMatchPosition = suffixArray [end ];
169- return y ;
190+ return matchLengthEnd ;
170191 }
171- x = start + (end - start ) / 2 ;
172- if (memcmp (oldData + suffixArray [x ], newData ,MIN (oldDataSize - suffixArray [x ], newDataSize )) < 0 )
173- return binSearchSuffixArray (suffixArray , oldData , oldDataSize , newData , newDataSize , x , end , bestMatchPosition );
174- return binSearchSuffixArray (suffixArray , oldData , oldDataSize , newData , newDataSize , start , x , bestMatchPosition );
192+ midIndex = start + (end - start ) / 2 ;
193 if (memcmp (oldData + suffixArray [midIndex ], newData , MIN (oldDataSize - suffixArray [midIndex ], newDataSize )) < 0 ) {
194+ cmpsize = MIN (oldDataSize - suffixArray [midIndex ], newDataSize );
195+ res = memcmp (oldData + suffixArray [midIndex ], newData , cmpsize );
196+ if (res < 0 || (res == 0 && cmpsize < newDataSize ))
197+ return binSearchSuffixArray (suffixArray , oldData , oldDataSize , newData , newDataSize , midIndex , end , bestMatchPosition );
198+ }
199+ return binSearchSuffixArray (suffixArray , oldData , oldDataSize , newData , newDataSize , start , midIndex , bestMatchPosition );
175200}
176201
177202static void offsetToBytes (const int64_t offset , uint8_t * bytebuf ) {
@@ -224,28 +249,29 @@ static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64
224249}
225250
226251struct bsdiff_request {
227- const uint8_t * old ;
228- int64_t oldsize ;
229- const uint8_t * new ;
230- int64_t newsize ;
252+ const uint8_t * oldData ;
253+ int64_t oldDataSize ;
254+ const uint8_t * newData ;
255+ int64_t newDataSize ;
231256 struct bsdiff_stream * stream ;
232- int64_t * I ;
257+ int64_t * indices ;
233258 uint8_t * buffer ;
234259};
235260
236261static int bsdiff_internal (const struct bsdiff_request req ) {
237262 int64_t * suffix_array ,* rank_array ;
238263 int64_t currentScan , matchedPosition , matchedLength ;
239- int64_t lastScan , lastMatchedPosition , lastOffset ;
264+ int64_t lastScan , lastMatchedPosition , lastOffset , lastWriteNewScan , lastWriteOldPosition ;
265+ int64_t currentControlBlock [3 ], nextControlBlock [3 ];
240266 int64_t oldscore , scoreCompare ;
241267 int64_t score , scoreFront , lengthFront , scoreBack , lengthBack ;
242268 int64_t overlapLength , scoreOverlap , lengthOverlap ;
243269 int64_t i ;
244270 uint8_t * diffBuf ;
245271 uint8_t controlBuf [8 * 3 ];
246- if ((rank_array = req .stream -> malloc ((req .oldsize + 1 ) * sizeof (int64_t ))) == NULL ) return -1 ;
247- suffix_array = req .I ;
248- quickSuffixSort (suffix_array , rank_array , req .old , req .oldsize );
272+ if ((rank_array = req .stream -> malloc ((req .oldDataSize + 1 ) * sizeof (int64_t ))) == NULL ) return -1 ;
273+ suffix_array = req .indices ;
274+ quickSuffixSort (suffix_array , rank_array , req .oldData , req .oldDataSize );
249275 req .stream -> free (rank_array );
250276 diffBuf = req .buffer ;
251277 /* Compute the differences, writing ctrl as we go */
@@ -255,96 +281,139 @@ static int bsdiff_internal(const struct bsdiff_request req) {
255281 lastScan = 0 ;
256282 lastMatchedPosition = 0 ;
257283 lastOffset = 0 ;
258- while (currentScan < req .newsize ) {
284+ lastWriteNewScan = 0 ;
285+ lastWriteOldPosition = 0 ;
286+ memset (currentControlBlock , 0 , sizeof (currentControlBlock ));
287+ while (currentScan < req .newDataSize ) {
259288 oldscore = 0 ;
260- for (scoreCompare = currentScan += matchedLength ; currentScan < req .newsize ; currentScan ++ ) {
261- matchedLength = binSearchSuffixArray (suffix_array , req .old , req .oldsize , req .new + currentScan ,
262- req .newsize - currentScan ,
263- 0 , req .oldsize , & matchedPosition );
289+ for (scoreCompare = currentScan += matchedLength ; currentScan < req .newDataSize ; currentScan ++ ) {
290+ matchedLength = binSearchSuffixArray (suffix_array , req .oldData , req .oldDataSize , req .newData + currentScan ,
291+ req .newDataSize - currentScan ,
292+ 0 , req .oldDataSize , & matchedPosition );
264293 for (; scoreCompare < currentScan + matchedLength ; scoreCompare ++ )
265- if (( scoreCompare + lastOffset < req .oldsize ) &&
266- ( req .old [scoreCompare + lastOffset ] == req .new [scoreCompare ]) )
294+ if (scoreCompare + lastOffset < req .oldDataSize &&
295+ req .oldData [scoreCompare + lastOffset ] == req .newData [scoreCompare ])
267296 oldscore ++ ;
268- if ((( matchedLength == oldscore ) && ( matchedLength != 0 ) ) ||
269- ( matchedLength > oldscore + 8 ) )
297+ if ((matchedLength == oldscore && matchedLength != 0 ) ||
298+ matchedLength > oldscore + 8 )
270299 break ;
271- if (( currentScan + lastOffset < req .oldsize ) &&
272- ( req .old [currentScan + lastOffset ] == req .new [currentScan ]) )
300+ if (currentScan + lastOffset < req .oldDataSize &&
301+ req .oldData [currentScan + lastOffset ] == req .newData [currentScan ])
273302 oldscore -- ;
274303 }
275- if (matchedLength != oldscore || currentScan == req .newsize ) {
304+ if (matchedLength != oldscore || currentScan == req .newDataSize ) {
276305 score = 0 ;
277306 scoreFront = 0 ;
278307 lengthFront = 0 ;
279- for (i = 0 ; ( lastScan + i < currentScan ) && ( lastMatchedPosition + i < req .oldsize ) ;) {
280- if (req .old [lastMatchedPosition + i ] == req .new [lastScan + i ]) score ++ ;
308+ for (i = 0 ; lastScan + i < currentScan && lastMatchedPosition + i < req .oldDataSize ;) {
309+ if (req .oldData [lastMatchedPosition + i ] == req .newData [lastScan + i ]) score ++ ;
281310 i ++ ;
282311 if (score * 2 - i > scoreFront * 2 - lengthFront ) {
283312 scoreFront = score ;
284313 lengthFront = i ;
285- };
286- };
314+ }
315+ }
287316
288317 lengthBack = 0 ;
289- if (currentScan < req .newsize ) {
318+ if (currentScan < req .newDataSize ) {
290319 score = 0 ;
291320 scoreBack = 0 ;
292321 for (i = 1 ; (currentScan >= lastScan + i ) && (matchedPosition >= i ); i ++ ) {
293- if (req .old [matchedPosition - i ] == req .new [currentScan - i ]) score ++ ;
322+ if (req .oldData [matchedPosition - i ] == req .newData [currentScan - i ]) score ++ ;
294323 if (score * 2 - i > scoreBack * 2 - lengthBack ) {
295324 scoreBack = score ;
296325 lengthBack = i ;
297- };
298- };
299- };
326+ }
327+ }
328+ }
300329
301330 if (lastScan + lengthFront > currentScan - lengthBack ) {
302331 overlapLength = (lastScan + lengthFront ) - (currentScan - lengthBack );
303332 score = 0 ;
304333 scoreOverlap = 0 ;
305334 lengthOverlap = 0 ;
306335 for (i = 0 ; i < overlapLength ; i ++ ) {
307- if (req .new [lastScan + lengthFront - overlapLength + i ] ==
308- req .old [lastMatchedPosition + lengthFront - overlapLength + i ])
336+ if (req .newData [lastScan + lengthFront - overlapLength + i ] ==
337+ req .oldData [lastMatchedPosition + lengthFront - overlapLength + i ])
309338 score ++ ;
310- if (req .new [currentScan - lengthBack + i ] ==
311- req .old [matchedPosition - lengthBack + i ])
339+ if (req .newData [currentScan - lengthBack + i ] ==
340+ req .oldData [matchedPosition - lengthBack + i ])
312341 score -- ;
313342 if (score > scoreOverlap ) {
314343 scoreOverlap = score ;
315344 lengthOverlap = i + 1 ;
316- };
317- };
345+ }
346+ }
318347
319348 lengthFront += lengthOverlap - overlapLength ;
320349 lengthBack -= lengthOverlap ;
321- };
350+ }
322351
323- offsetToBytes ( lengthFront , controlBuf ) ;
324- offsetToBytes (( currentScan - lengthBack ) - (lastScan + lengthFront ), controlBuf + 8 );
325- offsetToBytes (( matchedPosition - lengthBack ) - (lastMatchedPosition + lengthFront ), controlBuf + 16 );
352+ nextControlBlock [ 0 ] = lengthFront ;
353+ nextControlBlock [ 1 ] = currentScan - lengthBack - (lastScan + lengthFront );
354+ nextControlBlock [ 2 ] = matchedPosition - lengthBack - (lastMatchedPosition + lengthFront );
326355
327- /* Write control data */
328- if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
329- return -1 ;
356+ if (nextControlBlock [0 ]) {
357+ if (currentControlBlock [0 ] || currentControlBlock [1 ] || currentControlBlock [2 ]) {
358+ offsetToBytes (currentControlBlock [0 ], controlBuf );
359+ offsetToBytes (currentControlBlock [1 ], controlBuf + 8 );
360+ offsetToBytes (currentControlBlock [2 ], controlBuf + 16 );
330361
331- /* Write diff data */
332- for (i = 0 ; i < lengthFront ; i ++ )
333- diffBuf [i ] = req .new [lastScan + i ] - req .old [lastMatchedPosition + i ];
334- if (writedata (req .stream , diffBuf , lengthFront ))
335- return -1 ;
362+ /* Write control data */
363+ if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
364+ return -1 ;
336365
337- /* Write extra data */
338- for (i = 0 ; i < (currentScan - lengthBack ) - (lastScan + lengthFront ); i ++ )
339- diffBuf [i ] = req .new [lastScan + lengthFront + i ];
340- if (writedata (req .stream , diffBuf , (currentScan - lengthBack ) - (lastScan + lengthFront )))
341- return -1 ;
366+ /* Write diff data */
367+ for (i = 0 ; i < currentControlBlock [0 ]; i ++ )
368+ diffBuf [i ] = req .newData [lastWriteNewScan + i ] - req .oldData [lastWriteOldPosition + i ];
369+
370+ if (writedata (req .stream , diffBuf , currentControlBlock [0 ]))
371+ return -1 ;
372+
373+ /* Write extra data */
374+ for (i = 0 ; i < currentControlBlock [1 ]; i ++ )
375+ diffBuf [i ] = req .newData [lastWriteNewScan + currentControlBlock [0 ] + i ];
376+ if (writedata (req .stream , diffBuf , currentControlBlock [1 ]))
377+ return -1 ;
378+
379+ lastWriteNewScan = lastScan ;
380+ lastWriteOldPosition = lastMatchedPosition ;
381+ }
382+ currentControlBlock [0 ] = nextControlBlock [0 ];
383+ currentControlBlock [1 ] = nextControlBlock [1 ];
384+ currentControlBlock [2 ] = nextControlBlock [2 ];
385+ } else {
386+ currentControlBlock [1 ] += nextControlBlock [1 ];
387+ currentControlBlock [2 ] += nextControlBlock [2 ];
388+ }
342389
343390 lastScan = currentScan - lengthBack ;
344391 lastMatchedPosition = matchedPosition - lengthBack ;
345392 lastOffset = matchedPosition - currentScan ;
346- };
347- };
393+ }
394+ }
395+
396+ if (currentControlBlock [0 ] || currentControlBlock [1 ]) {
397+ offsetToBytes (currentControlBlock [0 ], controlBuf );
398+ offsetToBytes (currentControlBlock [1 ], controlBuf + 8 );
399+ offsetToBytes (currentControlBlock [2 ], controlBuf + 16 );
400+
401+ /* Write control data */
402+ if (writedata (req .stream , controlBuf , sizeof (controlBuf )))
403+ return -1 ;
404+
405+ /* Write diff data */
406+ for (i = 0 ; i < currentControlBlock [0 ]; i ++ )
407+ diffBuf [i ] = req .newData [lastWriteNewScan + i ] - req .oldData [lastWriteOldPosition + i ];
408+ if (writedata (req .stream , diffBuf , currentControlBlock [0 ]))
409+ return -1 ;
410+
411+ /* Write extra data */
412+ for (i = 0 ; i < currentControlBlock [1 ]; i ++ )
413+ diffBuf [i ] = req .newData [lastWriteNewScan + currentControlBlock [0 ] + i ];
414+ if (writedata (req .stream , diffBuf , currentControlBlock [1 ]))
415+ return -1 ;
416+ }
348417
349418 return 0 ;
350419}
@@ -353,24 +422,24 @@ int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* new, int64_t news
353422 int result ;
354423 struct bsdiff_request req ;
355424
356- if ((req .I = stream -> malloc ((oldsize + 1 ) * sizeof (int64_t ))) == NULL )
425+ if ((req .indices = stream -> malloc ((oldsize + 1 ) * sizeof (int64_t ))) == NULL )
357426 return -1 ;
358427
359428 if ((req .buffer = stream -> malloc (newsize + 1 )) == NULL ) {
360- stream -> free (req .I );
429+ stream -> free (req .indices );
361430 return -1 ;
362431 }
363432
364- req .old = old ;
365- req .oldsize = oldsize ;
366- req .new = new ;
367- req .newsize = newsize ;
433+ req .oldData = old ;
434+ req .oldDataSize = oldsize ;
435+ req .newData = new ;
436+ req .newDataSize = newsize ;
368437 req .stream = stream ;
369438
370439 result = bsdiff_internal (req );
371440
372441 stream -> free (req .buffer );
373- stream -> free (req .I );
442+ stream -> free (req .indices );
374443
375444 return result ;
376445}
@@ -464,7 +533,7 @@ int main(int argc, char* argv[]) {
464533 return 1 ;
465534 }
466535
467- /* Write header (signature+newsize)*/
536+ /* Write header (signature+newsize) */
468537 offsetToBytes (newsize , buf );
469538 if (fwrite ("ENDSLEY/BSDIFF43" , 16 , 1 , pf ) != 1 ||
470539 fwrite (buf , sizeof (buf ), 1 , pf ) != 1 ) {
0 commit comments