Skip to content

Commit f908b18

Browse files
authored
Merge pull request libgit2#4327 from libgit2/peff/drop-sha1-entry-pos
sha1_lookup: drop sha1_entry_pos function
2 parents e536fa3 + 9842b32 commit f908b18

File tree

3 files changed

+0
-224
lines changed

3 files changed

+0
-224
lines changed

src/pack.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,11 +1316,7 @@ static int pack_entry_find_offset(
13161316
short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
13171317
#endif
13181318

1319-
#ifdef GIT_USE_LOOKUP
1320-
pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
1321-
#else
13221319
pos = sha1_position(index, stride, lo, hi, short_oid->id);
1323-
#endif
13241320

13251321
if (pos >= 0) {
13261322
/* An object matching exactly the oid was found */

src/sha1_lookup.c

Lines changed: 0 additions & 214 deletions
Original file line numberDiff line numberDiff line change
@@ -11,220 +11,6 @@
1111
#include "common.h"
1212
#include "oid.h"
1313

14-
/*
15-
* Conventional binary search loop looks like this:
16-
*
17-
* unsigned lo, hi;
18-
* do {
19-
* unsigned mi = (lo + hi) / 2;
20-
* int cmp = "entry pointed at by mi" minus "target";
21-
* if (!cmp)
22-
* return (mi is the wanted one)
23-
* if (cmp > 0)
24-
* hi = mi; "mi is larger than target"
25-
* else
26-
* lo = mi+1; "mi is smaller than target"
27-
* } while (lo < hi);
28-
*
29-
* The invariants are:
30-
*
31-
* - When entering the loop, lo points at a slot that is never
32-
* above the target (it could be at the target), hi points at a
33-
* slot that is guaranteed to be above the target (it can never
34-
* be at the target).
35-
*
36-
* - We find a point 'mi' between lo and hi (mi could be the same
37-
* as lo, but never can be as same as hi), and check if it hits
38-
* the target. There are three cases:
39-
*
40-
* - if it is a hit, we are happy.
41-
*
42-
* - if it is strictly higher than the target, we set it to hi,
43-
* and repeat the search.
44-
*
45-
* - if it is strictly lower than the target, we update lo to
46-
* one slot after it, because we allow lo to be at the target.
47-
*
48-
* If the loop exits, there is no matching entry.
49-
*
50-
* When choosing 'mi', we do not have to take the "middle" but
51-
* anywhere in between lo and hi, as long as lo <= mi < hi is
52-
* satisfied. When we somehow know that the distance between the
53-
* target and lo is much shorter than the target and hi, we could
54-
* pick mi that is much closer to lo than the midway.
55-
*
56-
* Now, we can take advantage of the fact that SHA-1 is a good hash
57-
* function, and as long as there are enough entries in the table, we
58-
* can expect uniform distribution. An entry that begins with for
59-
* example "deadbeef..." is much likely to appear much later than in
60-
* the midway of the table. It can reasonably be expected to be near
61-
* 87% (222/256) from the top of the table.
62-
*
63-
* However, we do not want to pick "mi" too precisely. If the entry at
64-
* the 87% in the above example turns out to be higher than the target
65-
* we are looking for, we would end up narrowing the search space down
66-
* only by 13%, instead of 50% we would get if we did a simple binary
67-
* search. So we would want to hedge our bets by being less aggressive.
68-
*
69-
* The table at "table" holds at least "nr" entries of "elem_size"
70-
* bytes each. Each entry has the SHA-1 key at "key_offset". The
71-
* table is sorted by the SHA-1 key of the entries. The caller wants
72-
* to find the entry with "key", and knows that the entry at "lo" is
73-
* not higher than the entry it is looking for, and that the entry at
74-
* "hi" is higher than the entry it is looking for.
75-
*/
76-
int sha1_entry_pos(const void *table,
77-
size_t elem_size,
78-
size_t key_offset,
79-
unsigned lo, unsigned hi, unsigned nr,
80-
const unsigned char *key)
81-
{
82-
const unsigned char *base = (const unsigned char*)table;
83-
const unsigned char *hi_key, *lo_key;
84-
unsigned ofs_0;
85-
86-
if (!nr || lo >= hi)
87-
return -1;
88-
89-
if (nr == hi)
90-
hi_key = NULL;
91-
else
92-
hi_key = base + elem_size * hi + key_offset;
93-
lo_key = base + elem_size * lo + key_offset;
94-
95-
ofs_0 = 0;
96-
do {
97-
int cmp;
98-
unsigned ofs, mi, range;
99-
unsigned lov, hiv, kyv;
100-
const unsigned char *mi_key;
101-
102-
range = hi - lo;
103-
if (hi_key) {
104-
for (ofs = ofs_0; ofs < 20; ofs++)
105-
if (lo_key[ofs] != hi_key[ofs])
106-
break;
107-
ofs_0 = ofs;
108-
/*
109-
* byte 0 thru (ofs-1) are the same between
110-
* lo and hi; ofs is the first byte that is
111-
* different.
112-
*
113-
* If ofs==20, then no bytes are different,
114-
* meaning we have entries with duplicate
115-
* keys. We know that we are in a solid run
116-
* of this entry (because the entries are
117-
* sorted, and our lo and hi are the same,
118-
* there can be nothing but this single key
119-
* in between). So we can stop the search.
120-
* Either one of these entries is it (and
121-
* we do not care which), or we do not have
122-
* it.
123-
*
124-
* Furthermore, we know that one of our
125-
* endpoints must be the edge of the run of
126-
* duplicates. For example, given this
127-
* sequence:
128-
*
129-
* idx 0 1 2 3 4 5
130-
* key A C C C C D
131-
*
132-
* If we are searching for "B", we might
133-
* hit the duplicate run at lo=1, hi=3
134-
* (e.g., by first mi=3, then mi=0). But we
135-
* can never have lo > 1, because B < C.
136-
* That is, if our key is less than the
137-
* run, we know that "lo" is the edge, but
138-
* we can say nothing of "hi". Similarly,
139-
* if our key is greater than the run, we
140-
* know that "hi" is the edge, but we can
141-
* say nothing of "lo".
142-
*
143-
* Therefore if we do not find it, we also
144-
* know where it would go if it did exist:
145-
* just on the far side of the edge that we
146-
* know about.
147-
*/
148-
if (ofs == 20) {
149-
mi = lo;
150-
mi_key = base + elem_size * mi + key_offset;
151-
cmp = memcmp(mi_key, key, 20);
152-
if (!cmp)
153-
return mi;
154-
if (cmp < 0)
155-
return -1 - hi;
156-
else
157-
return -1 - lo;
158-
}
159-
160-
hiv = hi_key[ofs_0];
161-
if (ofs_0 < 19)
162-
hiv = (hiv << 8) | hi_key[ofs_0+1];
163-
} else {
164-
hiv = 256;
165-
if (ofs_0 < 19)
166-
hiv <<= 8;
167-
}
168-
lov = lo_key[ofs_0];
169-
kyv = key[ofs_0];
170-
if (ofs_0 < 19) {
171-
lov = (lov << 8) | lo_key[ofs_0+1];
172-
kyv = (kyv << 8) | key[ofs_0+1];
173-
}
174-
assert(lov < hiv);
175-
176-
if (kyv < lov)
177-
return -1 - lo;
178-
if (hiv < kyv)
179-
return -1 - hi;
180-
181-
/*
182-
* Even if we know the target is much closer to 'hi'
183-
* than 'lo', if we pick too precisely and overshoot
184-
* (e.g. when we know 'mi' is closer to 'hi' than to
185-
* 'lo', pick 'mi' that is higher than the target), we
186-
* end up narrowing the search space by a smaller
187-
* amount (i.e. the distance between 'mi' and 'hi')
188-
* than what we would have (i.e. about half of 'lo'
189-
* and 'hi'). Hedge our bets to pick 'mi' less
190-
* aggressively, i.e. make 'mi' a bit closer to the
191-
* middle than we would otherwise pick.
192-
*/
193-
kyv = (kyv * 6 + lov + hiv) / 8;
194-
if (lov < hiv - 1) {
195-
if (kyv == lov)
196-
kyv++;
197-
else if (kyv == hiv)
198-
kyv--;
199-
}
200-
mi = (range - 1) * (kyv - lov) / (hiv - lov) + lo;
201-
202-
#ifdef INDEX_DEBUG_LOOKUP
203-
printf("lo %u hi %u rg %u mi %u ", lo, hi, range, mi);
204-
printf("ofs %u lov %x, hiv %x, kyv %x\n",
205-
ofs_0, lov, hiv, kyv);
206-
#endif
207-
208-
if (!(lo <= mi && mi < hi)) {
209-
giterr_set(GITERR_INVALID, "assertion failure: binary search invariant is false");
210-
return -1;
211-
}
212-
213-
mi_key = base + elem_size * mi + key_offset;
214-
cmp = memcmp(mi_key + ofs_0, key + ofs_0, 20 - ofs_0);
215-
if (!cmp)
216-
return mi;
217-
if (cmp > 0) {
218-
hi = mi;
219-
hi_key = mi_key;
220-
} else {
221-
lo = mi + 1;
222-
lo_key = mi_key + elem_size;
223-
}
224-
} while (lo < hi);
225-
return -((int)lo)-1;
226-
}
227-
22814
int sha1_position(const void *table,
22915
size_t stride,
23016
unsigned lo, unsigned hi,

src/sha1_lookup.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,6 @@
99

1010
#include <stdlib.h>
1111

12-
int sha1_entry_pos(const void *table,
13-
size_t elem_size,
14-
size_t key_offset,
15-
unsigned lo, unsigned hi, unsigned nr,
16-
const unsigned char *key);
17-
1812
int sha1_position(const void *table,
1913
size_t stride,
2014
unsigned lo, unsigned hi,

0 commit comments

Comments
 (0)