Skip to content

Commit f2400a9

Browse files
committed
config_entries: Avoid excessive map operations
When appending config entries, we currently always first get the existing map entry and afterwards update the map to contain the current config value. In the common scenario where keys aren't being overridden, this is the best we can do. But when a key gets set multiple times, we still perform both of these map operations for every occurrence. In extreme cases, hashing the map keys will thus start to dominate performance. Let's optimize the pattern by using a separately allocated map entry. Currently, we always put the current list entry into the map and update it to get any overridden multivar. As these list entries are also used to iterate config entries, we cannot update them in-place in the map and are thus forced to always set the map to contain the new entry. But with a separately allocated map entry, we can now create one once per config key and insert it into the map. Whenever appending a new config value with the same key, we can now just update the map entry in-place instead of having to replace the map entry completely. This reduces calls to the hashing function by half and buys the improved runtime at the cost of one more allocation per unique config key. Given that the refactoring arguably improves code readability by splitting concerns of the `config_entry_list` type and not having to track it in two different structures, this alone would already be reason enough to take the trade. Given a pathological case of a gitconfig with 100,000 repeated keys and a section of length 10,000 characters, this reduces runtime by half from approximately 14 seconds to 7 seconds as expected.
1 parent ae30009 commit f2400a9

File tree

1 file changed

+35
-27
lines changed

1 file changed

+35
-27
lines changed

src/config_entries.c

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@ typedef struct config_entry_list {
1111
struct config_entry_list *next;
1212
struct config_entry_list *last;
1313
git_config_entry *entry;
14-
bool first;
1514
} config_entry_list;
1615

16+
typedef struct {
17+
git_config_entry *entry;
18+
bool multivar;
19+
} config_entry_map_head;
20+
1721
typedef struct config_entries_iterator {
1822
git_config_iterator parent;
1923
git_config_entries *entries;
@@ -102,14 +106,16 @@ void git_config_entries_incref(git_config_entries *entries)
102106
static void config_entries_free(git_config_entries *entries)
103107
{
104108
config_entry_list *list = NULL, *next;
109+
config_entry_map_head *head;
105110

111+
git_strmap_foreach_value(entries->map, head,
112+
git__free((char *) head->entry->name); git__free(head)
113+
);
106114
git_strmap_free(entries->map);
107115

108116
list = entries->list;
109117
while (list != NULL) {
110118
next = list->next;
111-
if (list->first)
112-
git__free((char *) list->entry->name);
113119
git__free((char *) list->entry->value);
114120
git__free(list->entry);
115121
git__free(list);
@@ -127,40 +133,42 @@ void git_config_entries_free(git_config_entries *entries)
127133

128134
int git_config_entries_append(git_config_entries *entries, git_config_entry *entry)
129135
{
130-
config_entry_list *existing, *head;
131-
132-
head = git__calloc(1, sizeof(config_entry_list));
133-
GIT_ERROR_CHECK_ALLOC(head);
134-
head->entry = entry;
135-
136-
/*
137-
* This is a micro-optimization for configuration files
138-
* with a lot of same keys. As for multivars the entry's
139-
* key will be the same for all entries, we can just free
140-
* all except the first entry's name and just re-use it.
141-
*/
142-
if ((existing = git_strmap_get(entries->map, entry->name)) != NULL) {
136+
config_entry_list *list_head;
137+
config_entry_map_head *map_head;
138+
139+
if ((map_head = git_strmap_get(entries->map, entry->name)) != NULL) {
140+
map_head->multivar = true;
141+
/*
142+
* This is a micro-optimization for configuration files
143+
* with a lot of same keys. As for multivars the entry's
144+
* key will be the same for all entries, we can just free
145+
* all except the first entry's name and just re-use it.
146+
*/
143147
git__free((char *) entry->name);
144-
entry->name = existing->entry->name;
148+
entry->name = map_head->entry->name;
145149
} else {
146-
head->first = 1;
150+
map_head = git__calloc(1, sizeof(*map_head));
151+
if ((git_strmap_set(entries->map, entry->name, map_head)) < 0)
152+
return -1;
147153
}
154+
map_head->entry = entry;
155+
156+
list_head = git__calloc(1, sizeof(config_entry_list));
157+
GIT_ERROR_CHECK_ALLOC(list_head);
158+
list_head->entry = entry;
148159

149160
if (entries->list)
150-
entries->list->last->next = head;
161+
entries->list->last->next = list_head;
151162
else
152-
entries->list = head;
153-
entries->list->last = head;
154-
155-
if (git_strmap_set(entries->map, entry->name, head) < 0)
156-
return -1;
163+
entries->list = list_head;
164+
entries->list->last = list_head;
157165

158166
return 0;
159167
}
160168

161169
int git_config_entries_get(git_config_entry **out, git_config_entries *entries, const char *key)
162170
{
163-
config_entry_list *entry;
171+
config_entry_map_head *entry;
164172
if ((entry = git_strmap_get(entries->map, key)) == NULL)
165173
return GIT_ENOTFOUND;
166174
*out = entry->entry;
@@ -169,12 +177,12 @@ int git_config_entries_get(git_config_entry **out, git_config_entries *entries,
169177

170178
int git_config_entries_get_unique(git_config_entry **out, git_config_entries *entries, const char *key)
171179
{
172-
config_entry_list *entry;
180+
config_entry_map_head *entry;
173181

174182
if ((entry = git_strmap_get(entries->map, key)) == NULL)
175183
return GIT_ENOTFOUND;
176184

177-
if (!entry->first) {
185+
if (entry->multivar) {
178186
git_error_set(GIT_ERROR_CONFIG, "entry is not unique due to being a multivar");
179187
return -1;
180188
}

0 commit comments

Comments
 (0)