Skip to content

Commit d7f58ea

Browse files
committed
config_file: implement stat cache to avoid repeated rehashing
To decide whether a config file has changed, we always hash its complete contents. This is unnecessarily expensive, as well-behaved filesystems will always update stat information for files which have changed. So before computing the hash, we should first check whether the stat info has actually changed for either the configuration file or any of its includes. This avoids having to re-read the configuration file and its includes every time when we check whether it's been modified. Tracing the for-each-ref example previous to this commit, one can see that we repeatedly re-open both the repo configuration as well as the global configuration: $ strace lg2 for-each-ref |& grep config access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) access("/home/pks/.config/git/config", F_OK) = 0 access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 access("/tmp/repo/.git/config", F_OK) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05290) = -1 ENOENT (No such file or directory) access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 access("/home/pks/.config/git/config", F_OK) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c051f0) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffd15c05090) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 With the change, we only do stats for those files and open them a single time, only: $ strace lg2 for-each-ref |& grep config access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) access("/home/pks/.config/git/config", F_OK) = 0 access("/etc/gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 access("/tmp/repo/.git/config", F_OK) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 open("/tmp/repo/.git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/home/pks/.gitconfig", 0x7ffe70540d20) = -1 ENOENT (No such file or directory) access("/home/pks/.gitconfig", F_OK) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 access("/home/pks/.config/git/config", F_OK) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 open("/home/pks/.config/git/config", O_RDONLY|O_CLOEXEC) = 3 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540ca0) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540c80) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 stat("/tmp/repo/.git/config", {st_mode=S_IFREG|0644, st_size=92, ...}) = 0 stat("/home/pks/.gitconfig", 0x7ffe70540b40) = -1 ENOENT (No such file or directory) stat("/home/pks/.gitconfig", 0x7ffe70540b20) = -1 ENOENT (No such file or directory) stat("/home/pks/.config/git/config", {st_mode=S_IFREG|0644, st_size=1154, ...}) = 0 The following benchmark has been performed with and without the stat cache in a best-of-ten run: ``` int lg2_repro(git_repository *repo, int argc, char **argv) { git_config *cfg; int32_t dummy; int i; UNUSED(argc); UNUSED(argv); check_lg2(git_repository_config(&cfg, repo), "Could not obtain config", NULL); for (i = 1; i < 100000; ++i) git_config_get_int32(&dummy, cfg, "foo.bar"); git_config_free(cfg); return 0; } ``` Without stat cache: $ time lg2 repro real 0m1.528s user 0m0.568s sys 0m0.944s With stat cache: $ time lg2 repro real 0m0.526s user 0m0.268s sys 0m0.258s This benchmark shows a nearly three-fold performance improvement. This change requires that we check our configuration stress tests as we're now in fact becoming more racy. If somebody is writing a configuration file at nearly the same time (there is a window of 100ns on Windows-based systems), then it might be that we realize that this file has actually changed and thus may not re-read it. This will only happen if either an external process is rewriting the configuration file or if the same process has multiple `git_config` structures pointing to the same time, where one of both is being used to write and the other one is used to read values.
1 parent d086864 commit d7f58ea

File tree

4 files changed

+21
-0
lines changed

4 files changed

+21
-0
lines changed

src/config_file.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ static int config_is_modified(int *modified, git_config_file *file)
143143

144144
*modified = 0;
145145

146+
if (!git_futils_filestamp_check(&file->stamp, file->path))
147+
goto check_includes;
148+
146149
if ((error = git_futils_readbuffer(&buf, file->path)) < 0)
147150
goto out;
148151

@@ -154,6 +157,7 @@ static int config_is_modified(int *modified, git_config_file *file)
154157
goto out;
155158
}
156159

160+
check_includes:
157161
git_array_foreach(file->includes, i, include) {
158162
if ((error = config_is_modified(modified, include)) < 0 || *modified)
159163
goto out;
@@ -861,18 +865,25 @@ static int config_read(
861865
diskfile_parse_state parse_data;
862866
git_config_parser reader;
863867
git_buf contents = GIT_BUF_INIT;
868+
struct stat st;
864869
int error;
865870

866871
if (depth >= MAX_INCLUDE_DEPTH) {
867872
git_error_set(GIT_ERROR_CONFIG, "maximum config include depth reached");
868873
return -1;
869874
}
870875

876+
if (p_stat(file->path, &st) < 0) {
877+
error = git_path_set_error(errno, file->path, "stat");
878+
goto out;
879+
}
880+
871881
if ((error = git_futils_readbuffer(&contents, file->path)) < 0)
872882
goto out;
873883

874884
git_parse_ctx_init(&reader.ctx, contents.ptr, contents.size);
875885

886+
git_futils_filestamp_set_from_stat(&file->stamp, &st);
876887
if ((error = git_hash_buf(&file->checksum, contents.ptr, contents.size)) < 0)
877888
goto out;
878889

src/config_parse.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@
88
#define INCLUDE_config_parse_h__
99

1010
#include "common.h"
11+
1112
#include "array.h"
13+
#include "fileops.h"
1214
#include "oid.h"
1315
#include "parse.h"
1416

1517
extern const char *git_config_escapes;
1618
extern const char *git_config_escaped;
1719

1820
typedef struct config_file {
21+
git_futils_filestamp stamp;
1922
git_oid checksum;
2023
char *path;
2124
git_array_t(struct config_file) includes;

tests/clar_libgit2.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,12 @@ void cl_fake_home_cleanup(void *);
219219

220220
void cl_sandbox_set_search_path_defaults(void);
221221

222+
#ifdef GIT_WIN32
223+
# define cl_msleep(x) Sleep(x)
224+
#else
225+
# define cl_msleep(x) usleep(1000 * (x))
226+
#endif
227+
222228
#ifdef GIT_WIN32
223229
bool cl_sandbox_supports_8dot3(void);
224230
#endif

tests/config/stress.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ void test_config_stress__quick_write(void)
123123
for (i = 0; i < 10; i++) {
124124
int32_t val;
125125
cl_git_pass(git_config_set_int32(config_w, key, i));
126+
cl_msleep(1);
126127
cl_git_pass(git_config_get_int32(&val, config_r, key));
127128
cl_assert_equal_i(i, val);
128129
}

0 commit comments

Comments
 (0)