Skip to content

Commit 0952064

Browse files
author
Dave Bartolomeo
committed
Move sync-identical-files.py into public repo as sync-files.py
We currently use a script to keep certain duplicate QL files in sync across the repo. For historical reasons, this script has lived in the private repo alongside the rest of CodeQL, even though it's only used for files in the public `ql` repo. This PR moves the script into the public `ql` repo. It is still invoked by Jenkins scripts that live in the private repo during CI, but it can also be invoked directly without having a checkout of the private repo. This is useful for anyone who is modifying the dataflow or IR libraries with only a QL checkout.
1 parent 1baf5df commit 0952064

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
.vs/*
1515
!.vs/VSWorkspaceSettings.json
1616

17+
# Byte-compiled python files
18+
*.pyc
19+
1720
# It's useful (though not required) to be able to unpack codeql in the ql checkout itself
1821
/codeql/
1922
.vscode/settings.json

config/sync-files.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env python3
2+
3+
# Due to various technical limitations, we sometimes have files that need to be
4+
# kept identical in the repository. This script loads a database of such
5+
# files and can perform two functions: check whether they are still identical,
6+
# and overwrite the others with a master copy if needed.
7+
8+
import hashlib
9+
import shutil
10+
import os
11+
import sys
12+
import json
13+
import re
14+
from os import path
15+
16+
# Maps a group name to the list of file paths that must stay identical.
# Filled in by load_if_exists() and by sync_identical_files().
file_groups = {}
17+
18+
def add_prefix(prefix, relative):
    """Join *relative* onto *prefix* and make sure the result stays below it.

    Both paths are resolved with ``realpath`` before the containment check,
    so ``..`` components and symlinks cannot be used to escape *prefix*.

    Returns the joined (unresolved) path.
    Raises Exception if the joined path is not located below *prefix*.
    """
    result = path.join(prefix, relative)
    real_prefix = path.realpath(prefix)
    try:
        # commonpath compares whole path components. The character-wise
        # commonprefix would wrongly accept a sibling such as "<prefix>bar/x".
        is_below = path.commonpath(
            (path.realpath(result), real_prefix)) == real_prefix
    except ValueError:
        # commonpath refuses paths that cannot share an ancestor
        # (for example different drives on Windows).
        is_below = False
    if not is_below:
        raise Exception("Path {} is not below {}".format(
            result, prefix))
    return result
25+
26+
def load_if_exists(prefix, json_file_relative):
    """Merge the file groups described by a JSON file into ``file_groups``.

    The JSON file is looked up at ``prefix/json_file_relative``. When it is
    not a regular file nothing happens. Otherwise it is expected to hold an
    object mapping a group name to a list of paths relative to *prefix*;
    every path is passed through add_prefix() before being stored.
    """
    json_file_name = path.join(prefix, json_file_relative)
    if not path.isfile(json_file_name):
        return

    print("Loading file groups from", json_file_name)
    with open(json_file_name, 'r', encoding='utf-8') as handle:
        raw_groups = json.load(handle)

    # Build the complete mapping first so that a rejected path leaves
    # file_groups untouched.
    prefixed_groups = {}
    for group_name, members in raw_groups.items():
        prefixed_groups[group_name] = [
            add_prefix(prefix, member) for member in members
        ]
    file_groups.update(prefixed_groups)
40+
41+
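# Builds a dict of C# file groups: each entry maps a group name to the copy under csharp/ql/test and the same-named copy under csharp/ql/src that should be in sync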
42+
def csharp_test_files():
    """Pair up C# sample files that exist under both the src and test trees.

    Files below ``csharp/ql/src`` whose name ends in ``Bad<digits>.cs`` or
    ``Good<digits>.cs`` are indexed by file name. Every file below
    ``csharp/ql/test`` carrying one of those names yields a group
    ``"C# test '<name>'"`` holding ``[test path, src path]``.

    Both directories are resolved relative to the current working directory.
    """
    is_sample_name = re.compile('.*(Bad|Good)[0-9]*\\.cs$').match

    # File name -> path of the copy found under csharp/ql/src.
    src_copies = {}
    for folder, _subdirs, names in os.walk("csharp/ql/src"):
        for name in names:
            if is_sample_name(name):
                src_copies[name] = os.path.join(folder, name)

    groups = {}
    for folder, _subdirs, names in os.walk("csharp/ql/test"):
        for name in names:
            if name in src_copies:
                groups["C# test '" + name + "'"] = [
                    os.path.join(folder, name),
                    src_copies[name],
                ]
    return groups
56+
57+
def file_checksum(filename):
    """Return the hexadecimal SHA-1 digest of the full contents of *filename*."""
    digest = hashlib.sha1()
    with open(filename, 'rb') as stream:
        digest.update(stream.read())
    return digest.hexdigest()
60+
61+
def check_group(group_name, files, master_file_picker, emit_error):
    """Verify that all *files* of one group are identical, syncing on request.

    group_name          label used in the error message.
    files               paths that are supposed to have identical contents.
    master_file_picker  callable taking *files* and returning the path to
                        copy from, or None to only report the mismatch.
    emit_error          callable invoked as emit_error(path, line, message);
                        this function always passes ``__file__`` and ``0``.

    Nothing happens when exactly one distinct checksum is found. Otherwise
    either every other file is moved to ``<name>~`` and replaced by a copy
    of the master file, or the mismatch is reported through *emit_error*.
    """
    distinct_checksums = set()
    for candidate in files:
        distinct_checksums.add(file_checksum(candidate))
    if len(distinct_checksums) == 1:
        return

    master_file = master_file_picker(files)
    if master_file is not None:
        print(" Syncing others from", master_file)
        for target in files:
            if target != master_file:
                print(" " + target)
                # Keep the old contents as a backup before overwriting.
                os.replace(target, target + '~')
                shutil.copy(master_file, target)
        print(" Backups written with '~' appended to file names")
        return

    emit_error(__file__, 0,
               "Files from group '" + group_name + "' not in sync.")
    emit_error(__file__, 0,
               "Run this script with a file-name argument among the "
               "following to overwrite the remaining files with the contents "
               "of that file or run with the --latest switch to update each "
               "group of files from the most recently modified file in the group.")
    for listed in files:
        emit_error(__file__, 0, " " + listed)
87+
88+
def chdir_repo_root():
    """Make the parent of the directory containing this script the cwd."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(os.path.join(script_dir, '..'))
91+
92+
def choose_master_file(master_file, files):
    """Return the entry of *files* that designates *master_file*, else None.

    An exact string match is returned unchanged. Failing that, the paths are
    compared after ``os.path.normpath`` so that equivalent spellings such as
    ``a/b.ql`` and ``./a/b.ql`` still match; group paths in this script are
    spelled both with and without a leading ``./``.

    In the normalised case the spelling found in *files* is returned, so the
    caller can recognise the master by comparing against its own entries.
    """
    if master_file in files:
        return master_file

    wanted = os.path.normpath(master_file)
    for candidate in files:
        if os.path.normpath(candidate) == wanted:
            return candidate
    return None
97+
98+
def choose_latest_file(files):
    """Return the entry of *files* with the newest modification time.

    When several files share the newest time the first of them wins.
    Returns None for an empty *files*.
    """
    # max() keeps the first maximal element, like a strict "<" scan would.
    return max(files, key=os.path.getmtime, default=None)
107+
108+
# Number of errors reported through emit_local_error() so far.
local_error_count = 0


def emit_local_error(path, line, error):
    """Print one error in ``ERROR: <path>:<line> - <message>`` form and count it.

    path   file the error is attributed to.
    line   line number; may be an int (check_group passes 0) or a string.
    error  human-readable message.
    """
    global local_error_count
    # format() accepts any type for line; "+" concatenation raised TypeError
    # for the integer line numbers check_group passes.
    print('ERROR: {}:{} - {}'.format(path, line, error))
    local_error_count += 1
113+
114+
# This function is invoked directly by a CI script, which passes a different error-handling
115+
# callback.
116+
def sync_identical_files(emit_error):
    """Check every known file group, re-syncing stale copies when asked to.

    The mode is selected through ``sys.argv``:

    * no argument: groups whose files differ are only reported;
    * ``--latest``: the most recently modified file of a group is its master;
    * a path to an existing file (tested before the working directory is
      changed): that file is the master for groups that list it.

    emit_error is forwarded to check_group(), which calls it as
    emit_error(path, line, message).

    Raises Exception when more than one argument is supplied, or when the
    single argument is neither ``--latest`` nor an existing file.
    """
    argument_count = len(sys.argv)
    if argument_count not in (1, 2):
        raise Exception("Bad command line or file not found")

    if argument_count == 1:
        # Report-only mode: never nominate a master file.
        def master_file_picker(files):
            return None
    elif sys.argv[1] == "--latest":
        def master_file_picker(files):
            return choose_latest_file(files)
    elif os.path.isfile(sys.argv[1]):
        def master_file_picker(files):
            return choose_master_file(sys.argv[1], files)
    else:
        raise Exception("File not found")

    chdir_repo_root()
    load_if_exists('.', 'config/identical-files.json')
    file_groups.update(csharp_test_files())
    for group_name, members in file_groups.items():
        check_group(group_name, members, master_file_picker, emit_error)
133+
134+
def main():
    """Run the sync check with local error reporting.

    Exits with status 1 when at least one error was reported through
    emit_local_error(); otherwise returns normally.
    """
    sync_identical_files(emit_local_error)
    if local_error_count > 0:
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing under "python -S" or frozen builds.
        sys.exit(1)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)