Skip to content

Commit fa2d097

Browse files
committed
compute empty files in c++ binary
1 parent 088d9a3 commit fa2d097

File tree

2 files changed

+97
-26
lines changed

2 files changed

+97
-26
lines changed

python/private/py_executable.bzl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,9 @@ def _create_zip_file(ctx, *, output, zip_main, runfiles):
832832
args.add("--repo-mapping-manifest", zip_repo_mapping_manifest.path)
833833
inputs.append(zip_repo_mapping_manifest)
834834

835-
args.add_all(runfiles.empty_filenames, map_each=_get_zip_empty_path_arg)
835+
# Empty __init__.py files are now generated by the C++ binary
836+
# based on the directories containing Python files. This eliminates
837+
# the expensive flattening of the files depset during analysis.
836838
args.add_all(runfiles.files, map_each=_get_zip_path_arg)
837839

838840
ctx.actions.run(
@@ -846,10 +848,6 @@ def _create_zip_file(ctx, *, output, zip_main, runfiles):
846848
)
847849

848850

849-
def _get_zip_empty_path_arg(file):
850-
return "{}=".format(file.short_path)
851-
852-
853851
def _get_zip_path_arg(file):
854852
return "{}={}".format(file.short_path, file.path)
855853

python/private/py_executable_zip_gen.cc

Lines changed: 94 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <fstream>
3030
#include <iostream>
3131
#include <memory>
32+
#include <set>
3233
#include <string>
3334
#include <vector>
3435

@@ -68,24 +69,6 @@ bool starts_with(const std::string& str, const std::string& prefix) {
6869

6970
} // namespace path
7071

71-
// Get zip runfiles path for a file
72-
std::string get_zip_runfiles_path(const std::string& path,
73-
const std::string& workspace_name,
74-
bool legacy_external_runfiles) {
75-
std::string zip_runfiles_path;
76-
77-
if (legacy_external_runfiles && path::starts_with(path, "external/")) {
78-
// Remove "external/" prefix
79-
zip_runfiles_path = path::relativize(path, "external");
80-
} else {
81-
// Normalize workspace_name/path
82-
std::string combined = workspace_name + "/" + path;
83-
zip_runfiles_path = path::normalize(combined);
84-
}
85-
86-
return "runfiles/" + zip_runfiles_path;
87-
}
88-
8972
// Parse a file entry in "short_path=disk_path" or "short_path=" format
9073
struct FileEntry {
9174
std::string short_path;
@@ -108,6 +91,84 @@ struct FileEntry {
10891
}
10992
};
11093

94+
// Get zip runfiles path for a file
95+
std::string get_zip_runfiles_path(const std::string& path,
96+
const std::string& workspace_name,
97+
bool legacy_external_runfiles) {
98+
std::string zip_runfiles_path;
99+
100+
if (legacy_external_runfiles && path::starts_with(path, "external/")) {
101+
// Remove "external/" prefix
102+
zip_runfiles_path = path::relativize(path, "external");
103+
} else {
104+
// Normalize workspace_name/path
105+
std::string combined = workspace_name + "/" + path;
106+
zip_runfiles_path = path::normalize(combined);
107+
}
108+
109+
return "runfiles/" + zip_runfiles_path;
110+
}
111+
112+
// Report whether `path` ends with one of the recognized Python-related
// suffixes: ".py", ".pyc", ".so" or ".pyd". The match is a plain,
// case-sensitive suffix comparison; an empty path never matches.
bool is_python_file(const std::string& path) {
  static const char* const kSuffixes[] = {".py", ".pyc", ".so", ".pyd"};

  for (const char* suffix : kSuffixes) {
    const std::string candidate(suffix);
    const std::string::size_type len = candidate.size();
    // Only compare when the path is long enough to hold the suffix.
    if (path.size() >= len &&
        path.compare(path.size() - len, len, candidate) == 0) {
      return true;
    }
  }
  return false;
}
124+
125+
// Return everything before the final '/' in `path`.
// A path with no '/' has no directory part and yields an empty string.
std::string get_directory(const std::string& path) {
  const std::string::size_type slash = path.rfind('/');
  return slash == std::string::npos ? std::string() : path.substr(0, slash);
}
133+
134+
// List `dir` followed by each of its ancestors, deepest first.
// Ancestors are produced by repeatedly cutting the string at its last '/'.
// The walk ends when no '/' remains or the remaining prefix is empty, so
// the root (empty) directory is never included. An empty `dir` yields an
// empty list.
std::vector<std::string> get_parent_directories(const std::string& dir) {
  std::vector<std::string> chain;

  // `end` is the length of the prefix of `dir` currently being emitted.
  std::string::size_type end = dir.size();
  while (end > 0) {
    chain.emplace_back(dir, 0, end);
    const std::string::size_type slash = dir.rfind('/', end - 1);
    if (slash == std::string::npos) {
      break;
    }
    end = slash;
  }

  return chain;
}
150+
151+
// Collect all directories that need __init__.py files
152+
// This replicates the logic of Bazel's EmptyFilesSupplier Java class
153+
std::set<std::string> collect_init_directories(const std::vector<FileEntry>& files) {
154+
std::set<std::string> init_dirs;
155+
156+
// For each Python file, add its directory and all parent directories
157+
for (const auto& file : files) {
158+
if (is_python_file(file.short_path)) {
159+
std::string dir = get_directory(file.short_path);
160+
if (!dir.empty()) {
161+
// Add all parent directories (this replicates EmptyFilesSupplier logic)
162+
for (const auto& parent : get_parent_directories(dir)) {
163+
init_dirs.insert(parent);
164+
}
165+
}
166+
}
167+
}
168+
169+
return init_dirs;
170+
}
171+
111172
int main(int argc, char* argv[]) {
112173
if (argc != 2) {
113174
std::cerr << "Usage: " << argv[0] << " <params_file>" << std::endl;
@@ -194,6 +255,10 @@ int main(int argc, char* argv[]) {
194255
return 1;
195256
}
196257

258+
// Collect directories that need __init__.py files
259+
// This replicates the logic of Bazel's EmptyFilesSupplier Java class
260+
std::set<std::string> init_dirs = collect_init_directories(files);
261+
197262
// Generate zip manifest
198263
// Order must match main branch for reproducible builds
199264
std::string manifest_file = "zip_manifest.txt";
@@ -206,17 +271,25 @@ int main(int argc, char* argv[]) {
206271
// 1. Main file
207272
manifest << "__main__.py=" << main_file << "\n";
208273

209-
// 2. Default empty files
274+
// 2. Default empty files at root
210275
manifest << "__init__.py=\n";
211276
manifest << get_zip_runfiles_path("__init__.py", workspace_name, legacy_external_runfiles) << "=\n";
212277

213-
// 3. Process file entries
278+
// 3. Generate empty __init__.py files for all directories containing Python files
279+
// This replicates the EmptyFilesSupplier logic from Bazel's Java code
280+
for (const auto& dir : init_dirs) {
281+
std::string init_path = dir + "/__init__.py";
282+
std::string zip_path = get_zip_runfiles_path(init_path, workspace_name, legacy_external_runfiles);
283+
manifest << zip_path << "=\n";
284+
}
285+
286+
// 4. Process file entries
214287
for (const auto& file : files) {
215288
std::string zip_path = get_zip_runfiles_path(file.short_path, workspace_name, legacy_external_runfiles);
216289
manifest << zip_path << "=" << file.disk_path << "\n";
217290
}
218291

219-
// 4. Repo mapping manifest (last, to match main branch order)
292+
// 5. Repo mapping manifest (last, to match main branch order)
220293
if (!repo_mapping_manifest.empty()) {
221294
manifest << "runfiles/_repo_mapping=" << repo_mapping_manifest << "\n";
222295
}

0 commit comments

Comments
 (0)