11use clap:: Args ;
2+ use codeql_extractor:: file_paths:: PathTransformer ;
23use lazy_static:: lazy_static;
34use rayon:: prelude:: * ;
45use std:: borrow:: Cow ;
6+ use std:: collections:: HashSet ;
57use std:: fs;
68use std:: io:: BufRead ;
79use std:: path:: { Path , PathBuf } ;
@@ -78,6 +80,9 @@ pub fn run(options: Options) -> std::io::Result<()> {
7880
7981 let file_list = fs:: File :: open ( file_paths:: path_from_string ( & options. file_list ) ) ?;
8082
83+ let overlay_changed_files: Option < HashSet < PathBuf > > = get_overlay_changed_files ( ) ;
84+ let path_transformer = file_paths:: load_path_transformer ( ) ?;
85+
8186 let language: Language = tree_sitter_ruby:: LANGUAGE . into ( ) ;
8287 let erb: Language = tree_sitter_embedded_template:: LANGUAGE . into ( ) ;
8388 // Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
@@ -94,7 +99,14 @@ pub fn run(options: Options) -> std::io::Result<()> {
9499 . try_for_each ( |line| {
95100 let mut diagnostics_writer = diagnostics. logger ( ) ;
96101 let path = PathBuf :: from ( line) . canonicalize ( ) ?;
97- let src_archive_file = file_paths:: path_for ( & src_archive_dir, & path, "" ) ;
102+ match & overlay_changed_files {
103+ Some ( changed_files) if !changed_files. contains ( & path) => {
104+ // We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
105+ return Result :: Ok ( ( ) ) ;
106+ }
107+ _ => { } ,
108+ }
109+ let src_archive_file = file_paths:: path_for ( & src_archive_dir, & path, "" , path_transformer. as_ref ( ) ) ;
98110 let mut source = std:: fs:: read ( & path) ?;
99111 let mut needs_conversion = false ;
100112 let code_ranges;
@@ -107,6 +119,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
107119 & erb_schema,
108120 & mut diagnostics_writer,
109121 & mut trap_writer,
122+ path_transformer. as_ref ( ) ,
110123 & path,
111124 & source,
112125 & [ ] ,
@@ -151,7 +164,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
151164 "character-decoding-error" ,
152165 "Character decoding error" ,
153166 )
154- . file ( & file_paths:: normalize_path ( & path) )
167+ . file ( & file_paths:: normalize_and_transform_path ( & path, path_transformer . as_ref ( ) ) )
155168 . message (
156169 "Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}." ,
157170 & [
@@ -171,7 +184,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
171184 diagnostics_writer. write (
172185 diagnostics_writer
173186 . new_entry ( "unknown-character-encoding" , "Could not process some files due to an unknown character encoding" )
174- . file ( & file_paths:: normalize_path ( & path) )
187+ . file ( & file_paths:: normalize_and_transform_path ( & path, path_transformer . as_ref ( ) ) )
175188 . message (
176189 "Unknown character encoding {} in {} {}." ,
177190 & [
@@ -194,6 +207,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
194207 & schema,
195208 & mut diagnostics_writer,
196209 & mut trap_writer,
210+ path_transformer. as_ref ( ) ,
197211 & path,
198212 & source,
199213 & code_ranges,
@@ -204,14 +218,26 @@ pub fn run(options: Options) -> std::io::Result<()> {
204218 } else {
205219 std:: fs:: copy ( & path, & src_archive_file) ?;
206220 }
207- write_trap ( & trap_dir, path, & trap_writer, trap_compression)
221+ write_trap ( & trap_dir, path, & trap_writer, trap_compression, path_transformer . as_ref ( ) )
208222 } )
209223 . expect ( "failed to extract files" ) ;
210224
211225 let path = PathBuf :: from ( "extras" ) ;
212226 let mut trap_writer = trap:: Writer :: new ( ) ;
213227 extractor:: populate_empty_location ( & mut trap_writer) ;
214- let res = write_trap ( & trap_dir, path, & trap_writer, trap_compression) ;
228+ let res = write_trap (
229+ & trap_dir,
230+ path,
231+ & trap_writer,
232+ trap_compression,
233+ path_transformer. as_ref ( ) ,
234+ ) ;
235+ if let Ok ( output_path) = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT" ) {
236+ // We're extracting an overlay base. For now, we don't have any metadata we need to store
237+ // that would get read when extracting the overlay, but the CLI expects us to write
238+ // *something*. An empty file will do.
239+ std:: fs:: write ( output_path, b"" ) ?;
240+ }
215241 tracing:: info!( "Extraction complete" ) ;
216242 res
217243}
@@ -237,8 +263,14 @@ fn write_trap(
237263 path : PathBuf ,
238264 trap_writer : & trap:: Writer ,
239265 trap_compression : trap:: Compression ,
266+ path_transformer : Option < & PathTransformer > ,
240267) -> std:: io:: Result < ( ) > {
241- let trap_file = file_paths:: path_for ( trap_dir, & path, trap_compression. extension ( ) ) ;
268+ let trap_file = file_paths:: path_for (
269+ trap_dir,
270+ & path,
271+ trap_compression. extension ( ) ,
272+ path_transformer,
273+ ) ;
242274 std:: fs:: create_dir_all ( trap_file. parent ( ) . unwrap ( ) ) ?;
243275 trap_writer. write_to_file ( & trap_file, trap_compression)
244276}
@@ -302,6 +334,39 @@ fn skip_space(content: &[u8], index: usize) -> usize {
302334 }
303335 index
304336}
337+
338+ /**
339+ * If the relevant environment variable has been set by the CLI, indicating that we are extracting
340+ * an overlay, this function reads the JSON file at the path given by its value, and returns a set
341+ * of canonicalized paths of source files that have changed and should therefore be extracted.
342+ *
343+ * If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
344+ * cannot be read, this function returns `None`. In that case, all files should be extracted.
345+ */
346+ fn get_overlay_changed_files ( ) -> Option < HashSet < PathBuf > > {
347+ let path = std:: env:: var ( "CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES" ) . ok ( ) ?;
348+ let file_content = fs:: read_to_string ( path) . ok ( ) ?;
349+ let json_value: serde_json:: Value = serde_json:: from_str ( & file_content) . ok ( ) ?;
350+
351+ // The JSON file is expected to have the following structure:
352+ // {
353+ // "changes": [
354+ // "relative/path/to/changed/file1.rb",
355+ // "relative/path/to/changed/file2.rb",
356+ // ...
357+ // ]
358+ // }
359+ Some (
360+ json_value
361+ . get ( "changes" ) ?
362+ . as_array ( ) ?
363+ . iter ( )
364+ . filter_map ( |change| change. as_str ( ) )
365+ . filter_map ( |s| PathBuf :: from ( s) . canonicalize ( ) . ok ( ) )
366+ . collect ( ) ,
367+ )
368+ }
369+
305370fn scan_coding_comment ( content : & [ u8 ] ) -> std:: option:: Option < Cow < str > > {
306371 let mut index = 0 ;
307372 // skip UTF-8 BOM marker if there is one
0 commit comments