Skip to content

Commit dcfa42e

Browse files
committed
Sync upstream ruby
1 parent 13d2b18 commit dcfa42e

File tree

23 files changed

+6525
-126
lines changed

23 files changed

+6525
-126
lines changed

ruby/downgrades/dc51d416301df12df5b70fbc4338de6cc1f82bfd/old.dbscheme

Lines changed: 1532 additions & 0 deletions
Large diffs are not rendered by default.

ruby/downgrades/dc51d416301df12df5b70fbc4338de6cc1f82bfd/ruby.dbscheme

Lines changed: 1526 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
description: Add databaseMetadata relation
2+
compatibility: full
3+
databaseMetadata.rel: delete

ruby/extractor/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@ rayon = "1.10.0"
1717
regex = "1.11.1"
1818
encoding = "0.2"
1919
lazy_static = "1.5.0"
20+
serde_json = "1.0.140"
2021

2122
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }

ruby/extractor/src/extractor.rs

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use clap::Args;
2+
use codeql_extractor::file_paths::PathTransformer;
23
use lazy_static::lazy_static;
34
use rayon::prelude::*;
45
use std::borrow::Cow;
6+
use std::collections::HashSet;
57
use std::fs;
68
use std::io::BufRead;
79
use std::path::{Path, PathBuf};
@@ -78,6 +80,9 @@ pub fn run(options: Options) -> std::io::Result<()> {
7880

7981
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
8082

83+
let overlay_changed_files: Option<HashSet<PathBuf>> = get_overlay_changed_files();
84+
let path_transformer = file_paths::load_path_transformer()?;
85+
8186
let language: Language = tree_sitter_ruby::LANGUAGE.into();
8287
let erb: Language = tree_sitter_embedded_template::LANGUAGE.into();
8388
// Look up tree-sitter kind ids now, to avoid string comparisons when scanning ERB files.
@@ -94,7 +99,14 @@ pub fn run(options: Options) -> std::io::Result<()> {
9499
.try_for_each(|line| {
95100
let mut diagnostics_writer = diagnostics.logger();
96101
let path = PathBuf::from(line).canonicalize()?;
97-
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "");
102+
match &overlay_changed_files {
103+
Some(changed_files) if !changed_files.contains(&path) => {
104+
// We are extracting an overlay and this file is not in the list of changed files, so we should skip it.
105+
return Result::Ok(());
106+
}
107+
_ => {},
108+
}
109+
let src_archive_file = file_paths::path_for(&src_archive_dir, &path, "", path_transformer.as_ref());
98110
let mut source = std::fs::read(&path)?;
99111
let mut needs_conversion = false;
100112
let code_ranges;
@@ -107,6 +119,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
107119
&erb_schema,
108120
&mut diagnostics_writer,
109121
&mut trap_writer,
122+
path_transformer.as_ref(),
110123
&path,
111124
&source,
112125
&[],
@@ -151,7 +164,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
151164
"character-decoding-error",
152165
"Character decoding error",
153166
)
154-
.file(&file_paths::normalize_path(&path))
167+
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
155168
.message(
156169
"Could not decode the file contents as {}: {}. The contents of the file must match the character encoding specified in the {} {}.",
157170
&[
@@ -171,7 +184,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
171184
diagnostics_writer.write(
172185
diagnostics_writer
173186
.new_entry("unknown-character-encoding", "Could not process some files due to an unknown character encoding")
174-
.file(&file_paths::normalize_path(&path))
187+
.file(&file_paths::normalize_and_transform_path(&path, path_transformer.as_ref()))
175188
.message(
176189
"Unknown character encoding {} in {} {}.",
177190
&[
@@ -194,6 +207,7 @@ pub fn run(options: Options) -> std::io::Result<()> {
194207
&schema,
195208
&mut diagnostics_writer,
196209
&mut trap_writer,
210+
path_transformer.as_ref(),
197211
&path,
198212
&source,
199213
&code_ranges,
@@ -204,14 +218,26 @@ pub fn run(options: Options) -> std::io::Result<()> {
204218
} else {
205219
std::fs::copy(&path, &src_archive_file)?;
206220
}
207-
write_trap(&trap_dir, path, &trap_writer, trap_compression)
221+
write_trap(&trap_dir, path, &trap_writer, trap_compression, path_transformer.as_ref())
208222
})
209223
.expect("failed to extract files");
210224

211225
let path = PathBuf::from("extras");
212226
let mut trap_writer = trap::Writer::new();
213227
extractor::populate_empty_location(&mut trap_writer);
214-
let res = write_trap(&trap_dir, path, &trap_writer, trap_compression);
228+
let res = write_trap(
229+
&trap_dir,
230+
path,
231+
&trap_writer,
232+
trap_compression,
233+
path_transformer.as_ref(),
234+
);
235+
if let Ok(output_path) = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_BASE_METADATA_OUT") {
236+
// We're extracting an overlay base. For now, we don't have any metadata we need to store
237+
// that would get read when extracting the overlay, but the CLI expects us to write
238+
// *something*. An empty file will do.
239+
std::fs::write(output_path, b"")?;
240+
}
215241
tracing::info!("Extraction complete");
216242
res
217243
}
@@ -237,8 +263,14 @@ fn write_trap(
237263
path: PathBuf,
238264
trap_writer: &trap::Writer,
239265
trap_compression: trap::Compression,
266+
path_transformer: Option<&PathTransformer>,
240267
) -> std::io::Result<()> {
241-
let trap_file = file_paths::path_for(trap_dir, &path, trap_compression.extension());
268+
let trap_file = file_paths::path_for(
269+
trap_dir,
270+
&path,
271+
trap_compression.extension(),
272+
path_transformer,
273+
);
242274
std::fs::create_dir_all(trap_file.parent().unwrap())?;
243275
trap_writer.write_to_file(&trap_file, trap_compression)
244276
}
@@ -302,6 +334,39 @@ fn skip_space(content: &[u8], index: usize) -> usize {
302334
}
303335
index
304336
}
337+
338+
/**
339+
* If the relevant environment variable has been set by the CLI, indicating that we are extracting
340+
* an overlay, this function reads the JSON file at the path given by its value, and returns a set
341+
* of canonicalized paths of source files that have changed and should therefore be extracted.
342+
*
343+
* If the environment variable is not set (i.e. we're not extracting an overlay), or if the file
344+
* cannot be read, this function returns `None`. In that case, all files should be extracted.
*
* `None` is also returned when the file content is not valid JSON, when the top-level value has
* no `changes` key, or when `changes` is not an array. Entries of `changes` that are not strings,
* or whose path fails to canonicalize, are silently left out of the returned set.
*
* NOTE(review): `canonicalize` resolves relative entries against the process's current working
* directory — confirm that this matches the directory the listed paths are relative to.
345+
*/
346+
fn get_overlay_changed_files() -> Option<HashSet<PathBuf>> {
347+
let path = std::env::var("CODEQL_EXTRACTOR_RUBY_OVERLAY_CHANGES").ok()?;
348+
let file_content = fs::read_to_string(path).ok()?;
349+
let json_value: serde_json::Value = serde_json::from_str(&file_content).ok()?;
350+
351+
// The JSON file is expected to have the following structure:
352+
// {
353+
// "changes": [
354+
// "relative/path/to/changed/file1.rb",
355+
// "relative/path/to/changed/file2.rb",
356+
// ...
357+
// ]
358+
// }
359+
Some(
360+
json_value
361+
.get("changes")?
362+
.as_array()?
363+
.iter()
// Keep only the entries that are JSON strings.
364+
.filter_map(|change| change.as_str())
// Canonicalize each entry; entries for which canonicalization fails are dropped rather
// than reported.
365+
.filter_map(|s| PathBuf::from(s).canonicalize().ok())
366+
.collect(),
367+
)
368+
}
369+
305370
fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
306371
let mut index = 0;
307372
// skip UTF-8 BOM marker if there is one

ruby/ql/lib/codeql/ruby/ast/internal/Literal.qll

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,12 +582,27 @@ abstract class StringlikeLiteralImpl extends Expr, TStringlikeLiteral {
582582
)
583583
}
584584

585+
/**
 * Gets the `n`th component of this literal, as given by `getComponentImpl`, provided
 * that the sum of the lengths of the component values of this literal is below 10000.
 *
 * The sum is taken with `strictsum` over the components for which `getValue()` has a
 * result; NOTE(review): this presumably means the predicate has no result at all when
 * no component has a value — confirm against the QL aggregate semantics.
 */
pragma[nomagic]
586+
private StringComponentImpl getComponentImplRestricted(int n) {
587+
result = this.getComponentImpl(n) and
588+
strictsum(int length, int i | length = this.getComponentImpl(i).getValue().length() | length) <
589+
10000
590+
}
591+
585592
// 0 components results in the empty string
586-
// if all interpolations have a known string value, we will get a result
593+
// if all interpolations have a known string value, we will get a result, unless the
594+
// combined length exceeds 10,000 characters
587595
language[monotonicAggregates]
588596
final string getStringValue() {
597+
not exists(this.getComponentImpl(_)) and
598+
result = ""
599+
or
589600
result =
590-
concat(StringComponentImpl c, int i | c = this.getComponentImpl(i) | c.getValue() order by i)
601+
strictconcat(StringComponentImpl c, int i |
602+
c = this.getComponentImplRestricted(i)
603+
|
604+
c.getValue() order by i
605+
)
591606
}
592607
}
593608

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1426,4 +1426,4 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) {
14261426
ppos.isAnyNamed() and apos.isKeyword(_)
14271427
or
14281428
apos.isAnyNamed() and ppos.isKeyword(_)
1429-
}
1429+
}

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2560,4 +2560,4 @@ module TypeInference {
25602560
predicate hasModuleType(Node n, DataFlowType t) {
25612561
exists(Module tp | t = TModuleDataFlowType(tp) | hasType(n, tp, _))
25622562
}
2563-
}
2563+
}

0 commit comments

Comments
 (0)