Skip to content

Commit 969fe6e

Browse files
author
Max Schaefer
authored
Merge pull request #657 from esben-semmle/js/classify-more-files
JS: classify additional files
2 parents e8c8360 + 7cc6f2f commit 969fe6e

File tree

16 files changed

+108
-6
lines changed

16 files changed

+108
-6
lines changed

change-notes/1.20/analysis-javascript.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Support for popular libraries has been improved. Consequently, queries may produce more results on code bases that use the following features:
66
- client-side code, for example [React](https://reactjs.org/)
77
- server-side code, for example [hapi](https://hapijs.com/)
8+
* File classification has been improved to recognize additional generated files, for example files from [HTML Tidy](http://www.html-tidy.org/).
89

910
## New queries
1011

javascript/ql/src/semmle/javascript/GeneratedCode.qll

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ class CodeGeneratorMarkerComment extends GeneratedCodeMarkerComment {
4242
*/
4343
private predicate codeGeneratorMarkerComment(Comment c, string tool) {
4444
exists (string toolPattern |
45-
toolPattern = "js_of_ocaml|CoffeeScript|LiveScript|dart2js|ANTLR|PEG\\.js|Opal|JSX|jison(?:-lex)?" and
46-
tool = c.getText().regexpCapture("(?s)[\\s*]*(?:parser )?[gG]eneratedy? (?:from .*)?by (" + toolPattern + ")\\b.*", 1)
45+
toolPattern = "js_of_ocaml|CoffeeScript|LiveScript|dart2js|ANTLR|PEG\\.js|Opal|JSX|jison(?:-lex)?|(?:Microsoft \\(R\\) AutoRest Code Generator)|purs" and
46+
tool = c.getText().regexpCapture("(?s)[\\s*]*(?:parser |Code )?[gG]eneratedy? (?:from .*)?by (" + toolPattern + ")\\b.*", 1)
4747
)
4848
}
4949

@@ -114,6 +114,17 @@ private predicate isData(File f) {
114114
)
115115
}
116116

117+
/**
118+
* Holds if `f` is a generated HTML file, that is, it contains a `<meta name="generator">` element.
119+
*/
120+
private predicate isGeneratedHtml(File f) {
121+
exists(HTML::Element e |
122+
e.getFile() = f and
123+
e.getName() = "meta" and
124+
e.getAttributeByName("name").getValue() = "generator"
125+
)
126+
}
127+
117128
/**
118129
* Holds if `tl` looks like it contains generated code.
119130
*/
@@ -124,12 +135,14 @@ predicate isGenerated(TopLevel tl) {
124135
tl instanceof DartGeneratedTopLevel or
125136
exists (GeneratedCodeMarkerComment gcmc | tl = gcmc.getTopLevel()) or
126137
hasManyInvocations(tl) or
127-
isData(tl.getFile())
138+
isData(tl.getFile()) or
139+
isGeneratedHtml(tl.getFile())
128140
}
129141

130142
/**
131143
* Holds if `file` looks like it contains generated code.
132144
*/
133145
predicate isGeneratedCode(File file) {
134-
isGenerated(file.getATopLevel())
146+
isGenerated(file.getATopLevel()) or
147+
isGeneratedHtml(file)
135148
}

javascript/ql/src/semmle/javascript/frameworks/Bundling.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,10 @@ predicate isMultiLicenseBundle(TopLevel tl) {
222222
)
223223
) > 1
224224
or
225-
// case: ordinary block comments with "@license" lines
225+
// case: ordinary block comments containing lines that start with a license declaration
226226
count(BlockComment head |
227227
head.getTopLevel() = tl and
228-
head.getLine(_).regexpMatch("(?i) *\\* @license .*")
228+
head.getLine(_).regexpMatch("(?i)[\\s*]*(@license\\b.*|The [a-z0-9-]+ License (\\([a-z0-9-]+\\))?\\s*)")
229229
) > 1
230230
}
231231

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/*
2+
* Code generated by Microsoft (R) AutoRest Code Generator.
3+
* Changes may cause incorrect behavior and will be lost if the code is
4+
* regenerated.
5+
*/

javascript/ql/test/query-tests/filters/ClassifyFiles/ClassifyFiles.expected

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
| AutoRest.js:0:0:0:0 | AutoRest.js | generated |
12
| ai.1.2.3-build0123.js:0:0:0:0 | ai.1.2.3-build0123.js | library |
23
| bundle-directive.js:0:0:0:0 | bundle-directive.js | generated |
34
| data.js:0:0:0:0 | data.js | generated |
5+
| etherpad.html:0:0:0:0 | etherpad.html | generated |
46
| exported-data.js:0:0:0:0 | exported-data.js | generated |
7+
| htmltidy.html:0:0:0:0 | htmltidy.html | generated |
58
| jison-lex.js:0:0:0:0 | jison-lex.js | generated |
69
| jison.js:0:0:0:0 | jison.js | generated |
710
| jquery-datatables.js:0:0:0:0 | jquery-datatables.js | library |
@@ -12,12 +15,19 @@
1215
| multi-part-bundle.html:0:0:0:0 | multi-part-bundle.html | generated |
1316
| multi-part-bundle.js:0:0:0:0 | multi-part-bundle.js | generated |
1417
| multiple-licenses-2.js:0:0:0:0 | multiple-licenses-2.js | generated |
18+
| multiple-licenses-3.js:0:0:0:0 | multiple-licenses-3.js | generated |
19+
| multiple-licenses-4.js:0:0:0:0 | multiple-licenses-4.js | generated |
1520
| multiple-licenses.js:0:0:0:0 | multiple-licenses.js | generated |
1621
| opal-test.js:0:0:0:0 | opal-test.js | generated |
22+
| orgmode.html:0:0:0:0 | orgmode.html | generated |
23+
| pandoc.html:0:0:0:0 | pandoc.html | generated |
1724
| peg-js.js:0:0:0:0 | peg-js.js | generated |
1825
| polymer.html:0:0:0:0 | polymer.html | template |
26+
| purs-bundle.js:0:0:0:0 | purs-bundle.js | generated |
27+
| purs.js:0:0:0:0 | purs.js | generated |
1928
| some-template.html:0:0:0:0 | some-template.html | template |
2029
| templ.js:0:0:0:0 | templ.js | template |
30+
| textmate.html:0:0:0:0 | textmate.html | generated |
2131
| tmpl2.html:0:0:0:0 | tmpl2.html | template |
2232
| tmpl.html:0:0:0:0 | tmpl.html | template |
2333
| tst.browserify.js:0:0:0:0 | tst.browserify.js | generated |
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
| AutoRest.js:1:1:5:3 | /*\\n * C ... ed.\\n */ | Microsoft (R) AutoRest Code Generator |
12
| jison-lex.js:1:1:1:34 | /* gene ... .2.1 */ | jison-lex |
23
| jison.js:1:1:1:38 | /* pars ... 4.13 */ | jison |
34
| jsx-old.js:1:1:1:106 | // gene ... 977102) | JSX |
45
| jsx.js:1:1:1:105 | // gene ... 977102) | JSX |
56
| opal-test.js:1:1:1:30 | /* Gene ... 10.3 */ | Opal |
67
| peg-js.js:2:3:6:5 | /*\\n * ... /\\n */ | PEG.js |
8+
| purs-bundle.js:1:1:1:34 | // Gene ... 0.11.7 | purs |
9+
| purs.js:1:1:1:35 | // Gene ... 0.11.7 | purs |
710
| tst.dart.js:1:1:1:57 | // Gene ... mpiler. | dart2js |
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<!doctype html>
2+
<html lang="en">
3+
<head>
4+
<title><%- padId %></title>
5+
<meta name="generator" content="Etherpad">
6+
<script type="text/javascript">//</script>
7+
</head>
8+
<body>
9+
</body>
10+
</html>
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
2+
<html>
3+
<head>
4+
<meta name="generator" content="HTML Tidy for Linux/x86 (vers 25 March 2009), see www.w3.org">
5+
<script type="text/javascript">//</script>
6+
</head>
7+
<body>
8+
</body>
9+
</html>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
2+
* @license
3+
*/
4+
/**
5+
* @license
6+
*/
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
2+
* The MIT License (MIT)
3+
*/
4+
/**
5+
* The MIT License (MIT)
6+
*/

0 commit comments

Comments
 (0)