Skip to content

Commit d9c7549

Browse files
authored
Merge pull request #2279 from max-schaefer/js/touchstone-files
Approved by asger-semmle
2 parents 9b346b1 + d7831d2 commit d9c7549

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
package com.semmle.js.extractor;
22

3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.FileReader;
7+
import java.io.IOException;
8+
import java.nio.charset.Charset;
9+
import java.nio.charset.StandardCharsets;
10+
import java.util.LinkedHashSet;
11+
import java.util.Set;
12+
import java.util.regex.Pattern;
13+
314
import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase;
415
import com.semmle.js.extractor.trapcache.CachingTrapWriter;
516
import com.semmle.js.extractor.trapcache.ITrapCache;
@@ -10,15 +21,6 @@
1021
import com.semmle.util.io.WholeIO;
1122
import com.semmle.util.trap.TrapWriter;
1223
import com.semmle.util.trap.TrapWriter.Label;
13-
import java.io.BufferedReader;
14-
import java.io.File;
15-
import java.io.FileInputStream;
16-
import java.io.FileReader;
17-
import java.io.IOException;
18-
import java.nio.charset.Charset;
19-
import java.util.LinkedHashSet;
20-
import java.util.Set;
21-
import java.util.regex.Pattern;
2224

2325
/**
2426
* The file extractor extracts a single file and handles source archive population and TRAP caching;
@@ -154,6 +156,9 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
154156
byte[] bytes = new byte[fileHeaderSize];
155157
int length = fis.read(bytes);
156158

159+
if (length == -1)
160+
return false;
161+
157162
// Avoid invalid or unprintable UTF-8 files.
158163
if (config.getDefaultEncoding().equals("UTF-8") && hasUnprintableUtf8(bytes, length)) {
159164
return true;
@@ -167,6 +172,9 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
167172
return true;
168173
}
169174

175+
// Avoid Touchstone files
176+
if (isTouchstone(bytes, length)) return true;
177+
170178
return false;
171179
} catch (IOException e) {
172180
Exceptions.ignore(e, "Let extractor handle this one.");
@@ -198,6 +206,11 @@ private boolean isXml(byte[] bytes, int length) {
198206
return false;
199207
}
200208

209+
private boolean isTouchstone(byte[] bytes, int length) {
210+
String s = new String(bytes, 0, length, StandardCharsets.US_ASCII);
211+
return s.startsWith("! TOUCHSTONE file ") || s.startsWith("[Version] 2.0");
212+
}
213+
201214
/**
202215
* Returns true if the byte sequence contains invalid UTF-8 or unprintable ASCII characters.
203216
*/
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
! TOUCHSTONE file generated by me
2+
[Version] 2.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[Version] 2.0

0 commit comments

Comments
 (0)