|
1 | 1 | package com.semmle.js.extractor; |
2 | 2 |
|
| 3 | +import java.io.BufferedReader; |
| 4 | +import java.io.File; |
| 5 | +import java.io.FileInputStream; |
| 6 | +import java.io.FileReader; |
| 7 | +import java.io.IOException; |
| 8 | +import java.nio.charset.Charset; |
| 9 | +import java.nio.charset.StandardCharsets; |
| 10 | +import java.util.LinkedHashSet; |
| 11 | +import java.util.Set; |
| 12 | +import java.util.regex.Pattern; |
| 13 | + |
3 | 14 | import com.semmle.js.extractor.ExtractionMetrics.ExtractionPhase; |
4 | 15 | import com.semmle.js.extractor.trapcache.CachingTrapWriter; |
5 | 16 | import com.semmle.js.extractor.trapcache.ITrapCache; |
|
10 | 21 | import com.semmle.util.io.WholeIO; |
11 | 22 | import com.semmle.util.trap.TrapWriter; |
12 | 23 | import com.semmle.util.trap.TrapWriter.Label; |
13 | | -import java.io.BufferedReader; |
14 | | -import java.io.File; |
15 | | -import java.io.FileInputStream; |
16 | | -import java.io.FileReader; |
17 | | -import java.io.IOException; |
18 | | -import java.nio.charset.Charset; |
19 | | -import java.util.LinkedHashSet; |
20 | | -import java.util.Set; |
21 | | -import java.util.regex.Pattern; |
22 | 24 |
|
23 | 25 | /** |
24 | 26 | * The file extractor extracts a single file and handles source archive population and TRAP caching; |
@@ -167,6 +169,9 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) { |
167 | 169 | return true; |
168 | 170 | } |
169 | 171 |
|
| 172 | + // Avoid Touchstone files |
| 173 | + if (isTouchstone(bytes, length)) return true; |
| 174 | + |
170 | 175 | return false; |
171 | 176 | } catch (IOException e) { |
172 | 177 | Exceptions.ignore(e, "Let extractor handle this one."); |
@@ -198,6 +203,11 @@ private boolean isXml(byte[] bytes, int length) { |
198 | 203 | return false; |
199 | 204 | } |
200 | 205 |
|
| 206 | + private boolean isTouchstone(byte[] bytes, int length) { |
| 207 | + String s = new String(bytes, 0, length, StandardCharsets.US_ASCII); |
| 208 | + return s.startsWith("! TOUCHSTONE file ") || s.startsWith("[Version] 2.0"); |
| 209 | + } |
| 210 | + |
201 | 211 | /** |
202 | 212 | * Returns true if the byte sequence contains invalid UTF-8 or unprintable ASCII characters. |
203 | 213 | */ |
|
0 commit comments