11package com .semmle .js .extractor ;
22
3+ import java .io .BufferedReader ;
4+ import java .io .File ;
5+ import java .io .FileInputStream ;
6+ import java .io .FileReader ;
7+ import java .io .IOException ;
8+ import java .nio .charset .Charset ;
9+ import java .nio .charset .StandardCharsets ;
10+ import java .util .LinkedHashSet ;
11+ import java .util .Set ;
12+ import java .util .regex .Pattern ;
13+
314import com .semmle .js .extractor .ExtractionMetrics .ExtractionPhase ;
415import com .semmle .js .extractor .trapcache .CachingTrapWriter ;
516import com .semmle .js .extractor .trapcache .ITrapCache ;
1021import com .semmle .util .io .WholeIO ;
1122import com .semmle .util .trap .TrapWriter ;
1223import com .semmle .util .trap .TrapWriter .Label ;
13- import java .io .BufferedReader ;
14- import java .io .File ;
15- import java .io .FileInputStream ;
16- import java .io .FileReader ;
17- import java .io .IOException ;
18- import java .nio .charset .Charset ;
19- import java .util .LinkedHashSet ;
20- import java .util .Set ;
21- import java .util .regex .Pattern ;
2224
2325/**
2426 * The file extractor extracts a single file and handles source archive population and TRAP caching;
@@ -154,6 +156,9 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
154156 byte [] bytes = new byte [fileHeaderSize ];
155157 int length = fis .read (bytes );
156158
159+ if (length == -1 )
160+ return false ;
161+
157162 // Avoid invalid or unprintable UTF-8 files.
158163 if (config .getDefaultEncoding ().equals ("UTF-8" ) && hasUnprintableUtf8 (bytes , length )) {
159164 return true ;
@@ -167,6 +172,9 @@ private boolean hasBadFileHeader(File f, String lcExt, ExtractorConfig config) {
167172 return true ;
168173 }
169174
175+ // Avoid Touchstone files
176+ if (isTouchstone (bytes , length )) return true ;
177+
170178 return false ;
171179 } catch (IOException e ) {
172180 Exceptions .ignore (e , "Let extractor handle this one." );
@@ -198,6 +206,11 @@ private boolean isXml(byte[] bytes, int length) {
198206 return false ;
199207 }
200208
209+ private boolean isTouchstone (byte [] bytes , int length ) {
210+ String s = new String (bytes , 0 , length , StandardCharsets .US_ASCII );
211+ return s .startsWith ("! TOUCHSTONE file " ) || s .startsWith ("[Version] 2.0" );
212+ }
213+
201214 /**
202215 * Returns true if the byte sequence contains invalid UTF-8 or unprintable ASCII characters.
203216 */
0 commit comments