11package com .semmle .js .extractor ;
22
3- import com .semmle .js .extractor .ExtractionMetrics .ExtractionPhase ;
4- import com .semmle .js .extractor .trapcache .CachingTrapWriter ;
5- import com .semmle .js .extractor .trapcache .ITrapCache ;
6- import com .semmle .util .data .StringUtil ;
7- import com .semmle .util .exception .Exceptions ;
8- import com .semmle .util .extraction .ExtractorOutputConfig ;
9- import com .semmle .util .files .FileUtil ;
10- import com .semmle .util .io .WholeIO ;
11- import com .semmle .util .trap .TrapWriter ;
12- import com .semmle .util .trap .TrapWriter .Label ;
133import java .io .BufferedReader ;
144import java .io .File ;
155import java .io .FileInputStream ;
166import java .io .FileReader ;
177import java .io .IOException ;
188import java .nio .charset .Charset ;
199import java .nio .charset .StandardCharsets ;
10+ import java .nio .file .Path ;
2011import java .util .LinkedHashSet ;
2112import java .util .Set ;
2213import java .util .regex .Pattern ;
2314
15+ import com .semmle .js .extractor .ExtractionMetrics .ExtractionPhase ;
16+ import com .semmle .js .extractor .trapcache .CachingTrapWriter ;
17+ import com .semmle .js .extractor .trapcache .ITrapCache ;
18+ import com .semmle .util .data .StringUtil ;
19+ import com .semmle .util .exception .Exceptions ;
20+ import com .semmle .util .extraction .ExtractorOutputConfig ;
21+ import com .semmle .util .files .FileUtil ;
22+ import com .semmle .util .io .WholeIO ;
23+ import com .semmle .util .trap .TrapWriter ;
24+ import com .semmle .util .trap .TrapWriter .Label ;
25+
2426/**
2527 * The file extractor extracts a single file and handles source archive population and TRAP caching;
2628 * it delegates to the appropriate {@link IExtractor} for extracting the contents of the file.
@@ -47,7 +49,7 @@ public static enum FileType {
4749 HTML (".htm" , ".html" , ".xhtm" , ".xhtml" , ".vue" ) {
/**
 * Creates the extractor used for files of the HTML file type. The added line hands the shared
 * {@code ExtractorState} to the {@code HTMLExtractor} in addition to the configuration; the
 * removed line passed the configuration only.
 */
4850 @ Override
4951 public IExtractor mkExtractor (ExtractorConfig config , ExtractorState state ) {
50- return new HTMLExtractor (config );
52+ return new HTMLExtractor (config , state );
5153 }
5254
5355 @ Override
@@ -293,7 +295,7 @@ private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
293295
/**
 * Creates the extractor used for TypeScript files. The added line passes the whole
 * {@code ExtractorState} to the {@code TypeScriptExtractor}; the removed line passed only the
 * parser obtained from {@code state.getTypeScriptParser()}.
 */
294296 @ Override
295297 public IExtractor mkExtractor (ExtractorConfig config , ExtractorState state ) {
296- return new TypeScriptExtractor (config , state . getTypeScriptParser () );
298+ return new TypeScriptExtractor (config , state );
297299 }
298300
299301 @ Override
@@ -398,6 +400,10 @@ public boolean supports(File f) {
398400
399401 /** @return the number of lines of code extracted, or {@code null} if the file was cached */
400402 public Integer extract (File f , ExtractorState state ) throws IOException {
403+ FileSnippet snippet = state .getSnippets ().get (f .toPath ());
404+ if (snippet != null ) {
405+ return this .extractSnippet (f .toPath (), snippet , state );
406+ }
401407
402408 // populate source archive
403409 String source = new WholeIO (config .getDefaultEncoding ()).strictread (f );
@@ -414,6 +420,25 @@ public Integer extract(File f, ExtractorState state) throws IOException {
414420 return extractContents (f , fileLabel , source , locationManager , state );
415421 }
416422
423+ /**
424+ * Extract the contents of a file that is a snippet from another file.
425+ *
426+ * <p>A trap file will be derived from the snippet file, but its file label, source locations, and
427+ * source archive entry are based on the original file.
428+ */
429+ private Integer extractSnippet (Path file , FileSnippet origin , ExtractorState state ) throws IOException {
430+ TrapWriter trapwriter = outputConfig .getTrapWriterFactory ().mkTrapWriter (file .toFile ());
431+
432+ File originalFile = origin .getOriginalFile ().toFile ();
433+ Label fileLabel = trapwriter .populateFile (originalFile );
434+ LocationManager locationManager = new LocationManager (originalFile , trapwriter , fileLabel );
435+ locationManager .setStart (origin .getLine (), origin .getColumn ());
436+
437+ String source = new WholeIO (config .getDefaultEncoding ()).strictread (file );
438+
439+ return extractContents (file .toFile (), fileLabel , source , locationManager , state );
440+ }
441+
417442 /**
418443 * Extract the contents of a file, potentially making use of cached information.
419444 *
@@ -436,20 +461,20 @@ public Integer extract(File f, ExtractorState state) throws IOException {
436461 * obviously, no caching is done in that scenario.
437462 */
438463 private Integer extractContents (
439- File f , Label fileLabel , String source , LocationManager locationManager , ExtractorState state )
464+ File extractedFile , Label fileLabel , String source , LocationManager locationManager , ExtractorState state )
440465 throws IOException {
441466 ExtractionMetrics metrics = new ExtractionMetrics ();
442467 metrics .startPhase (ExtractionPhase .FileExtractor_extractContents );
443468 metrics .setLength (source .length ());
444469 metrics .setFileLabel (fileLabel );
445470 TrapWriter trapwriter = locationManager .getTrapWriter ();
446- FileType fileType = getFileType (f );
471+ FileType fileType = getFileType (extractedFile );
447472
448473 File cacheFile = null , // the cache file for this extraction
449474 resultFile = null ; // the final result TRAP file for this extraction
450475
451476 if (bumpIdCounter (trapwriter )) {
452- resultFile = outputConfig .getTrapWriterFactory ().getTrapFileFor (f );
477+ resultFile = outputConfig .getTrapWriterFactory ().getTrapFileFor (extractedFile );
453478 }
454479 // check whether we can perform caching
455480 if (resultFile != null && fileType .isTrapCachingAllowed ()) {
@@ -475,7 +500,7 @@ private Integer extractContents(
475500 trapwriter = new CachingTrapWriter (cacheFile , resultFile );
476501 bumpIdCounter (trapwriter );
477502 // re-initialise the location manager, since it keeps a reference to the TRAP writer
478- locationManager = new LocationManager (f , trapwriter , locationManager .getFileLabel ());
503+ locationManager = new LocationManager (extractedFile , trapwriter , locationManager .getFileLabel ());
479504 }
480505
481506 // now do the extraction itself
@@ -484,7 +509,7 @@ private Integer extractContents(
484509 IExtractor extractor = fileType .mkExtractor (config , state );
485510 TextualExtractor textualExtractor =
486511 new TextualExtractor (
487- trapwriter , locationManager , source , config .getExtractLines (), metrics );
512+ trapwriter , locationManager , source , config .getExtractLines (), metrics , extractedFile );
488513 LoCInfo loc = extractor .extract (textualExtractor );
489514 int numLines = textualExtractor .getNumLines ();
490515 int linesOfCode = loc .getLinesOfCode (), linesOfComments = loc .getLinesOfComments ();
0 commit comments