22
33import java .io .File ;
44import java .io .IOException ;
5+ import java .nio .file .Path ;
6+ import java .nio .file .Paths ;
57import java .util .ArrayList ;
68import java .util .LinkedHashSet ;
79import java .util .List ;
3133import com .semmle .util .language .LegacyLanguage ;
3234import com .semmle .util .process .ArgsParser ;
3335import com .semmle .util .process .ArgsParser .FileMode ;
36+ import com .semmle .util .process .Env ;
37+ import com .semmle .util .process .Env .Var ;
3438import com .semmle .util .trap .TrapWriter ;
3539
3640/** The main entry point of the JavaScript extractor. */
@@ -134,12 +138,6 @@ public void run(String[] args) {
134138 return ;
135139 }
136140
137- TypeScriptParser tsParser = extractorState .getTypeScriptParser ();
138- tsParser .setTypescriptRam (extractorConfig .getTypeScriptRam ());
139- if (containsTypeScriptFiles ()) {
140- tsParser .verifyInstallation (!ap .has (P_QUIET ));
141- }
142-
143141 // Sort files for determinism
144142 projectFiles = projectFiles .stream ()
145143 .sorted (AutoBuild .FILE_ORDERING )
@@ -149,16 +147,30 @@ public void run(String[] args) {
149147 .sorted (AutoBuild .FILE_ORDERING )
150148 .collect (Collectors .toCollection (() -> new LinkedHashSet <>()));
151149
150+ // Extract HTML files first, as they may contain embedded TypeScript code
151+ for (File file : files ) {
152+ if (FileType .forFile (file , extractorConfig ) == FileType .HTML ) {
153+ ensureFileIsExtracted (file , ap );
154+ }
155+ }
156+
157+ TypeScriptParser tsParser = extractorState .getTypeScriptParser ();
158+ tsParser .setTypescriptRam (extractorConfig .getTypeScriptRam ());
159+ if (containsTypeScriptFiles ()) {
160+ tsParser .verifyInstallation (!ap .has (P_QUIET ));
161+ }
162+
152163 for (File projectFile : projectFiles ) {
153164
154165 long start = verboseLogStartTimer (ap , "Opening project " + projectFile );
155- ParsedProject project = tsParser .openProject (projectFile , DependencyInstallationResult .empty , VirtualSourceRoot . none );
166+ ParsedProject project = tsParser .openProject (projectFile , DependencyInstallationResult .empty , extractorConfig . getVirtualSourceRoot () );
156167 verboseLogEndTimer (ap , start );
157168 // Extract all files belonging to this project which are also matched
158169 // by our include/exclude filters.
159170 List <File > filesToExtract = new ArrayList <>();
160171 for (File sourceFile : project .getOwnFiles ()) {
161- if (files .contains (normalizeFile (sourceFile ))
172+ File normalizedFile = normalizeFile (sourceFile );
173+ if ((files .contains (normalizedFile ) || extractorState .getSnippets ().containsKey (normalizedFile .toPath ()))
162174 && !extractedFiles .contains (sourceFile .getAbsoluteFile ())
163175 && FileType .TYPESCRIPT .getExtensions ().contains (FileUtil .extension (sourceFile ))) {
164176 filesToExtract .add (sourceFile );
@@ -287,10 +299,14 @@ private boolean containsTypeScriptFiles() {
287299 }
288300
289301 public void collectFiles (ArgsParser ap ) {
290- for (File f : ap . getOneOrMoreFiles ( "files" , FileMode . FILE_OR_DIRECTORY_MUST_EXIST ))
302+ for (File f : getFilesArg ( ap ))
291303 collectFiles (f , true );
292304 }
293305
306+ private List <File > getFilesArg (ArgsParser ap ) {
307+ return ap .getOneOrMoreFiles ("files" , FileMode .FILE_OR_DIRECTORY_MUST_EXIST );
308+ }
309+
294310 public void setupMatchers (ArgsParser ap ) {
295311 Set <String > includes = new LinkedHashSet <>();
296312
@@ -444,6 +460,21 @@ private static TypeScriptMode getTypeScriptMode(ArgsParser ap) {
444460 if (ap .has (P_TYPESCRIPT )) return TypeScriptMode .BASIC ;
445461 return TypeScriptMode .NONE ;
446462 }
463+
464+ private Path inferSourceRoot (ArgsParser ap ) {
465+ List <File > files = getFilesArg (ap );
466+ Path sourceRoot = files .iterator ().next ().toPath ().toAbsolutePath ().getParent ();
467+ for (File file : files ) {
468+ Path path = file .toPath ().toAbsolutePath ().getParent ();
469+ for (int i = 0 ; i < sourceRoot .getNameCount (); ++i ) {
470+ if (!(i < path .getNameCount () && path .getName (i ).equals (sourceRoot .getName (i )))) {
471+ sourceRoot = sourceRoot .subpath (0 , i );
472+ break ;
473+ }
474+ }
475+ }
476+ return sourceRoot ;
477+ }
447478
448479 private ExtractorConfig parseJSOptions (ArgsParser ap ) {
449480 ExtractorConfig cfg =
@@ -466,6 +497,17 @@ private ExtractorConfig parseJSOptions(ArgsParser ap) {
466497 ? UnitParser .parseOpt (ap .getString (P_TYPESCRIPT_RAM ), UnitParser .MEGABYTES )
467498 : 0 );
468499 if (ap .has (P_DEFAULT_ENCODING )) cfg = cfg .withDefaultEncoding (ap .getString (P_DEFAULT_ENCODING ));
500+
501+ // Make a usable virtual source root mapping.
502+ // The concepts of source root and scratch directory do not exist in the legacy extractor,
503+ // so we construct these based on what we have.
504+ String odasaDbDir = Env .systemEnv ().getNonEmpty (Var .ODASA_DB );
505+ VirtualSourceRoot virtualSourceRoot =
506+ odasaDbDir == null
507+ ? VirtualSourceRoot .none
508+ : new VirtualSourceRoot (inferSourceRoot (ap ), Paths .get (odasaDbDir , "working" ));
509+ cfg = cfg .withVirtualSourceRoot (virtualSourceRoot );
510+
469511 return cfg ;
470512 }
471513
0 commit comments