Skip to content

Commit d625ebf

Browse files
author
Max Schaefer
committed
JavaScript: Extract JavaScript files after TypeScript files.
This way we only start multi-threaded extraction after the TypeScript parser has already been shut down, reducing the chance of running out of memory.
1 parent 57133f9 commit d625ebf

2 files changed

Lines changed: 91 additions & 72 deletions

File tree

javascript/extractor/src/com/semmle/js/extractor/AutoBuild.java

Lines changed: 85 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,8 @@ private boolean addPathPattern(Set<Path> patterns, Path base, String pattern) {
377377
public void run() throws IOException {
378378
threadPool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
379379
try {
380-
extractExterns();
381380
extractSource();
381+
extractExterns();
382382
} finally {
383383
threadPool.shutdown();
384384
}
@@ -442,9 +442,90 @@ private void extractSource() throws IOException {
442442
if (defaultEncoding != null)
443443
config = config.withDefaultEncoding(defaultEncoding);
444444
FileExtractor extractor = new FileExtractor(config, outputConfig, trapCache);
445+
446+
Set<Path> filesToExtract = new LinkedHashSet<>();
447+
List<Path> tsconfigFiles = new ArrayList<>();
448+
findFilesToExtract(extractor, filesToExtract, tsconfigFiles);
449+
450+
// extract TypeScript projects and files
451+
Set<Path> extractedFiles = extractTypeScript(extractor, filesToExtract, tsconfigFiles);
452+
453+
// extract remaining files
454+
for (Path f : filesToExtract) {
455+
if (extractedFiles.add(f)) {
456+
extract(extractor, f, null);
457+
}
458+
}
459+
}
460+
461+
/**
 * Extracts all TypeScript projects and stand-alone TypeScript files, then shuts
 * down the TypeScript parser process.
 *
 * <p>The parser is only created if {@code files} contains at least one file with a
 * TypeScript extension or if {@code tsconfig} is non-empty; otherwise this method
 * does nothing and returns an empty set.
 *
 * @param extractor the file extractor handed on to {@code extractTypeScriptFiles}
 * @param files the files selected for extraction; project source files that are
 *     not in this set are skipped
 * @param tsconfig the {@code tsconfig} project files to open, in order
 * @return the set that was passed to {@code extractTypeScriptFiles} as its
 *     {@code extractedFiles} argument (presumably filled with every file it
 *     extracted; its body is not fully visible here, so verify)
 */
private Set<Path> extractTypeScript(FileExtractor extractor, Set<Path> files, List<Path> tsconfig) {
462+
Set<Path> extractedFiles = new LinkedHashSet<>();
463+
464+
// Only start the TypeScript parser when there is TypeScript work to do.
if (hasTypeScriptFiles(files) || !tsconfig.isEmpty()) {
465+
ExtractorState extractorState = new ExtractorState();
466+
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
467+
verifyTypeScriptInstallation(extractorState);
468+
469+
// Extract TypeScript projects
470+
for (Path projectPath : tsconfig) {
471+
File projectFile = projectPath.toFile();
472+
long start = logBeginProcess("Opening project " + projectFile);
473+
ParsedProject project = tsParser.openProject(projectFile);
474+
logEndProcess(start, "Done opening project " + projectFile);
475+
// Extract all files belonging to this project which are also matched
476+
// by our include/exclude filters.
477+
List<File> typeScriptFiles = new ArrayList<File>();
478+
for (File sourceFile : project.getSourceFiles()) {
479+
Path sourcePath = sourceFile.toPath();
480+
if (!files.contains(normalizePath(sourcePath)))
481+
continue;
482+
// NOTE(review): the lookup above uses the normalized path, while this one
// uses the raw path; confirm which form extractTypeScriptFiles stores in
// extractedFiles, otherwise a file shared by two projects may be queued twice.
if (!extractedFiles.contains(sourcePath)) {
483+
typeScriptFiles.add(sourcePath.toFile());
484+
}
485+
}
486+
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor, extractorState);
487+
tsParser.closeProject(projectFile);
488+
}
489+
490+
// Extract all the types discovered when extracting the ASTs.
// This happens once, after all projects; the first tsconfig path is the one
// passed to extractTypeTable.
491+
if (!tsconfig.isEmpty()) {
492+
TypeTable typeTable = tsParser.getTypeTable();
493+
extractTypeTable(tsconfig.iterator().next(), typeTable);
494+
}
495+
496+
// Extract remaining TypeScript files, i.e. files with a TypeScript extension
// that are not yet in extractedFiles after the project pass.
497+
List<File> remainingTypeScriptFiles = new ArrayList<File>();
498+
for (Path f : files) {
499+
if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
500+
remainingTypeScriptFiles.add(f.toFile());
501+
}
502+
}
503+
if (!remainingTypeScriptFiles.isEmpty()) {
504+
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor, extractorState);
505+
}
506+
507+
// The TypeScript compiler instance is no longer needed.
// NOTE(review): this call is not in a finally block, so it is skipped if
// anything above throws; confirm whether the process is cleaned up elsewhere.
508+
tsParser.killProcess();
509+
}
510+
511+
return extractedFiles;
512+
}
513+
514+
/**
 * Checks whether any of the given files has a TypeScript file extension.
 *
 * @param filesToExtract the candidate files to inspect
 * @return {@code true} as soon as one file maps to {@code FileType.TYPESCRIPT}
 *     by extension; {@code false} if none does (including for an empty set)
 */
private boolean hasTypeScriptFiles(Set<Path> filesToExtract) {
515+
for (Path file : filesToExtract) {
516+
// Check if there are any files with the TypeScript extension.
517+
// Do not use FileType.forFile as it involves I/O for file header checks,
518+
// and files with a bad header have already been excluded.
519+
if (FileType.forFileExtension(file.toFile()) == FileType.TYPESCRIPT)
520+
return true;
521+
}
522+
return false;
523+
}
524+
525+
private void findFilesToExtract(FileExtractor extractor,
526+
final Set<Path> filesToExtract, final List<Path> tsconfigFiles)
527+
throws IOException {
445528
Path[] currentRoot = new Path[1];
446-
final Set<Path> filesToExtract = new LinkedHashSet<>();
447-
final List<Path> tsconfigFiles = new ArrayList<>();
448529
FileVisitor<? super Path> visitor = new SimpleFileVisitor<Path>() {
449530
private boolean isFileIncluded(Path file) {
450531
// normalise path for matching
@@ -486,72 +567,6 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) th
486567
currentRoot[0] = root;
487568
Files.walkFileTree(currentRoot[0], visitor);
488569
}
489-
490-
// If there are any .ts files, verify that TypeScript is installed.
491-
ExtractorState extractorState = new ExtractorState();
492-
TypeScriptParser tsParser = extractorState.getTypeScriptParser();
493-
boolean hasTypeScriptFiles = false;
494-
for (Path file : filesToExtract) {
495-
// Check if there are any files with the TypeScript extension.
496-
// Do not use FileType.forFile as it involves I/O for file header checks,
497-
// and files with a bad header have already been excluded.
498-
if (FileType.forFileExtension(file.toFile()) == FileType.TYPESCRIPT) {
499-
hasTypeScriptFiles = true;
500-
break;
501-
}
502-
}
503-
if (hasTypeScriptFiles || !tsconfigFiles.isEmpty()) {
504-
verifyTypeScriptInstallation(extractorState);
505-
}
506-
507-
// Extract TypeScript projects
508-
Set<Path> extractedFiles = new LinkedHashSet<>();
509-
for (Path projectPath : tsconfigFiles) {
510-
File projectFile = projectPath.toFile();
511-
long start = logBeginProcess("Opening project " + projectFile);
512-
ParsedProject project = tsParser.openProject(projectFile);
513-
logEndProcess(start, "Done opening project " + projectFile);
514-
// Extract all files belonging to this project which are also matched
515-
// by our include/exclude filters.
516-
List<File> typeScriptFiles = new ArrayList<File>();
517-
for (File sourceFile : project.getSourceFiles()) {
518-
Path sourcePath = sourceFile.toPath();
519-
if (!filesToExtract.contains(normalizePath(sourcePath)))
520-
continue;
521-
if (!extractedFiles.contains(sourcePath)) {
522-
typeScriptFiles.add(sourcePath.toFile());
523-
}
524-
}
525-
extractTypeScriptFiles(typeScriptFiles, extractedFiles, extractor, extractorState);
526-
tsParser.closeProject(projectFile);
527-
}
528-
529-
if (!tsconfigFiles.isEmpty()) {
530-
// Extract all the types discovered when extracting the ASTs.
531-
TypeTable typeTable = tsParser.getTypeTable();
532-
extractTypeTable(tsconfigFiles.iterator().next(), typeTable);
533-
}
534-
535-
// Extract remaining TypeScript files.
536-
List<File> remainingTypeScriptFiles = new ArrayList<File>();
537-
for (Path f : filesToExtract) {
538-
if (!extractedFiles.contains(f) && FileType.forFileExtension(f.toFile()) == FileType.TYPESCRIPT) {
539-
remainingTypeScriptFiles.add(f.toFile());
540-
}
541-
}
542-
if (!remainingTypeScriptFiles.isEmpty()) {
543-
extractTypeScriptFiles(remainingTypeScriptFiles, extractedFiles, extractor, extractorState);
544-
}
545-
546-
// The TypeScript compiler instance is no longer needed.
547-
tsParser.killProcess();
548-
549-
// Extract non-TypeScript files
550-
for (Path f : filesToExtract) {
551-
if (extractedFiles.add(f)) {
552-
extract(extractor, f, null);
553-
}
554-
}
555570
}
556571

557572
/**
@@ -563,7 +578,7 @@ public void verifyTypeScriptInstallation(ExtractorState extractorState) {
563578
}
564579

565580
public void extractTypeScriptFiles(List<File> files, Set<Path> extractedFiles,
566-
FileExtractor extractor, ExtractorState extractorState) throws IOException {
581+
FileExtractor extractor, ExtractorState extractorState) {
567582
extractorState.getTypeScriptParser().prepareFiles(files);
568583
for (File f : files) {
569584
Path path = f.toPath();

javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,8 +452,7 @@ public void extract(File f, ExtractorState state) throws IOException {
452452
private void extractContents(File f, Label fileLabel, String source, LocationManager locationManager,
453453
ExtractorState state) throws IOException {
454454
TrapWriter trapwriter = locationManager.getTrapWriter();
455-
FileType fileType = config.hasFileType() ? FileType.valueOf(config.getFileType())
456-
: FileType.forFile(f, config);
455+
FileType fileType = getFileType(f);
457456

458457
File cacheFile = null, // the cache file for this extraction
459458
resultFile = null; // the final result TRAP file for this extraction
@@ -501,6 +500,11 @@ private void extractContents(File f, Label fileLabel, String source, LocationMan
501500
}
502501
}
503502

503+
/**
 * Determines the file type to use when extracting {@code f}.
 *
 * <p>If the extractor configuration specifies a file type, that type is used
 * (looked up via {@code FileType.valueOf}) regardless of the file itself;
 * otherwise the type is determined by {@code FileType.forFile(f, config)}.
 *
 * @param f the file whose type is requested
 * @return the configured file type if one is set, else the type detected for {@code f}
 */
public FileType getFileType(File f) {
504+
return config.hasFileType() ? FileType.valueOf(config.getFileType())
505+
: FileType.forFile(f, config);
506+
}
507+
504508
/**
505509
* Bump trap ID counter to separate path-dependent and path-independent parts of the TRAP file.
506510
*

0 commit comments

Comments
 (0)