Skip to content

Commit d743fbf

Browse files
committed
Add initial Java bytecode extractor and QL support
Introduces a new .NET-based Java bytecode extractor using IKVM.ByteCode, including main extraction logic, TRAP file writer, and configuration files. Adds QL library for JVM instructions and test queries for bytecode extraction. Updates the binary DB scheme to support JVM entities.
1 parent 85d9007 commit d743fbf

14 files changed

Lines changed: 2932 additions & 1 deletion

File tree

binary/extractor/jvm/Semmle.Extraction.Java.ByteCode/JvmExtractor.cs

Lines changed: 783 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
using Semmle.Extraction.Java.ByteCode.Trap;
2+
3+
namespace Semmle.Extraction.Java.ByteCode;
4+
5+
/// <summary>
/// Command-line entry point of the Java bytecode extractor.
/// Resolves the input arguments to a list of .class/.jar files, writes one TRAP
/// file per input into CODEQL_EXTRACTOR_JVM_TRAP_DIR and copies each successfully
/// extracted input into CODEQL_EXTRACTOR_JVM_SOURCE_ARCHIVE_DIR.
/// </summary>
class Program
{
    private static readonly HashSet<string> AllowedExtensions =
        new(StringComparer.OrdinalIgnoreCase) { ".class", ".jar" };

    /// <summary>
    /// Runs the extractor.
    /// </summary>
    /// <param name="args">
    /// Paths to .class/.jar files, and/or paths to text files that list one such path per line.
    /// </param>
    /// <returns>0 when every input was extracted; 1 on usage/configuration errors or when any input failed.</returns>
    static int Main(string[] args)
    {
        if (args.Length == 0)
        {
            Console.WriteLine("Usage: Semmle.Extraction.Java.ByteCode <file-or-list-path> [additional-files...]");
            Console.WriteLine(" If the argument is a .class or .jar file, extract it directly.");
            Console.WriteLine(" Otherwise, treat it as a file containing paths to .class or .jar files.");
            return 1;
        }

        var trapDir = Environment.GetEnvironmentVariable("CODEQL_EXTRACTOR_JVM_TRAP_DIR");
        if (string.IsNullOrEmpty(trapDir))
        {
            Console.Error.WriteLine("Error: CODEQL_EXTRACTOR_JVM_TRAP_DIR environment variable not set");
            return 1;
        }

        var sourceArchiveDir = Environment.GetEnvironmentVariable("CODEQL_EXTRACTOR_JVM_SOURCE_ARCHIVE_DIR");
        if (string.IsNullOrEmpty(sourceArchiveDir))
        {
            Console.Error.WriteLine("Error: CODEQL_EXTRACTOR_JVM_SOURCE_ARCHIVE_DIR environment variable not set");
            return 1;
        }

        var files = CollectInputFiles(args);

        Console.WriteLine($"Processing {files.Count} file(s)...");

        int successCount = 0;
        int errorCount = 0;

        foreach (var filePath in files)
        {
            if (!File.Exists(filePath))
            {
                // Warnings go to stderr, like every other diagnostic in this program.
                Console.Error.WriteLine($"Warning: File does not exist: {filePath}");
                errorCount++;
                continue;
            }

            // Entries that came from a list file have not been filtered by extension yet.
            if (!AllowedExtensions.Contains(Path.GetExtension(filePath)))
            {
                Console.WriteLine($"Skipping unsupported file type: {filePath}");
                continue;
            }

            var outputPath = GetTrapPath(trapDir, filePath);

            Console.WriteLine($"Extracting: {filePath}");

            // Tracks whether the TRAP file was fully written and closed, so that a failure
            // while archiving does not throw away a complete TRAP file.
            var trapComplete = false;
            try
            {
                // Explicit scope: the writer is flushed and closed before success is reported.
                using (var trapWriter = new TrapWriter(outputPath))
                {
                    var extractor = new JvmExtractor(trapWriter);
                    extractor.Extract(filePath);
                }

                trapComplete = true;

                // Copy to source archive
                ArchiveFile(filePath, sourceArchiveDir);

                successCount++;
                Console.WriteLine($" -> {outputPath}");
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine($"Error extracting {filePath}: {ex.Message}");
                Console.Error.WriteLine(ex.StackTrace);
                errorCount++;

                if (!trapComplete)
                {
                    // A truncated TRAP file must not be left behind in the TRAP directory.
                    DeletePartialTrap(outputPath);
                }
            }
        }

        Console.WriteLine($"\nExtraction complete: {successCount} succeeded, {errorCount} failed");
        return errorCount > 0 ? 1 : 0;
    }

    /// <summary>
    /// Expands the command-line arguments into the list of candidate input files.
    /// Arguments ending in an allowed extension are taken as-is; any other existing
    /// file is read as a list with one path per line (blank lines are ignored and
    /// surrounding whitespace is trimmed). Anything else produces a warning.
    /// </summary>
    private static List<string> CollectInputFiles(string[] args)
    {
        var files = new List<string>();

        foreach (var arg in args)
        {
            if (AllowedExtensions.Contains(Path.GetExtension(arg)))
            {
                // Direct .class or .jar file
                files.Add(arg);
            }
            else if (File.Exists(arg))
            {
                // File list
                files.AddRange(File.ReadAllLines(arg)
                    .Select(line => line.Trim())
                    .Where(line => line.Length > 0));
            }
            else
            {
                Console.Error.WriteLine($"Warning: Argument not found or unsupported: {arg}");
            }
        }

        return files;
    }

    /// <summary>
    /// Builds the TRAP output path for an input file. The name keeps the input's base
    /// name for readability and appends a short hash of the full input path, so that
    /// inputs sharing a base name in different directories (for example two
    /// package-info.class files) do not overwrite each other's TRAP file.
    /// </summary>
    private static string GetTrapPath(string trapDir, string filePath)
    {
        var fullPath = Path.GetFullPath(filePath);
        var digest = System.Security.Cryptography.SHA256.HashData(
            System.Text.Encoding.UTF8.GetBytes(fullPath));
        var suffix = Convert.ToHexString(digest, 0, 8).ToLowerInvariant();
        var baseName = Path.GetFileNameWithoutExtension(filePath);

        return Path.Combine(trapDir, $"{baseName}.{suffix}.trap");
    }

    /// <summary>
    /// Best-effort removal of a TRAP file whose extraction failed part-way.
    /// A failure to delete is reported as a warning and otherwise ignored.
    /// </summary>
    private static void DeletePartialTrap(string trapPath)
    {
        try
        {
            File.Delete(trapPath);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            Console.Error.WriteLine($"Warning: Could not remove incomplete TRAP file {trapPath}: {ex.Message}");
        }
    }

    /// <summary>
    /// Copies an input file into the source archive, mirroring its full path below
    /// <paramref name="archiveDir"/> and overwriting any previous copy.
    /// </summary>
    /// <param name="sourcePath">Path of the file to archive (absolute or relative).</param>
    /// <param name="archiveDir">Root directory of the source archive.</param>
    private static void ArchiveFile(string sourcePath, string archiveDir)
    {
        // Resolve to a full path first: this removes any ".." segments, so the
        // archive path below can never point outside archiveDir.
        var fullPath = Path.GetFullPath(sourcePath);

        // Make the path relative for the archive: strip every leading separator
        // (including both of a UNC prefix) and turn the drive colon into '_'.
        // If it stayed rooted, Path.Combine would discard archiveDir entirely.
        var relativePath = fullPath
            .TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar)
            .Replace(":", "_");
        var archivePath = Path.Combine(archiveDir, relativePath);
        var dir = Path.GetDirectoryName(archivePath);

        if (!string.IsNullOrEmpty(dir))
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(dir);
        }

        File.Copy(sourcePath, archivePath, true);
    }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console executable targeting .NET 8. ImplicitUsings is enabled, which is why the
     C# sources of this project use Console, File, HashSet and LINQ without explicit
     using directives; Nullable enables nullable-reference-type analysis. -->
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
<!-- Runtime identifiers this project can be restored/published for. -->
<RuntimeIdentifiers>osx-arm64;osx-x64;linux-x64;win-x64</RuntimeIdentifiers>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<!-- Primary parser: IKVM.ByteCode for Java class file parsing -->
13+
<PackageReference Include="IKVM.ByteCode" Version="9.3.11" />
14+
</ItemGroup>
15+
16+
</Project>
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
using System.IO;
2+
3+
namespace Semmle.Extraction.Java.ByteCode.Trap;
4+
5+
/// <summary>
/// Simple TRAP file writer - writes tuples as text lines.
/// Reused pattern from CIL extractor.
/// All numbers are formatted with the invariant culture so the output does not
/// depend on the locale of the machine running the extractor.
/// </summary>
public class TrapWriter : IDisposable {
    private readonly TextWriter writer;
    private readonly string trapFilePath;
    private int nextId = 1;

    /// <summary>
    /// Creates (or overwrites) the TRAP file at <paramref name="outputPath"/>,
    /// creating its parent directory when necessary.
    /// </summary>
    /// <param name="outputPath">Path of the TRAP file to write.</param>
    public TrapWriter(string outputPath) {
        trapFilePath = outputPath;
        // Ensure directory exists (CreateDirectory is a no-op when it already does)
        var dir = Path.GetDirectoryName(trapFilePath);
        if (!string.IsNullOrEmpty(dir)) {
            Directory.CreateDirectory(dir);
        }
        writer = new StreamWriter(trapFilePath);
    }

    /// <summary>
    /// Get a unique ID for an entity. IDs start at 1 and increase by one per call.
    /// </summary>
    public int GetId() {
        return nextId++;
    }

    /// <summary>
    /// Write a tuple to the TRAP file.
    /// Format: predicate(arg1, arg2, ...)
    /// </summary>
    /// <param name="predicate">Name written in front of the argument list.</param>
    /// <param name="args">Column values; see <see cref="WriteValue"/> for how each type is rendered.</param>
    public void WriteTuple(string predicate, params object[] args) {
        writer.Write(predicate);
        writer.Write('(');

        for (int i = 0; i < args.Length; i++) {
            if (i > 0) {
                writer.Write(", ");
            }

            WriteValue(args[i]);
        }

        writer.WriteLine(')');
    }

    /// <summary>
    /// Writes a single column value: floats/doubles with a guaranteed decimal point,
    /// strings quoted and escaped, null as the literal <c>null</c>, and every other
    /// formattable value (int, long, short, ...) via the invariant culture.
    /// </summary>
    private void WriteValue(object value) {
        switch (value) {
            case float f:
                WriteFloat(f);
                break;
            case double d:
                WriteDouble(d);
                break;
            case string s:
                // Escape string and wrap in quotes
                writer.Write('"');
                writer.Write(EscapeString(s));
                writer.Write('"');
                break;
            case null:
                writer.Write("null");
                break;
            case IFormattable formattable:
                // TextWriter.Write(int/long) and object.ToString() format with the current
                // culture, whose negative sign or digits may not be plain ASCII. Always use
                // the invariant culture for machine-readable output.
                writer.Write(formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture));
                break;
            default:
                writer.Write(value.ToString());
                break;
        }
    }

    private void WriteFloat(float f) {
        // Use InvariantCulture to ensure decimal point is '.' not ','
        var str = f.ToString("G", System.Globalization.CultureInfo.InvariantCulture);
        writer.Write(str);
        // NaN/Infinity are written as-is: appending ".0" would produce "NaN.0".
        // NOTE(review): confirm how the TRAP importer expects non-finite values.
        if (float.IsFinite(f) && LacksDecimalPoint(str)) {
            writer.Write(".0");
        }
    }

    private void WriteDouble(double d) {
        // Use InvariantCulture to ensure decimal point is '.' not ','
        var str = d.ToString("G", System.Globalization.CultureInfo.InvariantCulture);
        writer.Write(str);
        // NaN/Infinity are written as-is: appending ".0" would produce "NaN.0".
        // NOTE(review): confirm how the TRAP importer expects non-finite values.
        if (double.IsFinite(d) && LacksDecimalPoint(str)) {
            writer.Write(".0");
        }
    }

    /// <summary>
    /// True when a formatted finite number has neither a decimal point nor an exponent,
    /// i.e. it would read as an integer unless ".0" is appended.
    /// </summary>
    private static bool LacksDecimalPoint(string formatted) {
        return !formatted.Contains('.') && !formatted.Contains('E') && !formatted.Contains('e');
    }

    private string EscapeString(string s) {
        // Basic escaping for TRAP format. The backslash must be replaced first so the
        // backslashes introduced by the later replacements are not escaped again.
        // NOTE(review): confirm these backslash escapes match what the TRAP importer expects.
        return s.Replace("\\", "\\\\")
            .Replace("\"", "\\\"")
            .Replace("\n", "\\n")
            .Replace("\r", "\\r")
            .Replace("\t", "\\t");
    }

    /// <summary>
    /// Flushes and closes the underlying TRAP file.
    /// </summary>
    public void Dispose() {
        writer.Dispose();
    }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Extractor descriptor for the Java bytecode ("jvm") extractor.
name: "jvm"
2+
aliases:
3+
- "jvm"
4+
- "java-bytecode"
5+
display_name: "Java Bytecode"
6+
version: 0.0.1
7+
column_kind: "utf16"
8+
build_modes:
9+
- none
10+
file_types:
11+
- name: jvm
12+
display_name: Java Bytecode
13+
# Same extensions that Program.AllowedExtensions accepts in the extractor's entry point.
extensions:
14+
- .class
15+
- .jar
16+
dbscheme: semmlecode.binary.dbscheme

0 commit comments

Comments
 (0)