Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### New Features

- CodeGraph now indexes CUDA (`.cu` / `.cuh`) and IEC 61131-3 Structured Text (SCL; `.scl` / `.st`) files. CUDA reuses the C++ parser and shares its resolution paths, so `#include` headers, function calls, struct definitions, and cross-language references between `.cu`, `.cpp`, and `.c`/`.h` files all resolve correctly. SCL files (both `.scl` and `.st`) are tracked at the file-record level only (no symbol extraction, matching the YAML and Twig conventions).

### Fixes

- SCL (.scl / .st) files were silently dropped during single-file indexing instead of being tracked as file records, because `isLanguageSupported` returned `false` for the language. They now behave consistently across both the batch and single-file indexing paths.

## [1.0.1] - 2026-06-13

Expand Down
149 changes: 148 additions & 1 deletion __tests__/extraction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import * as path from 'path';
import * as os from 'os';
import { CodeGraph } from '../src';
import { extractFromSource, scanDirectory, buildDefaultIgnore } from '../src/extraction';
import { detectLanguage, isLanguageSupported, getSupportedLanguages, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars';
import { detectLanguage, isLanguageSupported, getSupportedLanguages, isGrammarLoaded, isFileLevelOnlyLanguage, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars';
import { normalizePath } from '../src/utils';

beforeAll(async () => {
Expand Down Expand Up @@ -7161,3 +7161,150 @@ GeomPoint <- ggproto("GeomPoint", Geom,
});
});
});

// =============================================================================
// SCL (IEC 61131-3 Structured Text) — file-level-only
// =============================================================================

describe('SCL (IEC 61131-3 ST)', () => {
  // This suite checks extension detection, the support/grammar predicates,
  // and that indexing yields file records without any symbol nodes.
  describe('Language detection', () => {
    it('should detect SCL files by extension', () => {
      expect(detectLanguage('main.scl')).toBe('scl');
      expect(detectLanguage('control.st')).toBe('scl');
      expect(detectLanguage('src/plc/axis.scl')).toBe('scl');
    });

    it('should report SCL as supported', () => {
      expect(isLanguageSupported('scl')).toBe(true);
      expect(getSupportedLanguages()).toContain('scl');
    });

    it('should report SCL grammar as loaded (file-level-only, no WASM needed)', () => {
      expect(isGrammarLoaded('scl')).toBe(true);
    });

    it('should recognize SCL as file-level-only language', () => {
      expect(isFileLevelOnlyLanguage('scl')).toBe(true);
    });

    it('should track SCL source files (no errors for file-level-only)', () => {
      const code = `FUNCTION_BLOCK Motor\nVAR\n speed : INT;\nEND_VAR\nspeed := 100;\nEND_FUNCTION_BLOCK`;
      const result = extractFromSource('motor.scl', code);
      expect(result.errors).toHaveLength(0);
    });
  });

  describe('File-level tracking', () => {
    it('should index SCL files as file records with zero symbol nodes', async () => {
      const tempDir = createTempDir();
      try {
        fs.writeFileSync(path.join(tempDir, 'motor.scl'), 'FUNCTION_BLOCK Motor\nVAR speed : INT; END_VAR\nEND_FUNCTION_BLOCK\n');
        fs.writeFileSync(path.join(tempDir, 'conveyor.st'), 'PROGRAM Conveyor\nVAR state : BOOL; END_VAR\nEND_PROGRAM\n');

        const cg = CodeGraph.initSync(tempDir);
        // Inner try/finally: a failing expectation must not skip close(),
        // otherwise the instance is still open when the temp dir is removed.
        try {
          const result = await cg.indexAll();

          expect(result.success).toBe(true);
          expect(result.filesIndexed).toBe(2);

          const files = cg.getFiles();
          expect(files.length).toBe(2);
          const pathsAndLangs = files.map((f: any) => `${f.path}:${f.language}`).sort();
          // Normalize path separators so the comparison is platform-independent
          const normalized = pathsAndLangs.map((p: string) => p.replace(/\\/g, '/'));
          expect(normalized).toEqual(['conveyor.st:scl', 'motor.scl:scl']);

          // SCL files produce zero symbol nodes
          const symbols = cg.getNodesInFile('motor.scl');
          expect(symbols).toHaveLength(0);
        } finally {
          cg.close();
        }
      } finally {
        cleanupTempDir(tempDir);
      }
    });
  });
});

// =============================================================================
// CUDA (C++ dialect, reuses tree-sitter-cpp.wasm)
// =============================================================================

describe('CUDA', () => {
  describe('Language detection', () => {
    it('should detect CUDA files by extension', () => {
      // Source (.cu) and header (.cuh) extensions, with and without a directory prefix.
      for (const fileName of ['kernel.cu', 'common.cuh', 'cuda/kernel.cu']) {
        expect(detectLanguage(fileName)).toBe('cuda');
      }
    });

    it('should report CUDA as supported', () => {
      const supportedFlag = isLanguageSupported('cuda');
      expect(supportedFlag).toBe(true);
      const allLanguages = getSupportedLanguages();
      expect(allLanguages).toContain('cuda');
    });
  });

  describe('Extraction', () => {
    it('should extract functions from a CUDA kernel file (fixture)', () => {
      const source = fs.readFileSync(path.join(__dirname, 'fixtures', 'cuda', 'selective_scan.cu'), 'utf-8');
      const { nodes } = extractFromSource('selective_scan.cu', source);

      // The file node itself must carry the CUDA language tag.
      const fileNode = nodes.find((node) => node.kind === 'file');
      expect(fileNode).toBeDefined();
      expect(fileNode?.language).toBe('cuda');

      // First the __global__ kernel, then the __host__ launch function: each
      // must surface as a function node tagged as CUDA.
      for (const fnName of ['selective_scan_kernel', 'launch_selective_scan']) {
        const fnNode = nodes.find((node) => node.kind === 'function' && node.name === fnName);
        expect(fnNode).toBeDefined();
        expect(fnNode?.language).toBe('cuda');
      }
    });

    it('should extract #include as import nodes', () => {
      const source = '#include <cuda_runtime.h>\n__global__ void kernel() {}\n';
      const { nodes, unresolvedReferences } = extractFromSource('kernel.cu', source);

      // The include yields an import node named after the header...
      const includeNode = nodes.find((node) => node.kind === 'import');
      expect(includeNode).toBeDefined();
      expect(includeNode?.name).toBe('cuda_runtime.h');

      // ...and a matching unresolved `imports` reference.
      const includeRef = unresolvedReferences.find(
        (ref) => ref.referenceKind === 'imports' && ref.referenceName === 'cuda_runtime.h'
      );
      expect(includeRef).toBeDefined();
    });

    it('should extract function calls within CUDA code', () => {
      const code = `
__global__ void kernel(float* data, int n) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < n) data[idx] = idx;
}

void launch() {
// NOTE: tree-sitter-cpp misparses the triple-angle-bracket kernel launch
// syntax <<<grid,block>>> as nested shift operators, so a plain
// function call is used for testing instead of the launch expression.
cudaDeviceSynchronize();
}
`;
      const { unresolvedReferences } = extractFromSource('simple.cu', code);
      const callRefs = unresolvedReferences.filter((ref) => ref.referenceKind === 'calls');
      const sawSynchronize = callRefs.some((ref) => ref.referenceName === 'cudaDeviceSynchronize');
      expect(sawSynchronize).toBe(true);
    });

    it('should extract struct definitions from CUDA headers', () => {
      const source = 'struct GPUKernelParams { int threads; int blocks; float shared_mem; };\n';
      const { nodes } = extractFromSource('params.cuh', source);
      const paramsStruct = nodes.find((node) => node.kind === 'struct' && node.name === 'GPUKernelParams');
      expect(paramsStruct).toBeDefined();
      expect(paramsStruct?.language).toBe('cuda');
    });
  });
});
39 changes: 39 additions & 0 deletions __tests__/fixtures/cuda/selective_scan.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// CUDA kernel for selective scan
#include <cuda_runtime.h>

#define BLOCK_SIZE 256

// Fixture kernel, exported with C linkage (extern "C") and marked __global__.
// Each thread derives a flat index `idx` from its block/thread coordinates and
// splits it into a (b, d) pair: b = idx / dim, d = idx % dim. Threads whose
// index is >= batch * dim return immediately.
// NOTE(review): the visible body only reads `delta`; the remaining pointer
// arguments and `dstate` are unused because the scan logic itself is elided
// (see the "... scan logic" placeholder).
extern "C" __global__ void selective_scan_kernel(
const float* __restrict__ u,
const float* __restrict__ delta,
const float* __restrict__ A,
const float* __restrict__ B,
const float* __restrict__ C,
float* __restrict__ out,
float* __restrict__ h_last,
int batch, int dim, int dstate, int seqlen
) {
// Flat per-thread index across the whole launch.
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Guard for the surplus threads of the last block.
if (idx >= batch * dim) return;
// selective scan computation
int b = idx / dim;
int d = idx % dim;
// Walk the sequence; delta is addressed as delta[t][b][d] in a flattened
// array — presumably laid out as (seqlen, batch, dim); confirm against callers.
for (int t = 0; t < seqlen; t++) {
float delta_t = delta[t * batch * dim + b * dim + d];
// ... scan logic
}
}

// Host-side launcher for selective_scan_kernel. Uses BLOCK_SIZE threads per
// block and enough blocks to cover batch * dim indices, then forwards every
// argument unchanged to the kernel.
// The visible code performs no error checking or synchronization after the launch.
__host__ void launch_selective_scan(
const float* u, const float* delta,
const float* A, const float* B, const float* C,
float* out, float* h_last,
int batch, int dim, int dstate, int seqlen
) {
int threads = BLOCK_SIZE;
// Ceiling division: round up so a partial final block is still launched.
int blocks = (batch * dim + threads - 1) / threads;
selective_scan_kernel<<<blocks, threads>>>(
u, delta, A, B, C, out, h_last,
batch, dim, dstate, seqlen
);
}
15 changes: 12 additions & 3 deletions src/extraction/grammars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import * as path from 'path';
import { Parser, Language as WasmLanguage } from 'web-tree-sitter';
import { Language } from '../types';

// Key type of the WASM grammar filename map: every Language except the ones
// listed here, which are excluded from that map ('scl' is file-level-only).
export type GrammarLanguage = Exclude<Language, 'svelte' | 'vue' | 'astro' | 'liquid' | 'razor' | 'yaml' | 'twig' | 'xml' | 'properties' | 'scl' | 'unknown'>;

/**
* WASM filename map — maps each language to its .wasm grammar file
Expand All @@ -37,6 +37,7 @@ const WASM_GRAMMAR_FILES: Record<GrammarLanguage, string> = {
scala: 'tree-sitter-scala.wasm',
lua: 'tree-sitter-lua.wasm',
r: 'tree-sitter-r.wasm',
cuda: 'tree-sitter-cpp.wasm',
luau: 'tree-sitter-luau.wasm',
objc: 'tree-sitter-objc.wasm',
};
Expand Down Expand Up @@ -69,6 +70,10 @@ export const EXTENSION_MAP: Record<string, Language> = {
'.cxx': 'cpp',
'.hpp': 'cpp',
'.hxx': 'cpp',
'.cu': 'cuda',
'.cuh': 'cuda',
'.scl': 'scl',
'.st': 'scl',
'.cs': 'csharp',
// ASP.NET Razor / Blazor markup — custom RazorExtractor (links @model/@inject/
// component tags to their C# types; markup isn't a tree-sitter grammar).
Expand Down Expand Up @@ -318,6 +323,7 @@ export function isLanguageSupported(language: Language): boolean {
if (language === 'twig') return true; // file-level tracking only
if (language === 'xml') return true; // MyBatis mapper extractor
if (language === 'properties') return true; // Spring config keys
if (language === 'scl') return true; // file-level tracking only (IEC 61131-3 ST)
if (language === 'unknown') return false;
return language in WASM_GRAMMAR_FILES;
}
Expand All @@ -329,6 +335,7 @@ export function isGrammarLoaded(language: Language): boolean {
if (language === 'svelte' || language === 'vue' || language === 'astro' || language === 'liquid' || language === 'razor') return true;
if (language === 'yaml' || language === 'twig') return true; // no WASM grammar needed
if (language === 'xml' || language === 'properties') return true; // no WASM grammar needed
if (language === 'scl') return true; // no WASM grammar needed (file-level-only)
return languageCache.has(language);
}

Expand All @@ -342,14 +349,14 @@ export function isGrammarLoaded(language: Language): boolean {
* indexed rather than skipped, so it must stay in sync with that branch.
*/
export function isFileLevelOnlyLanguage(language: Language): boolean {
  // 'scl' (IEC 61131-3 Structured Text) belongs here alongside yaml, twig and
  // properties. A single return keeps the check reachable — a stale earlier
  // return without 'scl' would shadow it.
  return language === 'yaml' || language === 'twig' || language === 'properties' || language === 'scl';
}

/**
 * Get all supported languages: every key of the WASM grammar map, followed by
 * the explicitly listed languages that are not keys of that map
 * ('svelte', 'vue', 'astro', 'liquid', and the file-level-only 'scl').
 *
 * @returns The grammar-backed languages in map order, then the explicit extras.
 */
export function getSupportedLanguages(): Language[] {
  return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'astro', 'liquid', 'scl'];
}

/**
Expand Down Expand Up @@ -420,8 +427,10 @@ export function getLanguageDisplayName(language: Language): string {
liquid: 'Liquid',
pascal: 'Pascal / Delphi',
scala: 'Scala',
scl: 'IEC 61131-3 ST',
lua: 'Lua',
luau: 'Luau',
cuda: 'CUDA',
objc: 'Objective-C',
yaml: 'YAML',
twig: 'Twig',
Expand Down
1 change: 1 addition & 0 deletions src/extraction/languages/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export const EXTRACTORS: Partial<Record<Language, LanguageExtractor>> = {
java: javaExtractor,
c: cExtractor,
cpp: cppExtractor,
cuda: cppExtractor,
csharp: csharpExtractor,
php: phpExtractor,
ruby: rubyExtractor,
Expand Down
1 change: 1 addition & 0 deletions src/extraction/tree-sitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2788,6 +2788,7 @@ export class TreeSitterExtractor {
} else if (
(this.language === 'cpp' ||
this.language === 'c' ||
this.language === 'cuda' ||
this.language === 'kotlin' ||
this.language === 'swift' ||
this.language === 'rust' ||
Expand Down
4 changes: 2 additions & 2 deletions src/resolution/callback-synthesizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,11 @@ function cppOverrideEdges(queries: QueryBuilder): Edge[] {
.map((e) => queries.getNodeById(e.target))
.filter((n): n is Node => !!n && n.kind === 'method');
for (const cls of queries.getNodesByKind('class')) {
const subMethods = methodsOf(cls.id).filter((n) => n.language === 'cpp');
const subMethods = methodsOf(cls.id).filter((n) => n.language === 'cpp' || n.language === 'cuda');
if (subMethods.length === 0) continue;
for (const ext of queries.getOutgoingEdges(cls.id, ['extends'])) {
const base = queries.getNodeById(ext.target);
if (!base || base.language !== 'cpp' || base.id === cls.id) continue;
if (!base || (base.language !== 'cpp' && base.language !== 'cuda') || base.id === cls.id) continue;
const baseMethods = new Map(methodsOf(base.id).map((m) => [m.name, m]));
let added = 0;
for (const m of subMethods) {
Expand Down
9 changes: 5 additions & 4 deletions src/resolution/import-resolver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const EXTENSION_RESOLUTION: Record<string, string[]> = {
java: ['.java'],
c: ['.h', '.c'],
cpp: ['.h', '.hpp', '.hxx', '.cpp', '.cc', '.cxx'],
cuda: ['.h', '.cuh', '.hpp', '.hxx', '.cpp', '.cc', '.cxx', '.cu'],
csharp: ['.cs'],
php: ['.php'],
ruby: ['.rb'],
Expand Down Expand Up @@ -69,7 +70,7 @@ export function resolveImportPath(
// C/C++ include directory search: when neither relative nor aliased
// resolution found a match, search -I directories from
// compile_commands.json or heuristic probing.
if (language === 'c' || language === 'cpp') {
if (language === 'c' || language === 'cpp' || language === 'cuda') {
return resolveCppIncludePath(importPath, language, context);
}

Expand Down Expand Up @@ -189,7 +190,7 @@ function isExternalImport(
return true;
}

if (language === 'c' || language === 'cpp') {
if (language === 'c' || language === 'cpp' || language === 'cuda') {
// C/C++ standard library headers — both C-style (<stdio.h>) and
// C++-style (<cstdio>, <vector>) forms. Checked against the import
// path (which the extractor strips of <> or "" delimiters).
Expand Down Expand Up @@ -602,7 +603,7 @@ export function extractImportMappings(
mappings.push(...extractJavaImports(content));
} else if (language === 'php') {
mappings.push(...extractPHPImports(content));
} else if (language === 'c' || language === 'cpp') {
} else if (language === 'c' || language === 'cpp' || language === 'cuda') {
mappings.push(...extractCppImports(content));
}

Expand Down Expand Up @@ -1131,7 +1132,7 @@ export function resolveViaImport(
// include-dir scan path inside resolveImportPath never produces an
// edge — resolveViaImport's symbol lookup below would search the
// resolved file for a symbol named like the file extension and fail.
if ((ref.language === 'c' || ref.language === 'cpp') && ref.referenceKind === 'imports') {
if ((ref.language === 'c' || ref.language === 'cpp' || ref.language === 'cuda') && ref.referenceKind === 'imports') {
// C/C++ quoted includes (`#include "X.h"`) resolve relative to the
// INCLUDING file's own directory first (the C standard's quoted-include
// search order). Prefer a same-directory header over an -I directory or a
Expand Down
2 changes: 1 addition & 1 deletion src/resolution/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1108,7 +1108,7 @@ export class ReferenceResolver {
// those resolutions makes the graph wrong, not cleaner. We only filter
// when there's no user node with this name — then name-matching would
// produce zero edges anyway and the filter just short-circuits work.
if (ref.language === 'c' || ref.language === 'cpp') {
if (ref.language === 'c' || ref.language === 'cpp' || ref.language === 'cuda') {
// C++ std:: namespace prefix — safe to filter unconditionally,
// since `std::foo` is never a user-defined qualified name in
// tree-sitter output.
Expand Down
Loading