From a20629edf5b64c57fe8a131d094ca0b3d9e21ef5 Mon Sep 17 00:00:00 2001 From: zouyuanqing Date: Sun, 14 Jun 2026 14:50:57 +0800 Subject: [PATCH 1/2] feat: add CUDA (.cu/.cuh) and IEC 61131-3 SCL (.scl/.st) language support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CUDA: - New 'cuda' language type, reuses tree-sitter-cpp.wasm + cppExtractor - Full C/C++ resolution pipeline integration: #include resolution, compile_commands.json -I directory search, standard library detection, import mappings all share CUDA path alongside C/C++ - Cross-language reference resolution: CUDA added to C family (c/cpp/cuda) in name-matcher, so .cu ↔ .c ↔ .cpp ↔ .h references resolve as same-family - Extension resolution for .cuh/.cu in import-resolver SCL (IEC 61131-3 Structured Text): - New 'scl' language type, .scl/.st extensions - File-level tracking only (matching yaml/twig/properties conventions) - GrammarLanguage type fixed to exclude 'scl' - Fix: SCL files no longer silently dropped during single-file indexing (isLanguageSupported now returns true) Tests: - SCL: language detection, grammar loaded, file-level-only flag, file-record tracking with zero-symbol validation - CUDA: fixture extraction (selective_scan.cu kernel), #include import nodes, function call edges, .cuh struct extraction, language attribution on all node kinds Closes #387, advances #441 --- CHANGELOG.md | 7 + __tests__/extraction.test.ts | 149 +++++++++++++++++++++- __tests__/fixtures/cuda/selective_scan.cu | 39 ++++++ src/extraction/grammars.ts | 15 ++- src/extraction/languages/index.ts | 1 + src/resolution/import-resolver.ts | 9 +- src/resolution/index.ts | 2 +- src/resolution/name-matcher.ts | 2 +- src/types.ts | 2 + 9 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 __tests__/fixtures/cuda/selective_scan.cu diff --git a/CHANGELOG.md b/CHANGELOG.md index 9806578b3..87e30fe3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ and adheres 
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### New Features + +- CodeGraph now indexes CUDA (.cu / .cuh) and IEC 61131-3 ST (.scl / .st) files. CUDA reuses the C++ parser and shares its resolution paths — `#include` headers, function calls, struct definitions, and cross-language references between `.cu`, `.cpp`, and `.c`/`.h` files all resolve correctly. SCL files are tracked at the file-record level (no symbol extraction, matching the YAML and Twig conventions). + +### Fixes + +- SCL (.scl / .st) files were silently dropped during single-file indexing instead of being tracked as file records, because `isLanguageSupported` returned `false` for the language. They now behave consistently across both the batch and single-file indexing paths. ## [1.0.1] - 2026-06-13 diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index df825f529..068d9a093 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -10,7 +10,7 @@ import * as path from 'path'; import * as os from 'os'; import { CodeGraph } from '../src'; import { extractFromSource, scanDirectory, buildDefaultIgnore } from '../src/extraction'; -import { detectLanguage, isLanguageSupported, getSupportedLanguages, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars'; +import { detectLanguage, isLanguageSupported, getSupportedLanguages, isGrammarLoaded, isFileLevelOnlyLanguage, initGrammars, loadAllGrammars, isSourceFile } from '../src/extraction/grammars'; import { normalizePath } from '../src/utils'; beforeAll(async () => { @@ -7161,3 +7161,150 @@ GeomPoint <- ggproto("GeomPoint", Geom, }); }); }); + +// ============================================================================= +// SCL (IEC 61131-3 Structured Text) — file-level-only +// ============================================================================= + +describe('SCL (IEC 61131-3 ST)', () => { + describe('Language detection', () => { + it('should 
detect SCL files by extension', () => { + expect(detectLanguage('main.scl')).toBe('scl'); + expect(detectLanguage('control.st')).toBe('scl'); + expect(detectLanguage('src/plc/axis.scl')).toBe('scl'); + }); + + it('should report SCL as supported', () => { + expect(isLanguageSupported('scl')).toBe(true); + expect(getSupportedLanguages()).toContain('scl'); + }); + + it('should report SCL grammar as loaded (file-level-only, no WASM needed)', () => { + expect(isGrammarLoaded('scl')).toBe(true); + }); + + it('should recognize SCL as file-level-only language', () => { + expect(isFileLevelOnlyLanguage('scl')).toBe(true); + }); + + it('should track SCL source files (no errors for file-level-only)', () => { + const code = `FUNCTION_BLOCK Motor\nVAR\n speed : INT;\nEND_VAR\nspeed := 100;\nEND_FUNCTION_BLOCK`; + const result = extractFromSource('motor.scl', code); + expect(result.errors).toHaveLength(0); + }); + }); + + describe('File-level tracking', () => { + it('should index SCL files as file records with zero symbol nodes', async () => { + const tempDir = createTempDir(); + try { + fs.writeFileSync(path.join(tempDir, 'motor.scl'), 'FUNCTION_BLOCK Motor\nVAR speed : INT; END_VAR\nEND_FUNCTION_BLOCK\n'); + fs.writeFileSync(path.join(tempDir, 'conveyor.st'), 'PROGRAM Conveyor\nVAR state : BOOL; END_VAR\nEND_PROGRAM\n'); + + const cg = CodeGraph.initSync(tempDir); + const result = await cg.indexAll(); + + expect(result.success).toBe(true); + expect(result.filesIndexed).toBe(2); + + const files = cg.getFiles(); + expect(files.length).toBe(2); + const pathsAndLangs = files.map((f: any) => `${f.path}:${f.language}`).sort(); + // Normalize path separators for cross-platform + const normalized = pathsAndLangs.map((p: string) => p.replace(/\\/g, '/')); + expect(normalized).toEqual(['conveyor.st:scl', 'motor.scl:scl']); + + // SCL files produce zero symbol nodes + const symbols = cg.getNodesInFile('motor.scl'); + expect(symbols).toHaveLength(0); + + cg.close(); + } finally { + 
cleanupTempDir(tempDir); + } + }); + }); +}); + +// ============================================================================= +// CUDA (C++ dialect, reuses tree-sitter-cpp.wasm) +// ============================================================================= + +describe('CUDA', () => { + describe('Language detection', () => { + it('should detect CUDA files by extension', () => { + expect(detectLanguage('kernel.cu')).toBe('cuda'); + expect(detectLanguage('common.cuh')).toBe('cuda'); + expect(detectLanguage('cuda/kernel.cu')).toBe('cuda'); + }); + + it('should report CUDA as supported', () => { + expect(isLanguageSupported('cuda')).toBe(true); + expect(getSupportedLanguages()).toContain('cuda'); + }); + }); + + describe('Extraction', () => { + it('should extract functions from a CUDA kernel file (fixture)', () => { + const fixturePath = path.join(__dirname, 'fixtures', 'cuda', 'selective_scan.cu'); + const code = fs.readFileSync(fixturePath, 'utf-8'); + const result = extractFromSource('selective_scan.cu', code); + + // File node should be present with correct language + const fileNode = result.nodes.find((n) => n.kind === 'file'); + expect(fileNode).toBeDefined(); + expect(fileNode?.language).toBe('cuda'); + + // Should extract the __global__ kernel function + const kernelFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'selective_scan_kernel'); + expect(kernelFn).toBeDefined(); + expect(kernelFn?.language).toBe('cuda'); + + // Should extract the __host__ launch function + const hostFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'launch_selective_scan'); + expect(hostFn).toBeDefined(); + expect(hostFn?.language).toBe('cuda'); + }); + + it('should extract #include as import nodes', () => { + const code = '#include <cuda_runtime.h>\n__global__ void kernel() {}\n'; + const result = extractFromSource('kernel.cu', code); + + const importNode = result.nodes.find((n) => n.kind === 'import'); + expect(importNode).toBeDefined(); + 
expect(importNode?.name).toBe('cuda_runtime.h'); + + const importRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'imports' && r.referenceName === 'cuda_runtime.h' + ); + expect(importRef).toBeDefined(); + }); + + it('should extract function calls within CUDA code', () => { + const code = ` +__global__ void kernel(float* data, int n) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < n) data[idx] = idx; +} + +void launch() { + // NOTE: tree-sitter-cpp misparses the triple-angle-bracket kernel launch + // syntax <<<...>>> as nested shift operators, so a plain + // function call is used for testing instead of the launch expression. + cudaDeviceSynchronize(); +} +`; + const result = extractFromSource('simple.cu', code); + const calls = result.unresolvedReferences.filter((r) => r.referenceKind === 'calls'); + expect(calls.some((c) => c.referenceName === 'cudaDeviceSynchronize')).toBe(true); + }); + + it('should extract struct definitions from CUDA headers', () => { + const code = 'struct GPUKernelParams { int threads; int blocks; float shared_mem; };\n'; + const result = extractFromSource('params.cuh', code); + const structNode = result.nodes.find((n) => n.kind === 'struct' && n.name === 'GPUKernelParams'); + expect(structNode).toBeDefined(); + expect(structNode?.language).toBe('cuda'); + }); + }); +}); diff --git a/__tests__/fixtures/cuda/selective_scan.cu b/__tests__/fixtures/cuda/selective_scan.cu new file mode 100644 index 000000000..732dd90bf --- /dev/null +++ b/__tests__/fixtures/cuda/selective_scan.cu @@ -0,0 +1,39 @@ +// CUDA kernel for selective scan +#include <cuda_runtime.h> + +#define BLOCK_SIZE 256 + +extern "C" __global__ void selective_scan_kernel( + const float* __restrict__ u, + const float* __restrict__ delta, + const float* __restrict__ A, + const float* __restrict__ B, + const float* __restrict__ C, + float* __restrict__ out, + float* __restrict__ h_last, + int batch, int dim, int dstate, int seqlen +) { + int idx = blockIdx.x * 
blockDim.x + threadIdx.x; + if (idx >= batch * dim) return; + // selective scan computation + int b = idx / dim; + int d = idx % dim; + for (int t = 0; t < seqlen; t++) { + float delta_t = delta[t * batch * dim + b * dim + d]; + // ... scan logic + } +} + +__host__ void launch_selective_scan( + const float* u, const float* delta, + const float* A, const float* B, const float* C, + float* out, float* h_last, + int batch, int dim, int dstate, int seqlen +) { + int threads = BLOCK_SIZE; + int blocks = (batch * dim + threads - 1) / threads; + selective_scan_kernel<<<blocks, threads>>>( + u, delta, A, B, C, out, h_last, + batch, dim, dstate, seqlen + ); +} diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index ef6307a92..0444d70fd 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -10,7 +10,7 @@ import * as path from 'path'; import { Parser, Language as WasmLanguage } from 'web-tree-sitter'; import { Language } from '../types'; -export type GrammarLanguage = Exclude; +export type GrammarLanguage = Exclude; /** * WASM filename map — maps each language to its .wasm grammar file @@ -37,6 +37,7 @@ const WASM_GRAMMAR_FILES: Record = { scala: 'tree-sitter-scala.wasm', lua: 'tree-sitter-lua.wasm', r: 'tree-sitter-r.wasm', + cuda: 'tree-sitter-cpp.wasm', luau: 'tree-sitter-luau.wasm', objc: 'tree-sitter-objc.wasm', }; @@ -69,6 +70,10 @@ export const EXTENSION_MAP: Record = { '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp', + '.cu': 'cuda', + '.cuh': 'cuda', + '.scl': 'scl', + '.st': 'scl', '.cs': 'csharp', // ASP.NET Razor / Blazor markup — custom RazorExtractor (links @model/@inject/ // component tags to their C# types; markup isn't a tree-sitter grammar). 
@@ -318,6 +323,7 @@ export function isLanguageSupported(language: Language): boolean { if (language === 'twig') return true; // file-level tracking only if (language === 'xml') return true; // MyBatis mapper extractor if (language === 'properties') return true; // Spring config keys + if (language === 'scl') return true; // file-level tracking only (IEC 61131-3 ST) if (language === 'unknown') return false; return language in WASM_GRAMMAR_FILES; } @@ -329,6 +335,7 @@ export function isGrammarLoaded(language: Language): boolean { if (language === 'svelte' || language === 'vue' || language === 'astro' || language === 'liquid' || language === 'razor') return true; if (language === 'yaml' || language === 'twig') return true; // no WASM grammar needed if (language === 'xml' || language === 'properties') return true; // no WASM grammar needed + if (language === 'scl') return true; // no WASM grammar needed (file-level-only) return languageCache.has(language); } @@ -342,14 +349,14 @@ export function isGrammarLoaded(language: Language): boolean { * indexed rather than skipped, so it must stay in sync with that branch. */ export function isFileLevelOnlyLanguage(language: Language): boolean { - return language === 'yaml' || language === 'twig' || language === 'properties'; + return language === 'yaml' || language === 'twig' || language === 'properties' || language === 'scl'; } /** * Get all supported languages (those with grammar definitions). 
*/ export function getSupportedLanguages(): Language[] { - return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'astro', 'liquid']; + return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'vue', 'astro', 'liquid', 'scl']; } /** @@ -420,8 +427,10 @@ export function getLanguageDisplayName(language: Language): string { liquid: 'Liquid', pascal: 'Pascal / Delphi', scala: 'Scala', + scl: 'IEC 61131-3 ST', lua: 'Lua', luau: 'Luau', + cuda: 'CUDA', objc: 'Objective-C', yaml: 'YAML', twig: 'Twig', diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts index 9d4a949a5..da3a80e45 100644 --- a/src/extraction/languages/index.ts +++ b/src/extraction/languages/index.ts @@ -39,6 +39,7 @@ export const EXTRACTORS: Partial> = { java: javaExtractor, c: cExtractor, cpp: cppExtractor, + cuda: cppExtractor, csharp: csharpExtractor, php: phpExtractor, ruby: rubyExtractor, diff --git a/src/resolution/import-resolver.ts b/src/resolution/import-resolver.ts index badbe4b02..63888594e 100644 --- a/src/resolution/import-resolver.ts +++ b/src/resolution/import-resolver.ts @@ -31,6 +31,7 @@ const EXTENSION_RESOLUTION: Record = { java: ['.java'], c: ['.h', '.c'], cpp: ['.h', '.hpp', '.hxx', '.cpp', '.cc', '.cxx'], + cuda: ['.h', '.cuh', '.hpp', '.hxx', '.cpp', '.cc', '.cxx', '.cu'], csharp: ['.cs'], php: ['.php'], ruby: ['.rb'], @@ -69,7 +70,7 @@ export function resolveImportPath( // C/C++ include directory search: when neither relative nor aliased // resolution found a match, search -I directories from // compile_commands.json or heuristic probing. 
- if (language === 'c' || language === 'cpp') { + if (language === 'c' || language === 'cpp' || language === 'cuda') { return resolveCppIncludePath(importPath, language, context); } @@ -189,7 +190,7 @@ function isExternalImport( return true; } - if (language === 'c' || language === 'cpp') { + if (language === 'c' || language === 'cpp' || language === 'cuda') { // C/C++ standard library headers — both C-style () and // C++-style (, ) forms. Checked against the import // path (which the extractor strips of <> or "" delimiters). @@ -602,7 +603,7 @@ export function extractImportMappings( mappings.push(...extractJavaImports(content)); } else if (language === 'php') { mappings.push(...extractPHPImports(content)); - } else if (language === 'c' || language === 'cpp') { + } else if (language === 'c' || language === 'cpp' || language === 'cuda') { mappings.push(...extractCppImports(content)); } @@ -1131,7 +1132,7 @@ export function resolveViaImport( // include-dir scan path inside resolveImportPath never produces an // edge — resolveViaImport's symbol lookup below would search the // resolved file for a symbol named like the file extension and fail. - if ((ref.language === 'c' || ref.language === 'cpp') && ref.referenceKind === 'imports') { + if ((ref.language === 'c' || ref.language === 'cpp' || ref.language === 'cuda') && ref.referenceKind === 'imports') { // C/C++ quoted includes (`#include "X.h"`) resolve relative to the // INCLUDING file's own directory first (the C standard's quoted-include // search order). Prefer a same-directory header over an -I directory or a diff --git a/src/resolution/index.ts b/src/resolution/index.ts index 0d7ec4309..cb9e1cd3c 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -1108,7 +1108,7 @@ export class ReferenceResolver { // those resolutions makes the graph wrong, not cleaner. 
We only filter // when there's no user node with this name — then name-matching would // produce zero edges anyway and the filter just short-circuits work. - if (ref.language === 'c' || ref.language === 'cpp') { + if (ref.language === 'c' || ref.language === 'cpp' || ref.language === 'cuda') { // C++ std:: namespace prefix — safe to filter unconditionally, // since `std::foo` is never a user-defined qualified name in // tree-sitter output. diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts index 9990d690d..fd8c594ee 100644 --- a/src/resolution/name-matcher.ts +++ b/src/resolution/name-matcher.ts @@ -114,7 +114,7 @@ const LANGUAGE_FAMILY: Record = { java: 'jvm', kotlin: 'jvm', scala: 'jvm', swift: 'apple', objc: 'apple', typescript: 'web', tsx: 'web', javascript: 'web', jsx: 'web', - c: 'c', cpp: 'c', + c: 'c', cpp: 'c', cuda: 'c', // Razor/Blazor markup names C# types — same family so `@model Foo` / // `` resolve to their `.cs` class through the cross-family gate. csharp: 'dotnet', razor: 'dotnet', diff --git a/src/types.ts b/src/types.ts index 656bb1090..7b53179c3 100644 --- a/src/types.ts +++ b/src/types.ts @@ -74,6 +74,8 @@ export const LANGUAGES = [ 'java', 'c', 'cpp', + 'cuda', + 'scl', 'csharp', 'razor', 'php', From 737aaf83da412b42a071e783350673ed5f922179 Mon Sep 17 00:00:00 2001 From: zouyuanqing Date: Sun, 14 Jun 2026 17:15:07 +0800 Subject: [PATCH 2/2] fix: add cuda to C-family resolution branches missed in initial impl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - tree-sitter.ts: chained call parsing (obj.method() via -> / .) - callback-synthesizer.ts: C++ override edge synthesis (2 places) - name-matcher.ts: bareFnOnly, dotMatch member resolution, C++ call chain resolution (Foo::instance().bar()) These 5 branches only gate on cpp/c — CUDA files would silently miss callback synthesis, bare kernel-call resolution, and method-call resolution on these paths. 
--- src/extraction/tree-sitter.ts | 1 + src/resolution/callback-synthesizer.ts | 4 ++-- src/resolution/name-matcher.ts | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index bce7eee07..fc95bfda8 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -2788,6 +2788,7 @@ export class TreeSitterExtractor { } else if ( (this.language === 'cpp' || this.language === 'c' || + this.language === 'cuda' || this.language === 'kotlin' || this.language === 'swift' || this.language === 'rust' || diff --git a/src/resolution/callback-synthesizer.ts b/src/resolution/callback-synthesizer.ts index ad3f61213..82e4fa3bc 100644 --- a/src/resolution/callback-synthesizer.ts +++ b/src/resolution/callback-synthesizer.ts @@ -424,11 +424,11 @@ function cppOverrideEdges(queries: QueryBuilder): Edge[] { .map((e) => queries.getNodeById(e.target)) .filter((n): n is Node => !!n && n.kind === 'method'); for (const cls of queries.getNodesByKind('class')) { - const subMethods = methodsOf(cls.id).filter((n) => n.language === 'cpp'); + const subMethods = methodsOf(cls.id).filter((n) => n.language === 'cpp' || n.language === 'cuda'); if (subMethods.length === 0) continue; for (const ext of queries.getOutgoingEdges(cls.id, ['extends'])) { const base = queries.getNodeById(ext.target); - if (!base || base.language !== 'cpp' || base.id === cls.id) continue; + if (!base || (base.language !== 'cpp' && base.language !== 'cuda') || base.id === cls.id) continue; const baseMethods = new Map(methodsOf(base.id).map((m) => [m.name, m])); let added = 0; for (const m of subMethods) { diff --git a/src/resolution/name-matcher.ts b/src/resolution/name-matcher.ts index fd8c594ee..519eec4ed 100644 --- a/src/resolution/name-matcher.ts +++ b/src/resolution/name-matcher.ts @@ -199,8 +199,8 @@ export function matchFunctionRef( const bareFnOnly = ref.language === 'typescript' || ref.language === 'tsx' || 
ref.language === 'javascript' || ref.language === 'jsx' || - ref.language === 'cpp' || ref.language === 'python' || - ref.language === 'php'; + ref.language === 'cpp' || ref.language === 'cuda' || + ref.language === 'python' || ref.language === 'php'; // Qualified member-pointer (`&Widget::on_click` → "Widget::on_click"): // resolve the member ON THAT SCOPE — exempt from bareFnOnly (the `&Cls::m` @@ -950,7 +950,7 @@ export function matchMethodCall( const [, objectOrClass, methodName] = match; - if (ref.language === 'cpp' && dotMatch) { + if ((ref.language === 'cpp' || ref.language === 'cuda') && dotMatch) { const inferredType = inferCppReceiverType(objectOrClass!, ref, context); if (inferredType) { const typedMatch = resolveMethodOnType( @@ -1288,7 +1288,7 @@ export function matchReference( // 1b. C++ chained call whose receiver is another call — `Foo::instance().bar()` // encoded as `Foo::instance().bar` by the extractor (#645). Resolve the // receiver's type from what the inner call returns, then the method on it. - if (ref.language === 'cpp' || ref.language === 'c') { + if (ref.language === 'cpp' || ref.language === 'c' || ref.language === 'cuda') { result = matchCppCallChain(ref, context); if (result) return result; }