Skip to content

Commit 2b9d115

Browse files
re-taroclaudeoverlookmotel
authored
perf(linter/plugins): use SoA pattern with 2 arrays for CFG steps (#18528)
Part 2 of #17232, continuing after #18527 - CFG walker optimization. - Replace single `steps` array with 2 SoA (Struct of Arrays): - `stepTypeIds`: encoded type IDs - `stepData`: node or args array - Encode step types using type ID offset: - Enter visits: typeId directly (0-164 for node types) - CFG events: typeId directly (165-171 for event types) - Exit visits: typeId + 256 (offset to distinguish from enter) - Pre-compute type IDs during step preparation phase Benefits: - Reduces object creation overhead (no step objects needed). - Halves the number of `NODE_TYPE_IDS_MAP` hash map lookups to convert node types (strings) to type IDs. --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> Co-authored-by: overlookmotel <theoverlookmotel@gmail.com>
1 parent b6a8c1c commit 2b9d115

File tree

1 file changed

+97
-92
lines changed
  • apps/oxlint/src-js/plugins

1 file changed

+97
-92
lines changed

apps/oxlint/src-js/plugins/cfg.ts

Lines changed: 97 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -10,53 +10,52 @@ import CodePathAnalyzer from "../../node_modules/eslint/lib/linter/code-path-ana
1010
import Traverser from "../../node_modules/eslint/lib/shared/traverser.js";
1111

1212
import visitorKeys from "../generated/keys.ts";
13-
import { LEAF_NODE_TYPES_COUNT, NODE_TYPE_IDS_MAP } from "../generated/type_ids.ts";
13+
import {
14+
LEAF_NODE_TYPES_COUNT,
15+
NODE_TYPE_IDS_MAP,
16+
NODE_TYPES_COUNT,
17+
TYPE_IDS_COUNT,
18+
} from "../generated/type_ids.ts";
1419
import { ancestors } from "../generated/walk.js";
1520
import { debugAssert, typeAssertIs } from "../utils/asserts.ts";
1621

1722
import type { EnterExit, VisitFn } from "./visitor.ts";
1823
import type { Node, Program } from "../generated/types.d.ts";
1924
import type { CompiledVisitors } from "../generated/walk.js";
2025

21-
// Step type constants.
22-
// Equivalent to an enum, but minifies better.
23-
const STEP_TYPE_ENTER = 0;
24-
const STEP_TYPE_EXIT = 1;
25-
const STEP_TYPE_CALL = 2;
26-
2726
/**
28-
* Step to walk AST.
27+
* Offset added to type IDs for exit visits to distinguish them from enter visits.
28+
* Using 256 as it's a power of 2 and larger than the maximum type ID (171).
29+
*
30+
* Type ID encoding:
31+
* - Enter visit (nodes): 0 to NODE_TYPES_COUNT - 1 (0-164)
32+
* - Call method (CFG events): NODE_TYPES_COUNT to TYPE_IDS_COUNT - 1 (165-171)
33+
* - Exit visit (non-leaf nodes): Node type ID + EXIT_TYPE_ID_OFFSET (256+)
2934
*/
30-
type Step = EnterStep | ExitStep | CallStep;
35+
const EXIT_TYPE_ID_OFFSET = 256;
3136

32-
/**
33-
* Step for entering AST node.
34-
*/
35-
interface EnterStep {
36-
type: typeof STEP_TYPE_ENTER;
37-
target: Node;
38-
}
37+
debugAssert(
38+
EXIT_TYPE_ID_OFFSET >= TYPE_IDS_COUNT,
39+
"`EXIT_TYPE_ID_OFFSET` must be >= `TYPE_IDS_COUNT`",
40+
);
41+
42+
// Struct of Arrays (SoA) pattern for step storage.
43+
// Using 2 arrays instead of an array of objects reduces object creation.
3944

4045
/**
41-
* Step for exiting AST node.
46+
* Encoded type IDs for each step.
47+
* - For enter visits: Node type ID (0-164)
48+
* - For CFG events: Event type ID (165-171)
49+
* - For exit visits: Node type ID + `EXIT_TYPE_ID_OFFSET` (256+)
4250
*/
43-
interface ExitStep {
44-
type: typeof STEP_TYPE_EXIT;
45-
target: Node;
46-
}
51+
const stepTypeIds: number[] = [];
4752

4853
/**
49-
* Step for calling a CFG event handler.
54+
* Step data for each step.
55+
* - For visit steps (enter/exit): AST node
56+
* - For call steps (CFG events): Array of arguments to call CFG event handler with
5057
*/
51-
interface CallStep {
52-
type: typeof STEP_TYPE_CALL;
53-
eventName: string;
54-
args: unknown[];
55-
}
56-
57-
// Array of steps to walk AST.
58-
// Singleton array which is re-used for each walk, and emptied after each walk.
59-
const steps: Step[] = [];
58+
const stepData: (Node | unknown[])[] = [];
6059

6160
/**
6261
* Reset state for walking AST with CFG.
@@ -65,7 +64,8 @@ const steps: Step[] = [];
6564
* So it's only necessary to call this function if an error occurs during AST walking.
6665
*/
6766
export function resetCfgWalk(): void {
68-
steps.length = 0;
67+
stepTypeIds.length = 0;
68+
stepData.length = 0;
6969
}
7070

7171
/**
@@ -79,14 +79,15 @@ export function resetCfgWalk(): void {
7979
*
8080
* 1. First time to build the CFG graph.
8181
* In this first pass, it builds a list of steps to walk AST (including visiting nodes and CFG events).
82-
* This list is stored in `steps` array.
82+
* This list is stored in the SoA arrays (stepTypeIds, stepData).
8383
*
8484
* 2. Visit AST with provided visitor.
8585
* Run through the steps, in order, calling visit functions for each step.
8686
*
8787
* TODO: This was originally copied from ESLint, and has been adapted for better performance.
88-
* But we could further improve its performance in many ways.
89-
* See TODO comments in the code below for some ideas for optimization.
88+
* We could further improve its performance by:
89+
* - Copying `CodePathAnalyzer` code into this repo and rewriting it to work entirely with type IDs instead of strings.
90+
* - Using a faster AST walker than ESLint's `Traverser`.
9091
*
9192
* @param ast - AST
9293
* @param visitors - Visitors array
@@ -96,17 +97,15 @@ export function walkProgramWithCfg(ast: Program, visitors: CompiledVisitors): vo
9697
prepareSteps(ast);
9798

9899
// Walk the AST
99-
const stepsLen = steps.length;
100-
debugAssert(stepsLen > 0, "`steps` should not be empty");
100+
const stepsLen = stepTypeIds.length;
101+
debugAssert(stepsLen > 0, "`stepTypeIds` should not be empty");
101102

102103
for (let i = 0; i < stepsLen; i++) {
103-
const step = steps[i];
104-
const stepType = step.type;
104+
let typeId = stepTypeIds[i];
105105

106-
if (stepType === STEP_TYPE_ENTER) {
107-
// Enter node - can be leaf or non-leaf node
108-
const node = step.target;
109-
const typeId = NODE_TYPE_IDS_MAP.get(node.type)!;
106+
if (typeId < NODE_TYPES_COUNT) {
107+
// Enter node. `typeId` is node type ID.
108+
const node = stepData[i] as Node;
110109
const visit = visitors[typeId];
111110

112111
if (typeId < LEAF_NODE_TYPES_COUNT) {
@@ -115,7 +114,7 @@ export function walkProgramWithCfg(ast: Program, visitors: CompiledVisitors): vo
115114
typeAssertIs<VisitFn>(visit);
116115
visit(node);
117116
}
118-
// Don't add node to `ancestors`, because we don't visit them on exit
117+
// Don't add node to `ancestors`, because we don't visit leaf nodes on exit
119118
} else {
120119
// Non-leaf node
121120
if (visit !== null) {
@@ -126,81 +125,75 @@ export function walkProgramWithCfg(ast: Program, visitors: CompiledVisitors): vo
126125

127126
ancestors.unshift(node);
128127
}
129-
} else if (stepType === STEP_TYPE_EXIT) {
130-
// Exit non-leaf node
131-
const node = step.target;
128+
} else if (typeId >= EXIT_TYPE_ID_OFFSET) {
129+
// Exit non-leaf node. `typeId` is node type ID + `EXIT_TYPE_ID_OFFSET`.
130+
typeId -= EXIT_TYPE_ID_OFFSET;
131+
const node = stepData[i] as Node;
132+
132133
ancestors.shift();
133134

134-
const typeId = NODE_TYPE_IDS_MAP.get(node.type)!;
135135
const enterExit = visitors[typeId];
136136
if (enterExit !== null) {
137137
typeAssertIs<EnterExit>(enterExit);
138138
const { exit } = enterExit;
139139
if (exit !== null) exit(node);
140140
}
141141
} else {
142-
// Call method (CFG event)
143-
const eventId = NODE_TYPE_IDS_MAP.get(step.eventName)!;
144-
const visit = visitors[eventId];
142+
// Call method (CFG event). `typeId` is event type ID.
143+
debugAssert(Array.isArray(stepData[i]), "`stepData` should contain an array for CFG events");
144+
145+
const visit = visitors[typeId];
145146
if (visit !== null) {
146-
(visit as any).apply(undefined, step.args);
147+
(visit as any).apply(undefined, stepData[i]);
147148
}
148149
}
149150
}
150151

151-
// Reset `steps` array
152-
steps.length = 0;
152+
// Reset SoA arrays
153+
stepTypeIds.length = 0;
154+
stepData.length = 0;
153155
}
154156

155157
/**
156-
* Walk AST and put a list of all steps to walk AST into `steps` array.
158+
* Walk AST and put a list of all steps to walk AST into the SoA arrays.
157159
* @param ast - AST
158160
*/
159161
function prepareSteps(ast: Program) {
160-
debugAssert(steps.length === 0, "`steps` should be empty at start of `prepareSteps`");
162+
debugAssert(stepTypeIds.length === 0, "`stepTypeIds` should be empty at start of `prepareSteps`");
163+
debugAssert(stepData.length === 0, "`stepData` should be empty at start of `prepareSteps`");
161164

162-
// Length of `steps` array after entering each node.
165+
// Length of arrays after entering each node.
163166
// Used in debug build to check that no leaf nodes emit CFG events (see below).
164167
// Minifier removes this var in release build.
165168
let stepsLenAfterEnter = 0;
166169

167170
// Create `CodePathAnalyzer`.
168-
// It stores steps to walk AST.
169-
//
170-
// We could improve performance in several ways (in ascending order of complexity):
171+
// It stores steps to walk AST using the SoA (Struct of Arrays) pattern.
171172
//
172-
// * Reduce object creation by storing steps as 2 arrays (struct of arrays pattern):
173-
// * Array 1: Step type (number).
174-
// * Array 2: Step data - AST node object for enter/exit node steps, args for CFG events.
175-
// * Alternatively, use a single array containing step objects as now, but recycle the objects
176-
// (SoA option is probably better).
177-
// * Avoid repeated conversions from `type` (string) to `typeId` (number) when iterating through steps.
178-
// * Generate separate `enterNode` / `exitNode` functions for each node type.
179-
// * Set them on `analyzer.original` before calling `analyzer.enterNode` / `analyzer.exitNode`.
180-
// * These functions would know the type ID of the node already, and then could store type ID in steps.
181-
// * When iterating through steps, use that type ID instead of converting `node.type` to `typeId` every time.
182-
// * Copy `CodePathAnalyzer` code into this repo and rewrite it to work entirely with type IDs instead of strings.
173+
// Type ID encoding:
174+
// - Enter visits: Node type ID directly (0-164 for node types)
175+
// - CFG events: Event type ID directly (165-171 for event types)
176+
// - Exit visits: Node type ID + `EXIT_TYPE_ID_OFFSET` (256+)
183177
//
184-
// TODO: Apply these optimizations (or at least some of them).
178+
// This allows us to:
179+
// 1. Avoid repeated `NODE_TYPE_IDS_MAP` hash map lookups during step execution.
180+
// 2. Reduce object creation by using 2 flat arrays instead of step objects.
185181
const analyzer = new CodePathAnalyzer({
186182
enterNode(node: Node) {
187-
steps.push({
188-
type: STEP_TYPE_ENTER,
189-
target: node,
190-
});
183+
const typeId = NODE_TYPE_IDS_MAP.get(node.type)!;
184+
stepTypeIds.push(typeId);
185+
stepData.push(node);
191186

192-
if (DEBUG) stepsLenAfterEnter = steps.length;
187+
if (DEBUG) stepsLenAfterEnter = stepTypeIds.length;
193188
},
194189

195190
leaveNode(node: Node) {
196191
const typeId = NODE_TYPE_IDS_MAP.get(node.type)!;
197192

198193
if (typeId >= LEAF_NODE_TYPES_COUNT) {
199-
// Non-leaf node
200-
steps.push({
201-
type: STEP_TYPE_EXIT,
202-
target: node,
203-
});
194+
// Non-leaf node - add exit step with offset
195+
stepTypeIds.push(typeId + EXIT_TYPE_ID_OFFSET);
196+
stepData.push(node);
204197
} else {
205198
// Leaf node.
206199
// Don't add a step.
@@ -212,11 +205,20 @@ function prepareSteps(ast: Program) {
212205
// But if CFG events were emitted between entering node and exiting node, then the order the rule's
213206
// visit functions are called in would be wrong.
214207
// `exit` visit fn would be called before the CFG event handlers, instead of after.
215-
if (DEBUG && steps.length !== stepsLenAfterEnter) {
216-
const eventNames = steps.slice(stepsLenAfterEnter).map((step) => {
217-
if (step.type === STEP_TYPE_CALL) return step.eventName;
218-
return `${step.type === STEP_TYPE_ENTER ? "enter" : "exit"} ${node.type}`;
219-
});
208+
if (DEBUG && stepTypeIds.length !== stepsLenAfterEnter) {
209+
const eventNames: string[] = [];
210+
for (let i = stepsLenAfterEnter; i < stepTypeIds.length; i++) {
211+
const typeId = stepTypeIds[i];
212+
if (typeId < NODE_TYPES_COUNT) {
213+
eventNames.push(`enter ${NODE_TYPE_IDS_MAP.get((stepData[i] as Node).type)}`);
214+
} else if (typeId >= EXIT_TYPE_ID_OFFSET) {
215+
eventNames.push(`exit ${NODE_TYPE_IDS_MAP.get((stepData[i] as Node).type)}`);
216+
} else {
217+
const eventName = NODE_TYPE_IDS_MAP.entries().find(([, id]) => id === typeId)![0];
218+
eventNames.push(eventName);
219+
}
220+
}
221+
220222
throw new Error(
221223
`CFG events emitted during visiting leaf node \`${node.type}\`: ${eventNames.join(", ")}`,
222224
);
@@ -225,11 +227,9 @@ function prepareSteps(ast: Program) {
225227
},
226228

227229
emit(eventName: string, args: unknown[]) {
228-
steps.push({
229-
type: STEP_TYPE_CALL,
230-
eventName,
231-
args,
232-
});
230+
const typeId = NODE_TYPE_IDS_MAP.get(eventName)!;
231+
stepTypeIds.push(typeId);
232+
stepData.push(args);
233233
},
234234
});
235235

@@ -247,4 +247,9 @@ function prepareSteps(ast: Program) {
247247
},
248248
visitorKeys,
249249
});
250+
251+
debugAssert(
252+
stepTypeIds.length === stepData.length,
253+
"`stepTypeIds` and `stepData` should have the same length",
254+
);
250255
}

0 commit comments

Comments
 (0)