Skip to content

Commit 3645edd

Browse files
committed
Move maxTokens check to lexer
1 parent 123e958 commit 3645edd

File tree

6 files changed

+88
-25
lines changed

6 files changed

+88
-25
lines changed

src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ export {
242242

243243
export type {
244244
ParseOptions,
245+
LexerOptions,
245246
SourceLocation,
246247
TokenKindEnum,
247248
KindEnum,

src/language/__tests__/parser-test.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ describe('Parser', () => {
106106
expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 7 })).to.throw(
107107
'Syntax Error: Document contains more than 7 tokens. Parsing aborted.',
108108
);
109+
110+
expect(() =>
111+
parse('#\n{\n#\na\n#\na\n#\n}\n#', { maxTokens: 9 }),
112+
).to.not.throw();
113+
expect(() => parse('#\n{\n#\na\n#\na\n#\n}\n#', { maxTokens: 8 })).to.throw(
114+
'Syntax Error: Document contains more than 8 tokens. Parsing aborted.',
115+
);
109116
});
110117

111118
it('parses variable inline values', () => {

src/language/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export { TokenKind } from './tokenKind';
1212
export type { TokenKindEnum } from './tokenKind';
1313

1414
export { Lexer } from './lexer';
15+
export type { LexerOptions } from './lexer';
1516

1617
export {
1718
parse,

src/language/lexer.ts

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,31 @@ import { isDigit, isNameContinue, isNameStart } from './characterClasses';
66
import type { Source } from './source';
77
import { TokenKind } from './tokenKind';
88

9+
/**
10+
* Configuration options to control lexer behavior
11+
*/
12+
export interface LexerOptions {
13+
/**
14+
* Parser CPU and memory usage is linear in the number of tokens in a document
15+
* however in extreme cases it becomes quadratic due to memory exhaustion.
16+
* Parsing happens before validation so even invalid queries can burn lots of
17+
* CPU time and memory.
18+
* To prevent this you can set a maximum number of tokens allowed within a document.
19+
*/
20+
maxTokens?: number | undefined;
21+
}
22+
923
/**
1024
* A Lexer interface which provides common properties and methods required for
1125
* lexing GraphQL source.
1226
*
1327
* @internal
1428
*/
1529
export interface LexerInterface {
16-
source: Source;
30+
readonly _options: Readonly<LexerOptions>;
31+
_tokenCounter: number;
32+
readonly source: Source;
33+
tokenCount: number;
1734
lastToken: Token;
1835
token: Token;
1936
line: number;
@@ -31,6 +48,11 @@ export interface LexerInterface {
3148
* whenever called.
3249
*/
3350
export class Lexer implements LexerInterface {
51+
/** @internal */
52+
readonly _options: Readonly<LexerOptions>;
53+
/** @internal */
54+
_tokenCounter: number;
55+
3456
source: Source;
3557

3658
/**
@@ -53,9 +75,11 @@ export class Lexer implements LexerInterface {
5375
*/
5476
lineStart: number;
5577

56-
constructor(source: Source) {
78+
constructor(source: Source, options: LexerOptions = {}) {
5779
const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0);
5880

81+
this._options = options;
82+
this._tokenCounter = 0;
5983
this.source = source;
6084
this.lastToken = startOfFileToken;
6185
this.token = startOfFileToken;
@@ -67,6 +91,10 @@ export class Lexer implements LexerInterface {
6791
return 'Lexer';
6892
}
6993

94+
get tokenCount(): number {
95+
return this._tokenCounter;
96+
}
97+
7098
/**
7199
* Advances the token stream to the next non-ignored token.
72100
*/
@@ -200,8 +228,19 @@ export function createToken(
200228
end: number,
201229
value?: string,
202230
): Token {
231+
const { maxTokens } = lexer._options;
203232
const line = lexer.line;
204233
const col = 1 + start - lexer.lineStart;
234+
if (kind !== TokenKind.EOF) {
235+
++lexer._tokenCounter;
236+
if (maxTokens !== undefined && lexer._tokenCounter > maxTokens) {
237+
throw syntaxError(
238+
lexer.source,
239+
start,
240+
`Document contains more than ${maxTokens} tokens. Parsing aborted.`,
241+
);
242+
}
243+
}
205244
return new Token(kind, start, end, line, col, value);
206245
}
207246

src/language/parser.ts

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,17 @@ export interface ParseOptions {
111111
* ```
112112
*/
113113
allowLegacyFragmentVariables?: boolean;
114+
}
114115

116+
/**
117+
* @internal
118+
*/
119+
export interface ParseOptionsInternal extends ParseOptions {
115120
/**
116121
* You may override the Lexer class used to lex the source; this is used by
117122
* schema coordinates to introduce a lexer with a restricted syntax.
123+
*
124+
* Cannot be set if `maxTokens` is set.
118125
*/
119126
lexer?: LexerInterface | undefined;
120127
}
@@ -204,10 +211,15 @@ export function parseType(
204211
*/
205212
export function parseSchemaCoordinate(
206213
source: string | Source,
214+
options?: ParseOptions | undefined,
207215
): SchemaCoordinateNode {
208216
const sourceObj = isSource(source) ? source : new Source(source);
209-
const lexer = new SchemaCoordinateLexer(sourceObj);
210-
const parser = new Parser(source, { lexer });
217+
const lexer = new SchemaCoordinateLexer(sourceObj, options);
218+
const parser = new Parser(source, {
219+
...options,
220+
maxTokens: undefined, // Handled by SchemaCoordinateLexer
221+
lexer,
222+
});
211223
parser.expectToken(TokenKind.SOF);
212224
const coordinate = parser.parseSchemaCoordinate();
213225
parser.expectToken(TokenKind.EOF);
@@ -226,26 +238,30 @@ export function parseSchemaCoordinate(
226238
* @internal
227239
*/
228240
export class Parser {
229-
protected _options: Omit<ParseOptions, 'lexer'>;
241+
protected _options: ParseOptions;
230242
protected _lexer: LexerInterface;
231-
protected _tokenCounter: number;
232243

233-
constructor(source: string | Source, options: ParseOptions = {}) {
244+
constructor(source: string | Source, options: ParseOptionsInternal = {}) {
234245
const { lexer, ..._options } = options;
235246

236247
if (lexer) {
248+
if (options.maxTokens != null) {
249+
throw new Error(
250+
'Setting maxTokens has no effect when a custom lexer is passed',
251+
);
252+
}
237253
this._lexer = lexer;
238254
} else {
239255
const sourceObj = isSource(source) ? source : new Source(source);
240-
this._lexer = new Lexer(sourceObj);
256+
const { maxTokens } = options;
257+
this._lexer = new Lexer(sourceObj, { maxTokens });
241258
}
242259

243260
this._options = _options;
244-
this._tokenCounter = 0;
245261
}
246262

247263
get tokenCount(): number {
248-
return this._tokenCounter;
264+
return this._lexer.tokenCount;
249265
}
250266

251267
/**
@@ -1690,19 +1706,7 @@ export class Parser {
16901706
}
16911707

16921708
advanceLexer(): void {
1693-
const { maxTokens } = this._options;
1694-
const token = this._lexer.advance();
1695-
1696-
if (token.kind !== TokenKind.EOF) {
1697-
++this._tokenCounter;
1698-
if (maxTokens !== undefined && this._tokenCounter > maxTokens) {
1699-
throw syntaxError(
1700-
this._lexer.source,
1701-
token.start,
1702-
`Document contains more than ${maxTokens} tokens. Parsing aborted.`,
1703-
);
1704-
}
1705-
}
1709+
this._lexer.advance();
17061710
}
17071711
}
17081712

src/language/schemaCoordinateLexer.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { syntaxError } from '../error/syntaxError';
22

33
import { Token } from './ast';
44
import { isNameStart } from './characterClasses';
5-
import type { LexerInterface } from './lexer';
5+
import type { LexerInterface, LexerOptions } from './lexer';
66
import { createToken, printCodePointAt, readName } from './lexer';
77
import type { Source } from './source';
88
import { TokenKind } from './tokenKind';
@@ -16,6 +16,11 @@ import { TokenKind } from './tokenKind';
1616
* whenever called.
1717
*/
1818
export class SchemaCoordinateLexer implements LexerInterface {
19+
/** @internal */
20+
public readonly _options: Readonly<LexerOptions>;
21+
/** @internal */
22+
public _tokenCounter: number;
23+
1924
source: Source;
2025

2126
/**
@@ -40,9 +45,11 @@ export class SchemaCoordinateLexer implements LexerInterface {
4045
*/
4146
lineStart: 0 = 0 as const;
4247

43-
constructor(source: Source) {
48+
constructor(source: Source, options: LexerOptions = {}) {
4449
const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0);
4550

51+
this._options = options;
52+
this._tokenCounter = 0;
4653
this.source = source;
4754
this.lastToken = startOfFileToken;
4855
this.token = startOfFileToken;
@@ -52,6 +59,10 @@ export class SchemaCoordinateLexer implements LexerInterface {
5259
return 'SchemaCoordinateLexer';
5360
}
5461

62+
get tokenCount(): number {
63+
return this._tokenCounter;
64+
}
65+
5566
/**
5667
* Advances the token stream to the next non-ignored token.
5768
*/

0 commit comments

Comments
 (0)