import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain';
import { type QueryFilter, type QueryCondition, queryFilterSchema } from './query-parser.schemas.js';

// ----------------- Lexer -----------------

// Whitespace is matched but dropped from the token stream.
const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

// Identifier is declared first so that every keyword can reference it as its
// `longer_alt` at creation time. Declaration order does not decide which token
// wins: per Chevrotain's lexer rules, priority comes from the order of the
// token list handed to the Lexer, which must still list keywords before
// Identifier.
const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });

// Keywords (case-insensitive). `longer_alt: Identifier` makes the lexer prefer
// a longer Identifier match, so input such as "ANDROID" is not split into the
// keyword AND followed by "ROID".
const And = createToken({ name: 'And', pattern: /AND/i, longer_alt: Identifier });
const Or = createToken({ name: 'Or', pattern: /OR/i, longer_alt: Identifier });
const Like = createToken({ name: 'Like', pattern: /LIKE/i, longer_alt: Identifier });
const Not = createToken({ name: 'Not', pattern: /NOT/i, longer_alt: Identifier });
const In = createToken({ name: 'In', pattern: /IN/i, longer_alt: Identifier });
const Is = createToken({ name: 'Is', pattern: /IS/i, longer_alt: Identifier });
const Null = createToken({ name: 'Null', pattern: /NULL/i, longer_alt: Identifier });

// Literals
// Single-quoted string; a doubled quote ('') inside the literal is an escaped quote.
const StringLiteral = createToken({
  name: 'StringLiteral',
  pattern: /'(?:''|[^'])*'/,
});

// Optional sign, integer part without leading zeros, optional fraction and exponent.
const NumberLiteral = createToken({
  name: 'NumberLiteral',
  pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/,
});

// Operators
const NotEquals = createToken({ name: 'NotEquals', pattern: /!=/ });
const GreaterThanOrEqual = createToken({ name: 'GreaterThanOrEqual', pattern: />=/ });
const LessThanOrEqual = createToken({ name: 'LessThanOrEqual', pattern: /<=/ });
const Equals = createToken({ name: 'Equals', pattern: /=/ });
const GreaterThan = createToken({ name: 'GreaterThan', pattern: />/ });
const LessThan = createToken({ name: 'LessThan', pattern: / { let left = this.SUBRULE(this.#andExpression); this.MANY(() => { this.CONSUME(Or); const right = this.SUBRULE2(this.#andExpression); left = this.ACTION(() => this.#combineWithOperator(left, right, 'or')); }); return left; }); // AND has higher precedence than OR #andExpression = this.RULE('andExpression', (): QueryFilter => { let left = this.SUBRULE(this.#primaryExpression); this.MANY(() => { this.CONSUME(And); const right = this.SUBRULE2(this.#primaryExpression); left = this.ACTION(() => this.#combineWithOperator(left, right, 'and')); }); return left; }); // Primary: parenthesized expression or condition #primaryExpression = this.RULE('primaryExpression', (): QueryFilter => { return this.OR([ { ALT: () => { this.CONSUME(LParen); const expr = this.SUBRULE(this.#orExpression); this.CONSUME(RParen); return expr; }, }, { ALT: () => this.SUBRULE(this.#condition) }, ]); }); // Condition: field followed by operator and value(s) #condition = this.RULE('condition', (): QueryCondition => { const field = this.SUBRULE(this.#fieldReference); return this.OR([ // IS NULL / IS NOT NULL { ALT: () => { this.CONSUME(Is); const isNot = this.OPTION(() => this.CONSUME(Not)) !== undefined; this.CONSUME(Null); return this.ACTION(() => ({ type: 'text' as const, field, conditions: isNot ? 
{ notEqual: null } // IS NOT NULL -> "not equal to null", the mirror of IS NULL
              : { equal: null },
            // NOTE(review): the previous value for IS NOT NULL was
            // `{ notEqual: undefined, equal: undefined }`, i.e. a condition with no
            // operator set. Confirm the text-condition schema accepts `notEqual: null`.
          }));
        },
      },
      // NOT IN (strings) - LA(1)=NOT, LA(2)=IN, LA(3)=(, LA(4)=value
      // The gate inspects the first list element to choose the string or number variant.
      {
        GATE: () => this.LA(4).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME2(Not);
          this.CONSUME(In);
          const values = this.SUBRULE(this.#stringInList);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notIn: values },
          }));
        },
      },
      // NOT IN (numbers)
      {
        GATE: () => this.LA(4).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME3(Not);
          this.CONSUME2(In);
          const values = this.SUBRULE(this.#numberInList);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { notIn: values },
          }));
        },
      },
      // NOT LIKE
      {
        ALT: () => {
          this.CONSUME4(Not);
          this.CONSUME(Like);
          const pattern = this.CONSUME(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notLike: this.#extractStringValue(pattern.image) },
          }));
        },
      },
      // IN (strings) - LA(1)=IN, LA(2)=(, LA(3)=value
      {
        GATE: () => this.LA(3).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME3(In);
          const values = this.SUBRULE2(this.#stringInList);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { in: values },
          }));
        },
      },
      // IN (numbers)
      {
        GATE: () => this.LA(3).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME4(In);
          const values = this.SUBRULE2(this.#numberInList);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { in: values },
          }));
        },
      },
      // LIKE
      {
        ALT: () => {
          this.CONSUME2(Like);
          const pattern = this.CONSUME2(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { like: this.#extractStringValue(pattern.image) },
          }));
        },
      },
      // = string (the gate looks at the token following the operator)
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(Equals);
          const token = this.CONSUME3(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { equal: this.#extractStringValue(token.image) },
          }));
        },
      },
      // = number
      {
        GATE: () => this.LA(2).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME2(Equals);
          const token = this.CONSUME(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { equals: parseFloat(token.image) },
          }));
        },
      },
      // = NULL (ungated: reached when the string and number gates above reject)
      {
        ALT: () => {
          this.CONSUME3(Equals);
          this.CONSUME2(Null);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { equal: null },
          }));
        },
      },
      // != string
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(NotEquals);
          const token = this.CONSUME4(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notEqual: this.#extractStringValue(token.image) },
          }));
        },
      },
      // != number (ungated: taken whenever the string gate above rejects)
      {
        ALT: () => {
          this.CONSUME2(NotEquals);
          const token = this.CONSUME2(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { notEquals: parseFloat(token.image) },
          }));
        },
      },
      // > number
      {
        ALT: () => {
          this.CONSUME(GreaterThan);
          const token = this.CONSUME3(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { greaterThan: parseFloat(token.image) },
          }));
        },
      },
      // >= number
      {
        ALT: () => {
          this.CONSUME(GreaterThanOrEqual);
          const token = this.CONSUME4(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { greaterThanOrEqual: parseFloat(token.image) },
          }));
        },
      },
      // < number
      {
        ALT: () => {
          this.CONSUME(LessThan);
          const token = this.CONSUME5(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { lessThan: parseFloat(token.image) },
          }));
        },
      },
      // <= number
      {
        ALT: () => {
          this.CONSUME(LessThanOrEqual);
          const token = this.CONSUME6(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { lessThanOrEqual: parseFloat(token.image) },
          }));
        },
      },
    ]);
  });

  // Field reference: identifier.identifier.identifier...
#fieldReference = this.RULE('fieldReference', (): string[] => {
    // Collects the leading identifier plus every `.identifier` segment that follows.
    const segments: string[] = [];
    const head = this.CONSUME(Identifier);
    this.ACTION(() => segments.push(head.image));
    this.MANY(() => {
      this.CONSUME(Dot);
      const segment = this.CONSUME2(Identifier);
      this.ACTION(() => segments.push(segment.image));
    });
    return segments;
  });

  // Parenthesized, comma-separated list of one or more string literals,
  // returned with quotes removed and doubled quotes unescaped.
  #stringInList = this.RULE('stringInList', (): string[] => {
    const items: string[] = [];
    this.CONSUME(LParen);
    const head = this.CONSUME(StringLiteral);
    this.ACTION(() => items.push(this.#extractStringValue(head.image)));
    this.MANY(() => {
      this.CONSUME(Comma);
      const item = this.CONSUME2(StringLiteral);
      this.ACTION(() => items.push(this.#extractStringValue(item.image)));
    });
    this.CONSUME(RParen);
    return items;
  });

  // Parenthesized, comma-separated list of one or more number literals,
  // each converted with parseFloat.
  #numberInList = this.RULE('numberInList', (): number[] => {
    const items: number[] = [];
    this.CONSUME2(LParen);
    const head = this.CONSUME(NumberLiteral);
    this.ACTION(() => items.push(parseFloat(head.image)));
    this.MANY(() => {
      this.CONSUME2(Comma);
      const item = this.CONSUME2(NumberLiteral);
      this.ACTION(() => items.push(parseFloat(item.image)));
    });
    this.CONSUME2(RParen);
    return items;
  });

  // Turns the raw text of a quoted literal into its value: drops the first and
  // last character (the quotes), then collapses each '' pair into a single '.
  #extractStringValue(image: string): string {
    const inner = image.slice(1, -1);
    return inner.replace(/''/g, "'");
  }

  // Joins two filters under `operator`. When the left side is already an
  // operator node of the same kind, the right side is appended to its
  // conditions instead of nesting a new node.
  #combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
    return {
      type: 'operator',
      operator,
      conditions:
        left.type === 'operator' && left.operator === operator
          ? [...left.conditions, right]
          : [left, right],
    };
  }

  // Top-level rule: a query is a single OR-expression.
  #query = this.RULE('query', (): QueryFilter => this.SUBRULE(this.#orExpression));

  // Tokenizes and parses `input`, then passes the resulting filter through
  // `schema.parse`. Throws an Error for the first lexer error (reported as an
  // unterminated string when the quote count is odd) or the first parser error.
  // NOTE(review): `T` is not declared in the visible text of this signature;
  // the type-parameter list appears to be missing here. Confirm against the
  // original file before relying on this declaration.
  public parse = (
    input: string,
    schema: T = queryFilterSchema as unknown as T,
  ): QueryFilter => {
    const { errors: lexErrors, tokens } = QueryLexer.tokenize(input);

    const [lexError] = lexErrors;
    if (lexError !== undefined) {
      // A failure at or about a single quote may be an unterminated string.
      const involvesQuote =
        lexError.message.includes("'") || input.slice(lexError.offset).startsWith("'");
      if (involvesQuote) {
        // Remove escaped pairs ('') first; an odd number of remaining quotes
        // means one was never closed.
        const remainingQuotes = input.replace(/''/g, '').match(/'/g) ?? [];
        if (remainingQuotes.length % 2 === 1) {
          throw new Error(`Unterminated string starting at position ${lexError.offset}`);
        }
      }
      throw new Error(`Lexer error at position ${lexError.offset}: ${lexError.message}`);
    }

    this.input = tokens;
    const result = this.#query();

    const [parseError] = this.errors;
    if (parseError !== undefined) {
      throw new Error(`Parse error: ${parseError.message}`);
    }

    return schema.parse(result);
  };
}

export { QueryParserParser, QueryLexer };