Files
stash/packages/query-dsl/src/query-parser.parser.ts
Morten Olsen 25f614a730
Some checks failed
Build and release / Build (push) Failing after 2m28s
Build and release / update-release-draft (push) Has been skipped
Build and release / Release (push) Has been skipped
ci: add server build
2025-12-10 21:52:20 +01:00

461 lines
13 KiB
TypeScript

import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain';
import { type QueryFilter, type QueryCondition, queryFilterSchema } from './query-parser.schemas.js';
// ----------------- Lexer -----------------

// Whitespace separates tokens but is never handed to the parser.
const WhiteSpace = createToken({ name: 'WhiteSpace', group: Lexer.SKIPPED, pattern: /\s+/ });

/**
 * Creates a keyword token. `longer_alt` starts out unset because Identifier,
 * which every keyword defers to, is only created after the keywords.
 */
const defineKeyword = (name: string, pattern: RegExp) => createToken({ name, pattern, longer_alt: undefined });

// Keywords, matched case-insensitively. They are listed ahead of Identifier
// in the lexer's token list so that they take precedence over it.
const And = defineKeyword('And', /AND/i);
const Or = defineKeyword('Or', /OR/i);
const Like = defineKeyword('Like', /LIKE/i);
const Not = defineKeyword('Not', /NOT/i);
const In = defineKeyword('In', /IN/i);
const Is = defineKeyword('Is', /IS/i);
const Null = defineKeyword('Null', /NULL/i);

// Identifier: a letter or underscore followed by letters, digits or underscores.
const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });

// Let Identifier win whenever it matches more text than a keyword does,
// so that e.g. "ANDROID" is not lexed as the keyword AND.
for (const keyword of [And, Or, Like, Not, In, Is, Null]) {
  keyword.LONGER_ALT = Identifier;
}
// Literals
// Single-quoted string; a quote inside the string is written as two quotes ('').
const StringLiteral = createToken({ pattern: /'(?:''|[^'])*'/, name: 'StringLiteral' });
// Optional minus sign, integer part without leading zeros, optional fraction and exponent.
const NumberLiteral = createToken({ pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/, name: 'NumberLiteral' });

// Shorthand for the fixed-text operator and punctuation tokens below.
const fixedToken = (name: string, pattern: RegExp) => createToken({ name, pattern });

// Comparison operators
const NotEquals = fixedToken('NotEquals', /!=/);
const GreaterThanOrEqual = fixedToken('GreaterThanOrEqual', />=/);
const LessThanOrEqual = fixedToken('LessThanOrEqual', /<=/);
const Equals = fixedToken('Equals', /=/);
const GreaterThan = fixedToken('GreaterThan', />/);
const LessThan = fixedToken('LessThan', /</);

// Punctuation
const LParen = fixedToken('LParen', /\(/);
const RParen = fixedToken('RParen', /\)/);
const Comma = fixedToken('Comma', /,/);
const Dot = fixedToken('Dot', /\./);
// The lexer tries token types in list order, so more specific patterns must
// come before the more general ones they overlap with.

// Two-character operators precede the one-character operators they start with.
const operatorTokens = [NotEquals, GreaterThanOrEqual, LessThanOrEqual, Equals, GreaterThan, LessThan];
const punctuationTokens = [LParen, RParen, Comma, Dot];
// Keywords must be tried before Identifier.
const keywordTokens = [And, Or, Like, Not, In, Is, Null];
const literalTokens = [StringLiteral, NumberLiteral];

const allTokens = [
  WhiteSpace,
  ...operatorTokens,
  ...punctuationTokens,
  ...keywordTokens,
  ...literalTokens,
  // Identifier goes last
  Identifier,
];

// Lexer instance shared by every parse call.
const QueryLexer = new Lexer(allTokens);
// ----------------- Parser -----------------
/**
 * Parser for the query DSL, built on Chevrotain's EmbeddedActionsParser.
 *
 * The grammar rules construct the resulting QueryFilter directly inside the
 * rule bodies. `parse` is the only public member; every rule and helper is a
 * `#private` field or method.
 *
 * Grammar, as implemented by the rules below:
 *   query             := orExpression
 *   orExpression      := andExpression (OR andExpression)*
 *   andExpression     := primaryExpression (AND primaryExpression)*
 *   primaryExpression := '(' orExpression ')' | condition
 *   condition         := fieldReference <operator and operand(s)>
 *   fieldReference    := Identifier ('.' Identifier)*
 */
class QueryParserParser extends EmbeddedActionsParser {
constructor() {
super(allTokens);
// The rule fields declared below are initialised as soon as super() returns,
// so all rules are already registered when the grammar is analysed here.
this.performSelfAnalysis();
}
// OR has lowest precedence: andExpression (OR andExpression)*
#orExpression = this.RULE('orExpression', (): QueryFilter => {
let left = this.SUBRULE(this.#andExpression);
this.MANY(() => {
this.CONSUME(Or);
const right = this.SUBRULE2(this.#andExpression);
// Fold each further operand into the result built so far.
left = this.ACTION(() => this.#combineWithOperator(left, right, 'or'));
});
return left;
});
// AND has higher precedence than OR: primaryExpression (AND primaryExpression)*
#andExpression = this.RULE('andExpression', (): QueryFilter => {
let left = this.SUBRULE(this.#primaryExpression);
this.MANY(() => {
this.CONSUME(And);
const right = this.SUBRULE2(this.#primaryExpression);
// Fold each further operand into the result built so far.
left = this.ACTION(() => this.#combineWithOperator(left, right, 'and'));
});
return left;
});
// Primary: parenthesized expression or condition
#primaryExpression = this.RULE('primaryExpression', (): QueryFilter => {
return this.OR([
{
ALT: () => {
this.CONSUME(LParen);
// Parentheses restart at the lowest-precedence rule.
const expr = this.SUBRULE(this.#orExpression);
this.CONSUME(RParen);
return expr;
},
},
{ ALT: () => this.SUBRULE(this.#condition) },
]);
});
// Condition: field followed by operator and value(s).
//
// Returns a condition object of the form { type, field, conditions }, where
// `type` is 'text' or 'number' depending on the operand and `conditions` holds
// a single operator key. Note that text conditions use the keys
// `equal`/`notEqual` while number conditions use `equals`/`notEquals`.
//
// Several alternatives start with the same token(s) and differ only in the
// kind of literal that follows; those use a GATE that peeks ahead with LA(n)
// to pick the string or the number variant.
//
// The numeric suffixes on CONSUME/SUBRULE (CONSUME2, CONSUME3, ...) tell apart
// repeated uses of the same token or rule within this one rule; they carry no
// other meaning.
#condition = this.RULE('condition', (): QueryCondition => {
const field = this.SUBRULE(this.#fieldReference);
return this.OR([
// IS NULL / IS NOT NULL
{
ALT: () => {
this.CONSUME(Is);
// OPTION's result is compared against undefined to learn whether NOT was present.
const isNot = this.OPTION(() => this.CONSUME(Not)) !== undefined;
this.CONSUME(Null);
return this.ACTION(() => ({
type: 'text' as const,
field,
// IS NULL becomes { equal: null }.
// NOTE(review): IS NOT NULL yields both keys set to undefined rather than a
// null comparison; presumably downstream code relies on this shape. Confirm
// against the schema and the consumers of QueryCondition.
conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null },
}));
},
},
// NOT IN (strings) - LA(1)=NOT, LA(2)=IN, LA(3)=(, LA(4)=value
{
GATE: () => this.LA(4).tokenType === StringLiteral,
ALT: () => {
this.CONSUME2(Not);
this.CONSUME(In);
const values = this.SUBRULE(this.#stringInList);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notIn: values },
}));
},
},
// NOT IN (numbers) - same prefix as above, first list value is a number
{
GATE: () => this.LA(4).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME3(Not);
this.CONSUME2(In);
const values = this.SUBRULE(this.#numberInList);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { notIn: values },
}));
},
},
// NOT LIKE 'pattern'
{
ALT: () => {
this.CONSUME4(Not);
this.CONSUME(Like);
const pattern = this.CONSUME(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notLike: this.#extractStringValue(pattern.image) },
}));
},
},
// IN (strings) - LA(1)=IN, LA(2)=(, LA(3)=value
{
GATE: () => this.LA(3).tokenType === StringLiteral,
ALT: () => {
this.CONSUME3(In);
const values = this.SUBRULE2(this.#stringInList);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { in: values },
}));
},
},
// IN (numbers) - same prefix as above, first list value is a number
{
GATE: () => this.LA(3).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME4(In);
const values = this.SUBRULE2(this.#numberInList);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { in: values },
}));
},
},
// LIKE 'pattern'
{
ALT: () => {
this.CONSUME2(Like);
const pattern = this.CONSUME2(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { like: this.#extractStringValue(pattern.image) },
}));
},
},
// = string
{
GATE: () => this.LA(2).tokenType === StringLiteral,
ALT: () => {
this.CONSUME(Equals);
const token = this.CONSUME3(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { equal: this.#extractStringValue(token.image) },
}));
},
},
// = number
{
GATE: () => this.LA(2).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME2(Equals);
const token = this.CONSUME(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { equals: parseFloat(token.image) },
}));
},
},
// = NULL (produces the same result as IS NULL)
{
ALT: () => {
this.CONSUME3(Equals);
this.CONSUME2(Null);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { equal: null },
}));
},
},
// != string
{
GATE: () => this.LA(2).tokenType === StringLiteral,
ALT: () => {
this.CONSUME(NotEquals);
const token = this.CONSUME4(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notEqual: this.#extractStringValue(token.image) },
}));
},
},
// != number (there is no `!= NULL` alternative)
{
ALT: () => {
this.CONSUME2(NotEquals);
const token = this.CONSUME2(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { notEquals: parseFloat(token.image) },
}));
},
},
// The ordering comparisons below accept a number operand only.
// > number
{
ALT: () => {
this.CONSUME(GreaterThan);
const token = this.CONSUME3(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { greaterThan: parseFloat(token.image) },
}));
},
},
// >= number
{
ALT: () => {
this.CONSUME(GreaterThanOrEqual);
const token = this.CONSUME4(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { greaterThanOrEqual: parseFloat(token.image) },
}));
},
},
// < number
{
ALT: () => {
this.CONSUME(LessThan);
const token = this.CONSUME5(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { lessThan: parseFloat(token.image) },
}));
},
},
// <= number
{
ALT: () => {
this.CONSUME(LessThanOrEqual);
const token = this.CONSUME6(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { lessThanOrEqual: parseFloat(token.image) },
}));
},
},
]);
});
// Field reference: identifier.identifier.identifier...
// Returns the identifiers as an array of path segments, in source order.
#fieldReference = this.RULE('fieldReference', (): string[] => {
const parts: string[] = [];
const first = this.CONSUME(Identifier);
this.ACTION(() => parts.push(first.image));
this.MANY(() => {
this.CONSUME(Dot);
const next = this.CONSUME2(Identifier);
this.ACTION(() => parts.push(next.image));
});
return parts;
});
// String IN list: ('val1', 'val2', ...)
// Requires at least one value; returns the unquoted, unescaped strings.
#stringInList = this.RULE('stringInList', (): string[] => {
const values: string[] = [];
this.CONSUME(LParen);
const first = this.CONSUME(StringLiteral);
this.ACTION(() => values.push(this.#extractStringValue(first.image)));
this.MANY(() => {
this.CONSUME(Comma);
const next = this.CONSUME2(StringLiteral);
this.ACTION(() => values.push(this.#extractStringValue(next.image)));
});
this.CONSUME(RParen);
return values;
});
// Number IN list: (1, 2, 3, ...)
// Requires at least one value; each literal is converted with parseFloat.
#numberInList = this.RULE('numberInList', (): number[] => {
const values: number[] = [];
this.CONSUME2(LParen);
const first = this.CONSUME(NumberLiteral);
this.ACTION(() => values.push(parseFloat(first.image)));
this.MANY(() => {
this.CONSUME2(Comma);
const next = this.CONSUME2(NumberLiteral);
this.ACTION(() => values.push(parseFloat(next.image)));
});
this.CONSUME2(RParen);
return values;
});
// Extract string value from quoted literal, handling escaped quotes.
// `image` is the raw token text including its surrounding single quotes.
#extractStringValue(image: string): string {
// Remove surrounding quotes and unescape doubled quotes
return image.slice(1, -1).replace(/''/g, "'");
}
// Combine two filters with an operator, flattening if possible.
// When `left` is already an operator node with the same operator, `right` is
// appended to a copy of its conditions instead of nesting a new node. Only
// `left` is inspected; `right` is never flattened.
#combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
if (left.type === 'operator' && left.operator === operator) {
return {
type: 'operator',
operator,
conditions: [...left.conditions, right],
};
}
return {
type: 'operator',
operator,
conditions: [left, right],
};
}
// Entry point: a query is a single OR-level expression.
#query = this.RULE('query', (): QueryFilter => {
return this.SUBRULE(this.#orExpression);
});
/**
 * Parses a query string into a QueryFilter.
 *
 * @param input - The query text.
 * @param schema - Schema whose `parse` method is applied to the parser's
 *   result before it is returned; defaults to `queryFilterSchema`.
 * @returns The value returned by `schema.parse`.
 * @throws Error with an "Unterminated string ..." or "Lexer error ..." message
 *   when tokenizing fails, or a "Parse error: ..." message carrying the first
 *   parser error. Anything thrown by `schema.parse` is not caught here.
 */
public parse = <T extends typeof queryFilterSchema>(
input: string,
schema: T = queryFilterSchema as unknown as T,
): QueryFilter => {
const lexResult = QueryLexer.tokenize(input);
if (lexResult.errors.length > 0) {
// Only the first lexer error is reported.
const error = lexResult.errors[0];
// Check if this looks like an unterminated string (starts with ' but lexer failed)
if (error.message.includes("'") || input.slice(error.offset).startsWith("'")) {
// Count unescaped single quotes: drop doubled quotes, then count what is left.
const unescapedQuotes = input.replace(/''/g, '').match(/'/g);
// An odd count means some string was opened but never closed.
if (unescapedQuotes && unescapedQuotes.length % 2 !== 0) {
throw new Error(`Unterminated string starting at position ${error.offset}`);
}
}
throw new Error(`Lexer error at position ${error.offset}: ${error.message}`);
}
// Hand the token stream to the parser before invoking the entry rule.
this.input = lexResult.tokens;
const result = this.#query();
if (this.errors.length > 0) {
// Only the first parser error is reported.
const error = this.errors[0];
throw new Error(`Parse error: ${error.message}`);
}
return schema.parse(result);
};
}
// Public surface of this module: the parser class and the shared lexer instance.
export { QueryParserParser, QueryLexer };