// Query filter parser: Chevrotain lexer and embedded-actions parser for SQL-like filter expressions.
import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain';
|
|
|
|
import { type QueryFilter, type QueryCondition, queryFilterSchema } from './query-parser.schemas.js';
|
|
|
|
// ----------------- Lexer -----------------

// Whitespace is matched but kept out of the token stream.
const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

// Keywords (case-insensitive). They win over Identifier because they are listed
// before it in `allTokens`; their LONGER_ALT is wired up once Identifier exists.
const And = createToken({ name: 'And', pattern: /AND/i });
const Or = createToken({ name: 'Or', pattern: /OR/i });
const Like = createToken({ name: 'Like', pattern: /LIKE/i });
const Not = createToken({ name: 'Not', pattern: /NOT/i });
const In = createToken({ name: 'In', pattern: /IN/i });
const Is = createToken({ name: 'Is', pattern: /IS/i });
const Null = createToken({ name: 'Null', pattern: /NULL/i });

// Identifier: a letter or underscore followed by letters, digits or underscores.
const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });

const keywordTokens = [And, Or, Like, Not, In, Is, Null];

// Let a longer identifier beat a keyword prefix, so that e.g. "ANDROID" is lexed
// as one Identifier rather than the keyword AND followed by "ROID".
for (const keywordToken of keywordTokens) {
  keywordToken.LONGER_ALT = Identifier;
}

// Literals
// Single-quoted string; a quote inside the string is written as two quotes ('').
const StringLiteral = createToken({
  name: 'StringLiteral',
  pattern: /'(?:''|[^'])*'/,
});

// Optional sign, integer part without leading zeros, optional fraction and exponent.
const NumberLiteral = createToken({
  name: 'NumberLiteral',
  pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/,
});

// Operators
const NotEquals = createToken({ name: 'NotEquals', pattern: /!=/ });
const GreaterThanOrEqual = createToken({ name: 'GreaterThanOrEqual', pattern: />=/ });
const LessThanOrEqual = createToken({ name: 'LessThanOrEqual', pattern: /<=/ });
const Equals = createToken({ name: 'Equals', pattern: /=/ });
const GreaterThan = createToken({ name: 'GreaterThan', pattern: />/ });
const LessThan = createToken({ name: 'LessThan', pattern: /</ });

// Punctuation
const LParen = createToken({ name: 'LParen', pattern: /\(/ });
const RParen = createToken({ name: 'RParen', pattern: /\)/ });
const Comma = createToken({ name: 'Comma', pattern: /,/ });
const Dot = createToken({ name: 'Dot', pattern: /\./ });

// Two-character operators are listed ahead of the one-character operators they
// start with (">=" before ">", "<=" before "<").
const operatorTokens = [NotEquals, GreaterThanOrEqual, LessThanOrEqual, Equals, GreaterThan, LessThan];

const punctuationTokens = [LParen, RParen, Comma, Dot];

// Token order matters! More specific patterns first, Identifier last.
const allTokens = [
  WhiteSpace,
  ...operatorTokens,
  ...punctuationTokens,
  ...keywordTokens,
  StringLiteral,
  NumberLiteral,
  Identifier,
];

const QueryLexer = new Lexer(allTokens);
|
|
|
|
// ----------------- Parser -----------------
|
|
|
|
/**
 * Parser for SQL-like filter expressions, implemented as a Chevrotain
 * `EmbeddedActionsParser` whose rules build the `QueryFilter` tree directly.
 *
 * Grammar, lowest to highest precedence:
 *   query             := orExpression
 *   orExpression      := andExpression (OR andExpression)*
 *   andExpression     := primaryExpression (AND primaryExpression)*
 *   primaryExpression := '(' orExpression ')' | condition
 *   condition         := fieldReference <operator> <value(s)>
 */
class QueryParserParser extends EmbeddedActionsParser {
  constructor() {
    super(allTokens);
    // The rule fields below are initialized right after super() returns, so all
    // rules are registered by the time self-analysis runs.
    this.performSelfAnalysis();
  }

  // OR binds loosest: a sequence of AND-expressions joined by OR.
  #orExpression = this.RULE('orExpression', (): QueryFilter => {
    let filter = this.SUBRULE(this.#andExpression);

    this.MANY(() => {
      this.CONSUME(Or);
      const operand = this.SUBRULE2(this.#andExpression);
      filter = this.ACTION(() => this.#combineWithOperator(filter, operand, 'or'));
    });

    return filter;
  });

  // AND binds tighter than OR: a sequence of primary expressions joined by AND.
  #andExpression = this.RULE('andExpression', (): QueryFilter => {
    let filter = this.SUBRULE(this.#primaryExpression);

    this.MANY(() => {
      this.CONSUME(And);
      const operand = this.SUBRULE2(this.#primaryExpression);
      filter = this.ACTION(() => this.#combineWithOperator(filter, operand, 'and'));
    });

    return filter;
  });

  // Primary: either a parenthesized sub-expression or a single condition.
  #primaryExpression = this.RULE('primaryExpression', (): QueryFilter => {
    return this.OR([
      {
        ALT: () => {
          this.CONSUME(LParen);
          const inner = this.SUBRULE(this.#orExpression);
          this.CONSUME(RParen);
          return inner;
        },
      },
      { ALT: () => this.SUBRULE(this.#condition) },
    ]);
  });

  // Condition: a field reference followed by an operator and its value(s).
  // Alternatives that start with the same tokens are told apart by a GATE that
  // checks whether the upcoming literal is a string or a number.
  #condition = this.RULE('condition', (): QueryCondition => {
    const field = this.SUBRULE(this.#fieldReference);

    return this.OR([
      // IS NULL / IS NOT NULL
      {
        ALT: () => {
          this.CONSUME(Is);
          const negation = this.OPTION(() => this.CONSUME(Not));
          const isNot = negation !== undefined;
          this.CONSUME(Null);

          // NOTE(review): the IS NOT NULL shape carries no concrete constraint
          // (both keys are undefined), unlike IS NULL which yields
          // `{ equal: null }`. Confirm against the schema/consumer whether
          // `{ notEqual: null }` was intended.
          return this.ACTION(() =>
            this.#textCondition(field, isNot ? { notEqual: undefined, equal: undefined } : { equal: null }),
          );
        },
      },
      // NOT IN (strings) - LA(1)=NOT, LA(2)=IN, LA(3)=(, LA(4)=first value
      {
        GATE: () => this.LA(4).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME2(Not);
          this.CONSUME(In);
          const items = this.SUBRULE(this.#stringInList);
          return this.ACTION(() => this.#textCondition(field, { notIn: items }));
        },
      },
      // NOT IN (numbers)
      {
        GATE: () => this.LA(4).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME3(Not);
          this.CONSUME2(In);
          const items = this.SUBRULE(this.#numberInList);
          return this.ACTION(() => this.#numberCondition(field, { notIn: items }));
        },
      },
      // NOT LIKE
      {
        ALT: () => {
          this.CONSUME4(Not);
          this.CONSUME(Like);
          const patternToken = this.CONSUME(StringLiteral);
          return this.ACTION(() =>
            this.#textCondition(field, { notLike: this.#extractStringValue(patternToken.image) }),
          );
        },
      },
      // IN (strings) - LA(1)=IN, LA(2)=(, LA(3)=first value
      {
        GATE: () => this.LA(3).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME3(In);
          const items = this.SUBRULE2(this.#stringInList);
          return this.ACTION(() => this.#textCondition(field, { in: items }));
        },
      },
      // IN (numbers)
      {
        GATE: () => this.LA(3).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME4(In);
          const items = this.SUBRULE2(this.#numberInList);
          return this.ACTION(() => this.#numberCondition(field, { in: items }));
        },
      },
      // LIKE
      {
        ALT: () => {
          this.CONSUME2(Like);
          const patternToken = this.CONSUME2(StringLiteral);
          return this.ACTION(() =>
            this.#textCondition(field, { like: this.#extractStringValue(patternToken.image) }),
          );
        },
      },
      // = string
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(Equals);
          const literal = this.CONSUME3(StringLiteral);
          return this.ACTION(() =>
            this.#textCondition(field, { equal: this.#extractStringValue(literal.image) }),
          );
        },
      },
      // = number
      // NOTE(review): number conditions use `equals`/`notEquals` whereas text
      // conditions use `equal`/`notEqual` - presumably mirrors the schema; verify.
      {
        GATE: () => this.LA(2).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME2(Equals);
          const literal = this.CONSUME(NumberLiteral);
          return this.ACTION(() => this.#numberCondition(field, { equals: Number.parseFloat(literal.image) }));
        },
      },
      // = NULL
      {
        ALT: () => {
          this.CONSUME3(Equals);
          this.CONSUME2(Null);
          return this.ACTION(() => this.#textCondition(field, { equal: null }));
        },
      },
      // != string
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(NotEquals);
          const literal = this.CONSUME4(StringLiteral);
          return this.ACTION(() =>
            this.#textCondition(field, { notEqual: this.#extractStringValue(literal.image) }),
          );
        },
      },
      // != number
      {
        ALT: () => {
          this.CONSUME2(NotEquals);
          const literal = this.CONSUME2(NumberLiteral);
          return this.ACTION(() => this.#numberCondition(field, { notEquals: Number.parseFloat(literal.image) }));
        },
      },
      // > number
      {
        ALT: () => {
          this.CONSUME(GreaterThan);
          const literal = this.CONSUME3(NumberLiteral);
          return this.ACTION(() => this.#numberCondition(field, { greaterThan: Number.parseFloat(literal.image) }));
        },
      },
      // >= number
      {
        ALT: () => {
          this.CONSUME(GreaterThanOrEqual);
          const literal = this.CONSUME4(NumberLiteral);
          return this.ACTION(() =>
            this.#numberCondition(field, { greaterThanOrEqual: Number.parseFloat(literal.image) }),
          );
        },
      },
      // < number
      {
        ALT: () => {
          this.CONSUME(LessThan);
          const literal = this.CONSUME5(NumberLiteral);
          return this.ACTION(() => this.#numberCondition(field, { lessThan: Number.parseFloat(literal.image) }));
        },
      },
      // <= number
      {
        ALT: () => {
          this.CONSUME(LessThanOrEqual);
          const literal = this.CONSUME6(NumberLiteral);
          return this.ACTION(() =>
            this.#numberCondition(field, { lessThanOrEqual: Number.parseFloat(literal.image) }),
          );
        },
      },
    ]);
  });

  // Field reference: one identifier, optionally followed by ".identifier" segments.
  // Returns the segments in source order.
  #fieldReference = this.RULE('fieldReference', (): string[] => {
    const segments: string[] = [];
    const head = this.CONSUME(Identifier);
    this.ACTION(() => segments.push(head.image));

    this.MANY(() => {
      this.CONSUME(Dot);
      const segment = this.CONSUME2(Identifier);
      this.ACTION(() => segments.push(segment.image));
    });

    return segments;
  });

  // String IN list: ('val1', 'val2', ...) - at least one value is required.
  #stringInList = this.RULE('stringInList', (): string[] => {
    const items: string[] = [];

    this.CONSUME(LParen);
    const head = this.CONSUME(StringLiteral);
    this.ACTION(() => items.push(this.#extractStringValue(head.image)));

    this.MANY(() => {
      this.CONSUME(Comma);
      const item = this.CONSUME2(StringLiteral);
      this.ACTION(() => items.push(this.#extractStringValue(item.image)));
    });

    this.CONSUME(RParen);
    return items;
  });

  // Number IN list: (1, 2, 3, ...) - at least one value is required.
  #numberInList = this.RULE('numberInList', (): number[] => {
    const items: number[] = [];

    this.CONSUME2(LParen);
    const head = this.CONSUME(NumberLiteral);
    this.ACTION(() => items.push(Number.parseFloat(head.image)));

    this.MANY(() => {
      this.CONSUME2(Comma);
      const item = this.CONSUME2(NumberLiteral);
      this.ACTION(() => items.push(Number.parseFloat(item.image)));
    });

    this.CONSUME2(RParen);
    return items;
  });

  /** Builds a condition object tagged `type: 'text'` for `field`. */
  #textCondition<C>(field: string[], conditions: C): { type: 'text'; field: string[]; conditions: C } {
    return { type: 'text' as const, field, conditions };
  }

  /** Builds a condition object tagged `type: 'number'` for `field`. */
  #numberCondition<C>(field: string[], conditions: C): { type: 'number'; field: string[]; conditions: C } {
    return { type: 'number' as const, field, conditions };
  }

  /**
   * Converts the raw text of a quoted string literal into its value.
   *
   * @param image - Token text including the surrounding single quotes.
   * @returns The text between the quotes, with each doubled quote (`''`) collapsed to `'`.
   */
  #extractStringValue(image: string): string {
    const inner = image.slice(1, -1);
    return inner.replace(/''/g, "'");
  }

  /**
   * Joins two filters under a logical operator.
   *
   * When `left` is already an operator node with the same operator, `right` is
   * appended to a copy of its conditions, so `a AND b AND c` becomes a single
   * three-way node instead of a nested pair. Only the left side is flattened.
   */
  #combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
    const conditions =
      left.type === 'operator' && left.operator === operator ? [...left.conditions, right] : [left, right];

    return {
      type: 'operator',
      operator,
      conditions,
    };
  }

  // Entry point
  #query = this.RULE('query', (): QueryFilter => {
    return this.SUBRULE(this.#orExpression);
  });

  /**
   * Tokenizes and parses `input`, then passes the result through `schema.parse`.
   *
   * @param input - The filter expression text.
   * @param schema - Schema whose `parse` is applied to the result; defaults to `queryFilterSchema`.
   * @returns Whatever `schema.parse` returns for the parsed filter.
   * @throws Error "Unterminated string starting at position N" when lexing fails
   *   at or about a quote and the input has an odd number of unescaped quotes.
   * @throws Error "Lexer error at position N: ..." for any other lexing failure.
   * @throws Error "Parse error: ..." carrying the first parser error message.
   */
  public parse = <T extends typeof queryFilterSchema>(
    input: string,
    schema: T = queryFilterSchema as unknown as T,
  ): QueryFilter => {
    const { tokens, errors: lexErrors } = QueryLexer.tokenize(input);
    const [lexError] = lexErrors;

    if (lexError !== undefined) {
      // Check if this looks like an unterminated string (starts with ' but lexer failed)
      const involvesQuote = lexError.message.includes("'") || input.slice(lexError.offset).startsWith("'");

      if (involvesQuote) {
        // Count the single quotes left once escaped pairs ('') are removed
        const unescapedQuoteCount = input.replace(/''/g, '').match(/'/g)?.length ?? 0;

        if (unescapedQuoteCount % 2 !== 0) {
          throw new Error(`Unterminated string starting at position ${lexError.offset}`);
        }
      }

      throw new Error(`Lexer error at position ${lexError.offset}: ${lexError.message}`);
    }

    this.input = tokens;
    const filter = this.#query();

    const [parseError] = this.errors;
    if (parseError !== undefined) {
      throw new Error(`Parse error: ${parseError.message}`);
    }

    return schema.parse(filter);
  };
}
|
|
|
|
export { QueryParserParser, QueryLexer };
|