update
This commit is contained in:
457
packages/query-dsl/src/query-parser.parser.ts
Normal file
457
packages/query-dsl/src/query-parser.parser.ts
Normal file
@@ -0,0 +1,457 @@
|
||||
import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain';
|
||||
|
||||
import type { QueryFilter, QueryCondition } from './query-parser.schemas.js';
|
||||
|
||||
// ----------------- Lexer -----------------
|
||||
|
||||
// Whitespace (skipped)
/** Runs of whitespace. Placed in the SKIPPED group, so they never reach the parser. */
const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

// Identifier
//
// Declared ahead of the keywords so that each keyword can name it as its
// `longer_alt` at creation time. Declaration order has no effect on matching
// priority; that is decided by the position of each token in the array handed
// to the Lexer, where the keywords are listed before Identifier.
/** A field-name segment: a letter or underscore followed by letters, digits or underscores. */
const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });

// Keywords (case-insensitive)
//
// `longer_alt: Identifier` makes the lexer prefer Identifier whenever it yields
// a longer match at the same position, so "ANDROID" is lexed as one Identifier
// rather than the keyword AND followed by "ROID".
const And = createToken({ name: 'And', pattern: /AND/i, longer_alt: Identifier });
const Or = createToken({ name: 'Or', pattern: /OR/i, longer_alt: Identifier });
const Like = createToken({ name: 'Like', pattern: /LIKE/i, longer_alt: Identifier });
const Not = createToken({ name: 'Not', pattern: /NOT/i, longer_alt: Identifier });
const In = createToken({ name: 'In', pattern: /IN/i, longer_alt: Identifier });
const Is = createToken({ name: 'Is', pattern: /IS/i, longer_alt: Identifier });
const Null = createToken({ name: 'Null', pattern: /NULL/i, longer_alt: Identifier });
|
||||
|
||||
// Literal tokens

/**
 * Single-quoted string. The body is any run of doubled quotes (`''`) and
 * non-quote characters, so a quote inside the string is written as `''`.
 */
const StringLiteral = createToken({ name: 'StringLiteral', pattern: /'(?:''|[^'])*'/ });

/**
 * Number: optional leading minus, an integer part that is either `0` or has no
 * leading zero, an optional fraction, and an optional exponent.
 */
const NumberLiteral = createToken({ name: 'NumberLiteral', pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/ });
|
||||
|
||||
// Comparison operators: two-character forms
const NotEquals = createToken({
  name: 'NotEquals',
  pattern: /!=/,
});
const GreaterThanOrEqual = createToken({
  name: 'GreaterThanOrEqual',
  pattern: />=/,
});
const LessThanOrEqual = createToken({
  name: 'LessThanOrEqual',
  pattern: /<=/,
});

// Comparison operators: one-character forms
const Equals = createToken({
  name: 'Equals',
  pattern: /=/,
});
const GreaterThan = createToken({
  name: 'GreaterThan',
  pattern: />/,
});
const LessThan = createToken({
  name: 'LessThan',
  pattern: /</,
});

// Grouping and separators
const LParen = createToken({
  name: 'LParen',
  pattern: /\(/,
});
const RParen = createToken({
  name: 'RParen',
  pattern: /\)/,
});
const Comma = createToken({
  name: 'Comma',
  pattern: /,/,
});
const Dot = createToken({
  name: 'Dot',
  pattern: /\./,
});
|
||||
|
||||
/**
 * Lexer vocabulary, in matching-priority order: a token listed earlier is
 * tried before one listed later, so the more specific pattern of any
 * overlapping pair has to come first.
 */
const allTokens = [
  WhiteSpace,

  // Two-character operators ahead of the one-character operators they start with
  NotEquals, GreaterThanOrEqual, LessThanOrEqual,
  Equals, GreaterThan, LessThan,

  // Punctuation
  LParen, RParen, Comma, Dot,

  // Keywords ahead of Identifier
  And, Or, Like, Not, In, Is, Null,

  // Literals
  StringLiteral, NumberLiteral,

  // Identifier goes last
  Identifier,
];

/** Lexer instance built from the vocabulary above. */
const QueryLexer = new Lexer(allTokens);
|
||||
|
||||
// ----------------- Parser -----------------
|
||||
|
||||
/**
 * Embedded-actions parser that turns a filter expression into a `QueryFilter`.
 *
 * Grammar, loosest-binding rule first:
 *
 *   query             := orExpression
 *   orExpression      := andExpression (OR andExpression)*
 *   andExpression     := primaryExpression (AND primaryExpression)*
 *   primaryExpression := '(' orExpression ')' | condition
 *   condition         := fieldReference predicate
 *   fieldReference    := Identifier ('.' Identifier)*
 *
 * Inside a single rule, every repeated use of the same token type or sub-rule
 * carries its own numeric suffix (CONSUME, CONSUME2, ... / SUBRULE, SUBRULE2).
 * Keep those suffixes unique per rule when editing.
 */
class QueryParserParser extends EmbeddedActionsParser {
  constructor() {
    super(allTokens);
    // Field initialisers (the rule definitions below) run right after
    // `super()` returns, so every rule is already registered at this point.
    this.performSelfAnalysis();
  }

  /** OR chain; binds looser than AND. */
  #orExpression = this.RULE('orExpression', (): QueryFilter => {
    let result = this.SUBRULE(this.#andExpression);

    this.MANY(() => {
      this.CONSUME(Or);
      const operand = this.SUBRULE2(this.#andExpression);
      result = this.ACTION(() => this.#combineWithOperator(result, operand, 'or'));
    });

    return result;
  });

  /** AND chain; binds tighter than OR. */
  #andExpression = this.RULE('andExpression', (): QueryFilter => {
    let result = this.SUBRULE(this.#primaryExpression);

    this.MANY(() => {
      this.CONSUME(And);
      const operand = this.SUBRULE2(this.#primaryExpression);
      result = this.ACTION(() => this.#combineWithOperator(result, operand, 'and'));
    });

    return result;
  });

  /** Either a parenthesised sub-expression or a single condition. */
  #primaryExpression = this.RULE('primaryExpression', (): QueryFilter => {
    return this.OR([
      {
        ALT: () => {
          this.CONSUME(LParen);
          const inner = this.SUBRULE(this.#orExpression);
          this.CONSUME(RParen);
          return inner;
        },
      },
      { ALT: () => this.SUBRULE(this.#condition) },
    ]);
  });

  /**
   * One predicate on one field: the field reference followed by an operator
   * and its operand(s).
   *
   * Alternatives that carry a GATE peek at an upcoming token with `LA(n)` to
   * tell a string operand from a number operand. The order of alternatives is
   * significant and is kept as-is.
   */
  #condition = this.RULE('condition', (): QueryCondition => {
    const field = this.SUBRULE(this.#fieldReference);

    return this.OR([
      // field IS NULL / field IS NOT NULL
      {
        ALT: () => {
          this.CONSUME(Is);
          const negation = this.OPTION(() => this.CONSUME(Not));
          const negated = negation !== undefined;
          this.CONSUME(Null);

          // NOTE(review): the negated form yields conditions whose `equal` and
          // `notEqual` are both undefined, i.e. no constraint value at all.
          // Confirm against the schema and the consumer whether IS NOT NULL is
          // meant to be expressed this way (e.g. rather than `notEqual: null`).
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: negated ? { notEqual: undefined, equal: undefined } : { equal: null },
          }));
        },
      },

      // field NOT IN ('a', 'b'): LA(1)=NOT, LA(2)=IN, LA(3)='(', LA(4)=first element
      {
        GATE: () => this.LA(4).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME2(Not);
          this.CONSUME(In);
          const items = this.SUBRULE(this.#stringInList);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notIn: items },
          }));
        },
      },

      // field NOT IN (1, 2)
      {
        GATE: () => this.LA(4).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME3(Not);
          this.CONSUME2(In);
          const items = this.SUBRULE(this.#numberInList);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { notIn: items },
          }));
        },
      },

      // field NOT LIKE 'pattern'
      {
        ALT: () => {
          this.CONSUME4(Not);
          this.CONSUME(Like);
          const literal = this.CONSUME(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notLike: this.#extractStringValue(literal.image) },
          }));
        },
      },

      // field IN ('a', 'b'): LA(1)=IN, LA(2)='(', LA(3)=first element
      {
        GATE: () => this.LA(3).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME3(In);
          const items = this.SUBRULE2(this.#stringInList);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { in: items },
          }));
        },
      },

      // field IN (1, 2)
      {
        GATE: () => this.LA(3).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME4(In);
          const items = this.SUBRULE2(this.#numberInList);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { in: items },
          }));
        },
      },

      // field LIKE 'pattern'
      {
        ALT: () => {
          this.CONSUME2(Like);
          const literal = this.CONSUME2(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { like: this.#extractStringValue(literal.image) },
          }));
        },
      },

      // field = 'text'
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(Equals);
          const literal = this.CONSUME3(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { equal: this.#extractStringValue(literal.image) },
          }));
        },
      },

      // field = 42
      {
        GATE: () => this.LA(2).tokenType === NumberLiteral,
        ALT: () => {
          this.CONSUME2(Equals);
          const literal = this.CONSUME(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { equals: Number.parseFloat(literal.image) },
          }));
        },
      },

      // field = NULL
      {
        ALT: () => {
          this.CONSUME3(Equals);
          this.CONSUME2(Null);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { equal: null },
          }));
        },
      },

      // field != 'text'
      {
        GATE: () => this.LA(2).tokenType === StringLiteral,
        ALT: () => {
          this.CONSUME(NotEquals);
          const literal = this.CONSUME4(StringLiteral);
          return this.ACTION(() => ({
            type: 'text' as const,
            field,
            conditions: { notEqual: this.#extractStringValue(literal.image) },
          }));
        },
      },

      // field != 42
      {
        ALT: () => {
          this.CONSUME2(NotEquals);
          const literal = this.CONSUME2(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { notEquals: Number.parseFloat(literal.image) },
          }));
        },
      },

      // field > 42
      {
        ALT: () => {
          this.CONSUME(GreaterThan);
          const literal = this.CONSUME3(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { greaterThan: Number.parseFloat(literal.image) },
          }));
        },
      },

      // field >= 42
      {
        ALT: () => {
          this.CONSUME(GreaterThanOrEqual);
          const literal = this.CONSUME4(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { greaterThanOrEqual: Number.parseFloat(literal.image) },
          }));
        },
      },

      // field < 42
      {
        ALT: () => {
          this.CONSUME(LessThan);
          const literal = this.CONSUME5(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { lessThan: Number.parseFloat(literal.image) },
          }));
        },
      },

      // field <= 42
      {
        ALT: () => {
          this.CONSUME(LessThanOrEqual);
          const literal = this.CONSUME6(NumberLiteral);
          return this.ACTION(() => ({
            type: 'number' as const,
            field,
            conditions: { lessThanOrEqual: Number.parseFloat(literal.image) },
          }));
        },
      },
    ]);
  });

  /** Dotted field path (`a.b.c`), returned as its segments in source order. */
  #fieldReference = this.RULE('fieldReference', (): string[] => {
    const segments: string[] = [];

    const head = this.CONSUME(Identifier);
    this.ACTION(() => segments.push(head.image));

    this.MANY(() => {
      this.CONSUME(Dot);
      const segment = this.CONSUME2(Identifier);
      this.ACTION(() => segments.push(segment.image));
    });

    return segments;
  });

  /** Parenthesised, comma-separated list of one or more string literals, unquoted. */
  #stringInList = this.RULE('stringInList', (): string[] => {
    const items: string[] = [];

    this.CONSUME(LParen);
    const head = this.CONSUME(StringLiteral);
    this.ACTION(() => items.push(this.#extractStringValue(head.image)));

    this.MANY(() => {
      this.CONSUME(Comma);
      const item = this.CONSUME2(StringLiteral);
      this.ACTION(() => items.push(this.#extractStringValue(item.image)));
    });

    this.CONSUME(RParen);
    return items;
  });

  /** Parenthesised, comma-separated list of one or more number literals. */
  #numberInList = this.RULE('numberInList', (): number[] => {
    const items: number[] = [];

    this.CONSUME2(LParen);
    const head = this.CONSUME(NumberLiteral);
    this.ACTION(() => items.push(Number.parseFloat(head.image)));

    this.MANY(() => {
      this.CONSUME2(Comma);
      const item = this.CONSUME2(NumberLiteral);
      this.ACTION(() => items.push(Number.parseFloat(item.image)));
    });

    this.CONSUME2(RParen);
    return items;
  });

  /**
   * Converts the raw text of a quoted literal into its value.
   *
   * @param image Literal text including its surrounding single quotes.
   * @returns The text between the quotes, with each `''` collapsed to `'`.
   */
  #extractStringValue(image: string): string {
    const body = image.slice(1, -1);
    return body.replace(/''/g, "'");
  }

  /**
   * Joins two filters under `operator`.
   *
   * When `left` is already an operator node of the same kind, `right` is
   * appended to a copy of its conditions instead of adding a nesting level, so
   * `a AND b AND c` becomes one three-way node. `left` itself is not mutated.
   */
  #combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
    const conditions =
      left.type === 'operator' && left.operator === operator ? [...left.conditions, right] : [left, right];

    return { type: 'operator', operator, conditions };
  }

  /** Grammar entry point. */
  #query = this.RULE('query', (): QueryFilter => {
    return this.SUBRULE(this.#orExpression);
  });

  /**
   * Lexes and parses `input`.
   *
   * @param input Filter expression text.
   * @returns The parsed filter tree.
   * @throws Error "Unterminated string starting at position N" when lexing
   *   fails at or around a quote and the input holds an odd number of
   *   unescaped single quotes.
   * @throws Error "Lexer error at position N: ..." for any other lexing failure.
   * @throws Error "Parse error: ..." carrying the first parser error.
   */
  public parse = (input: string): QueryFilter => {
    const { errors: lexErrors, tokens } = QueryLexer.tokenize(input);

    if (lexErrors.length > 0) {
      const [error] = lexErrors;
      const quoteInvolved = error.message.includes("'") || input.slice(error.offset).startsWith("'");

      if (quoteInvolved) {
        // Remove escaped pairs first; an odd number of remaining quotes means
        // one string was opened and never closed.
        const strayQuotes = input.replace(/''/g, '').match(/'/g);
        if ((strayQuotes?.length ?? 0) % 2 !== 0) {
          throw new Error(`Unterminated string starting at position ${error.offset}`);
        }
      }

      throw new Error(`Lexer error at position ${error.offset}: ${error.message}`);
    }

    this.input = tokens;
    const result = this.#query();

    if (this.errors.length > 0) {
      const error = this.errors[0];
      throw new Error(`Parse error: ${error.message}`);
    }

    return result;
  };
}
|
||||
|
||||
export { QueryParserParser, QueryLexer };
|
||||
Reference in New Issue
Block a user