From 9f9bc03d032a0b2c764a0b0c1e7bb8c8dd427d83 Mon Sep 17 00:00:00 2001 From: Morten Olsen Date: Tue, 9 Dec 2025 21:19:30 +0100 Subject: [PATCH] feat: add query dsl --- packages/server/docs/query-language.md | 336 ++++++++ .../src/query-parser/query-parser.lexer.ts | 202 +++++ .../src/query-parser/query-parser.parser.ts | 317 ++++++++ .../query-parser/query-parser.stringifier.ts | 135 ++++ .../src/query-parser/query-parser.test.ts | 754 ++++++++++++++++++ .../server/src/query-parser/query-parser.ts | 19 + .../src/query-parser/query-parser.types.ts | 30 + .../document-chunks.schemas.ts | 2 +- .../document-chunks/document-chunks.ts | 7 +- .../services/documents/documents.schemas.ts | 40 +- .../src/services/documents/documents.ts | 11 +- packages/server/src/utils/utils.query.ts | 1 + 12 files changed, 1837 insertions(+), 17 deletions(-) create mode 100644 packages/server/docs/query-language.md create mode 100644 packages/server/src/query-parser/query-parser.lexer.ts create mode 100644 packages/server/src/query-parser/query-parser.parser.ts create mode 100644 packages/server/src/query-parser/query-parser.stringifier.ts create mode 100644 packages/server/src/query-parser/query-parser.test.ts create mode 100644 packages/server/src/query-parser/query-parser.ts create mode 100644 packages/server/src/query-parser/query-parser.types.ts diff --git a/packages/server/docs/query-language.md b/packages/server/docs/query-language.md new file mode 100644 index 0000000..35cb181 --- /dev/null +++ b/packages/server/docs/query-language.md @@ -0,0 +1,336 @@ +# Query Language Specification + +This document describes the SQL-like query language syntax for building database queries. The language supports filtering on both text and numeric fields, including nested JSON fields, with logical operators for complex queries. + +## Overview + +The query language provides a human-readable, SQL-like syntax that can be parsed into the internal JSON query format used by the system. 
It supports: + +- Text field conditions (equality, pattern matching, membership) +- Numeric field conditions (comparison operators, membership) +- Nested JSON field access using dot notation +- Logical operators (AND, OR) with grouping +- NULL value checks + +## Syntax + +### Field References + +Fields are referenced using dot notation for nested JSON paths: + +``` +field_name +metadata.foo +metadata.nested.deep.field +``` + +**Examples:** +- `content` - top-level field +- `metadata.author` - nested field in metadata object +- `metadata.tags.0` - array element (if needed) + +### Text Conditions + +Text conditions operate on string values: + +| Operator | Syntax | Description | +|----------|--------|-------------| +| Equality | `field = 'value'` | Exact match | +| Inequality | `field != 'value'` | Not equal | +| NULL check | `field IS NULL` | Field is null | +| NOT NULL | `field IS NOT NULL` | Field is not null | +| Pattern match | `field LIKE 'pattern'` | SQL LIKE pattern matching | +| Not like | `field NOT LIKE 'pattern'` | Negated pattern matching | +| In list | `field IN ('val1', 'val2', 'val3')` | Value in list | +| Not in list | `field NOT IN ('val1', 'val2')` | Value not in list | + +**String Literals:** +- Single quotes: `'value'` +- Escaped quotes: `'O''Brien'` (double single quote) +- Empty string: `''` + +**LIKE Patterns:** +- `%` matches any sequence of characters +- `_` matches any single character +- Examples: `'%cat%'`, `'test_%'`, `'exact'` + +**Examples:** +```sql +content = 'hello world' +metadata.foo = 'bar' +type != 'draft' +source IS NULL +title LIKE '%cat%' +author NOT LIKE '%admin%' +status IN ('published', 'archived') +category NOT IN ('deleted', 'hidden') +``` + +### Numeric Conditions + +Numeric conditions operate on number values: + +| Operator | Syntax | Description | +|----------|--------|-------------| +| Equality | `field = 123` | Exact match | +| Inequality | `field != 123` | Not equal | +| NULL check | `field IS NULL` | Field is null 
| +| NOT NULL | `field IS NOT NULL` | Field is not null | +| Greater than | `field > 10` | Greater than | +| Greater or equal | `field >= 10` | Greater than or equal | +| Less than | `field < 10` | Less than | +| Less or equal | `field <= 10` | Less than or equal | +| In list | `field IN (1, 2, 3)` | Value in list | +| Not in list | `field NOT IN (1, 2, 3)` | Value not in list | + +**Numeric Literals:** +- Integers: `123`, `-45`, `0` +- Decimals: `123.45`, `-0.5`, `3.14159` +- Scientific notation: `1e10`, `2.5e-3` (if supported) + +**Examples:** +```sql +typeVersion = 1 +score > 0.5 +views >= 100 +priority < 5 +age <= 65 +rating IN (1, 2, 3, 4, 5) +count NOT IN (0, -1) +``` + +### Logical Operators + +Combine conditions using `AND` and `OR` operators: + +| Operator | Syntax | Description | +|----------|--------|-------------| +| AND | `condition1 AND condition2` | Both conditions must be true | +| OR | `condition1 OR condition2` | At least one condition must be true | + +**Grouping:** +Use parentheses `()` to group conditions and control operator precedence: + +```sql +(condition1 AND condition2) OR condition3 +condition1 AND (condition2 OR condition3) +``` + +**Examples:** +```sql +type = 'article' AND status = 'published' +metadata.foo = 'bar' OR metadata.foo = 'baz' +(type = 'post' OR type = 'page') AND views > 100 +``` + +### Operator Precedence + +1. Parentheses `()` - highest precedence +2. `AND` - evaluated before OR +3. 
`OR` - lowest precedence + +**Examples:** +```sql +-- Equivalent to: (A AND B) OR C +A AND B OR C + +-- Equivalent to: A AND (B OR C) +A AND (B OR C) + +-- Explicit grouping +(A OR B) AND (C OR D) +``` + +## Complete Examples + +### Simple Conditions + +```sql +-- Text equality +metadata.author = 'John Doe' + +-- Numeric comparison +views >= 1000 + +-- Pattern matching +title LIKE '%tutorial%' + +-- NULL check +source IS NULL +``` + +### Multiple Conditions + +```sql +-- AND operator +type = 'article' AND status = 'published' AND views > 100 + +-- OR operator +category = 'tech' OR category = 'science' + +-- Mixed operators +(type = 'post' OR type = 'page') AND status = 'published' +``` + +### Complex Nested Queries + +```sql +-- Nested AND within OR +(metadata.foo = 'bar' AND type = 'demo') OR metadata.foo = 'baz' + +-- Multiple levels of nesting +((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news' + +-- Complex query with multiple field types +type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100 AND rating IN (4, 5) +``` + +### Array/List Operations + +```sql +-- Text IN +status IN ('published', 'archived', 'draft') + +-- Numeric IN +priority IN (1, 2, 3) + +-- NOT IN +category NOT IN ('deleted', 'hidden') +``` + +## Type Inference + +The parser will infer the condition type (text vs number) based on: + +1. **Operator context**: Operators like `>`, `<`, `>=`, `<=` imply numeric +2. **Value type**: + - Quoted strings (`'value'`) → text condition + - Unquoted numbers (`123`, `45.6`) → numeric condition + - `NULL` → carries no type of its own; the parser emits a text condition for `IS NULL` and `= NULL` +3. 
**Field name**: If a field is known to be numeric, numeric operators are used + +**Examples:** +```sql +-- Text condition (quoted string) +author = 'John' + +-- Numeric condition (unquoted number) +age = 30 + +-- Numeric comparison +score > 0.5 + +-- Text pattern +title LIKE '%test%' +``` + +## Escaping and Special Characters + +### String Escaping + +- Single quotes in strings: `'O''Brien'` → `O'Brien` +- Empty string: `''` + +### Field Name Escaping + +Quoted field names are reserved for a possible future extension and are not accepted by the current lexer (a `"` is reported as an unexpected character). If support is added, field names containing special characters or reserved words would be written as: + +```sql +-- Reserved words or special characters (not yet supported) +"order" = 'asc' +"metadata.field-name" = 'value' +``` + +## Error Handling + +The parser should provide clear error messages for: + +- Invalid syntax +- Mismatched parentheses +- Invalid operators for field types +- Missing values +- Invalid escape sequences + +## Grammar (BNF-like) + +``` +query ::= expression +expression ::= condition | group +group ::= '(' expression ')' + | expression AND expression + | expression OR expression +condition ::= text_condition | numeric_condition +text_condition ::= field ( '=' | '!=' | 'LIKE' | 'NOT LIKE' ) string_literal + | field 'IS' ( 'NULL' | 'NOT NULL' ) + | field 'IN' '(' string_list ')' + | field 'NOT IN' '(' string_list ')' +numeric_condition ::= field ( '=' | '!=' | '>' | '>=' | '<' | '<=' ) number + | field 'IS' ( 'NULL' | 'NOT NULL' ) + | field 'IN' '(' number_list ')' + | field 'NOT IN' '(' number_list ')' +field ::= identifier ( '.' identifier )* +identifier ::= [a-zA-Z_][a-zA-Z0-9_]* +string_literal ::= "'" ( escaped_char | [^'] )* "'" +escaped_char ::= "''" +string_list ::= string_literal ( ',' string_literal )* +number ::= '-'? [0-9]+ ( '.' [0-9]+ )? ( [eE] [+-]? [0-9]+ )? 
import type { Token } from './query-parser.types.ts';

/** Reserved words keyed by their upper-case spelling; keywords are matched case-insensitively. */
const KEYWORDS: ReadonlyMap<string, Token['type']> = new Map<string, Token['type']>([
  ['AND', 'AND'],
  ['OR', 'OR'],
  ['LIKE', 'LIKE'],
  ['NOT', 'NOT'],
  ['IN', 'IN'],
  ['IS', 'IS'],
  ['NULL', 'NULL'],
]);

/** Two-character operators; checked before the one-character table so `>=` is never split into `>` `=`. */
const TWO_CHAR_TOKENS: ReadonlyMap<string, Token['type']> = new Map<string, Token['type']>([
  ['!=', 'NOT_EQUALS'],
  ['>=', 'GREATER_THAN_OR_EQUAL'],
  ['<=', 'LESS_THAN_OR_EQUAL'],
]);

/** Punctuation and one-character operators. */
const ONE_CHAR_TOKENS: ReadonlyMap<string, Token['type']> = new Map<string, Token['type']>([
  ['(', 'LPAREN'],
  [')', 'RPAREN'],
  [',', 'COMMA'],
  ['.', 'DOT'],
  ['=', 'EQUALS'],
  ['>', 'GREATER_THAN'],
  ['<', 'LESS_THAN'],
]);

// Hoisted so the patterns are not rebuilt for every character scanned.
const WHITESPACE = /\s/;
const DIGIT = /[0-9]/;
const IDENTIFIER_START = /[a-zA-Z_]/;
const IDENTIFIER_PART = /[a-zA-Z0-9_]/;

/**
 * Splits a query string into tokens for the parser.
 *
 * Every token records the offset in the input at which it starts. The returned
 * list always ends with a single `EOF` token.
 */
class Lexer {
  #input: string;
  #position = 0;
  #tokens: Token[] = [];

  constructor(input: string) {
    this.#input = input;
  }

  /** Character at `position + offset`, or `''` when that is past the end of the input. */
  #peek = (offset = 0): string => this.#input.charAt(this.#position + offset);

  #skipWhitespace = (): void => {
    while (WHITESPACE.test(this.#peek())) {
      this.#position++;
    }
  };

  /** Consumes a run of ASCII digits (possibly empty). */
  #skipDigits = (): void => {
    while (DIGIT.test(this.#peek())) {
      this.#position++;
    }
  };

  /**
   * Reads the token starting at the current position.
   *
   * @throws Error when the current character cannot start any token.
   */
  #nextToken = (): Token => {
    const char = this.#peek();
    const next = this.#peek(1);
    const position = this.#position;

    const twoCharType = TWO_CHAR_TOKENS.get(char + next);
    if (twoCharType !== undefined) {
      this.#position += 2;
      return { type: twoCharType, value: char + next, position };
    }

    const oneCharType = ONE_CHAR_TOKENS.get(char);
    if (oneCharType !== undefined) {
      this.#position++;
      return { type: oneCharType, value: char, position };
    }

    if (char === "'") {
      return this.#readString();
    }

    // A minus sign only starts a number when a digit follows it directly.
    if (DIGIT.test(char) || (char === '-' && DIGIT.test(next))) {
      return this.#readNumber();
    }

    if (IDENTIFIER_START.test(char)) {
      return this.#readIdentifierOrKeyword();
    }

    throw new Error(`Unexpected character '${char}' at position ${this.#position}`);
  };

  /**
   * Reads a single-quoted string literal. A doubled quote (`''`) inside the
   * literal stands for one literal quote character.
   *
   * @throws Error when the input ends before the closing quote.
   */
  #readString = (): Token => {
    const position = this.#position;
    this.#position++; // opening quote
    let value = '';

    while (this.#position < this.#input.length) {
      const char = this.#peek();
      if (char !== "'") {
        value += char;
        this.#position++;
        continue;
      }
      if (this.#peek(1) === "'") {
        value += "'";
        this.#position += 2;
        continue;
      }
      this.#position++; // closing quote
      return { type: 'STRING', value, position };
    }

    throw new Error(`Unterminated string starting at position ${position}`);
  };

  /**
   * Reads `-? digits ( . digits )? ( [eE] [+-]? digits )?`.
   *
   * The fraction and the exponent are only consumed when at least one digit
   * follows them. Otherwise the `.` / `e` is left in the input for the next
   * token, so malformed text such as `1e` is not folded into the number.
   */
  #readNumber = (): Token => {
    const position = this.#position;

    if (this.#peek() === '-') {
      this.#position++;
    }
    this.#skipDigits();

    if (this.#peek() === '.' && DIGIT.test(this.#peek(1))) {
      this.#position++;
      this.#skipDigits();
    }

    const marker = this.#peek();
    if (marker === 'e' || marker === 'E') {
      const sign = this.#peek(1);
      const signLength = sign === '+' || sign === '-' ? 1 : 0;
      if (DIGIT.test(this.#peek(1 + signLength))) {
        this.#position += 1 + signLength;
        this.#skipDigits();
      }
    }

    return { type: 'NUMBER', value: this.#input.slice(position, this.#position), position };
  };

  /** Reads an identifier; reserved words become keyword tokens but keep their original spelling as `value`. */
  #readIdentifierOrKeyword = (): Token => {
    const position = this.#position;
    while (IDENTIFIER_PART.test(this.#peek())) {
      this.#position++;
    }
    const value = this.#input.slice(position, this.#position);

    return { type: KEYWORDS.get(value.toUpperCase()) ?? 'IDENTIFIER', value, position };
  };

  /**
   * Tokenizes the whole input.
   *
   * Scanning restarts from the beginning on every call, so calling this more
   * than once yields the same token list each time.
   *
   * @returns The tokens in input order, terminated by an `EOF` token.
   * @throws Error on an unexpected character or an unterminated string literal.
   */
  public tokenize = (): Token[] => {
    this.#position = 0;
    this.#tokens = [];

    for (;;) {
      this.#skipWhitespace();
      if (this.#position >= this.#input.length) break;
      this.#tokens.push(this.#nextToken());
    }

    this.#tokens.push({ type: 'EOF', value: '', position: this.#position });
    return this.#tokens;
  };
}

export { Lexer };
import { Lexer } from './query-parser.lexer.ts';
import type { Token, TokenType } from './query-parser.types.ts';

import type { QueryConditionText, QueryConditionNumber, QueryFilter, QueryCondition } from '#root/utils/utils.query.ts';

/**
 * Recursive-descent parser that turns a query string into a `QueryFilter` tree.
 *
 * Precedence, lowest to highest: `OR`, `AND`, parenthesised group / single
 * condition. Runs of the same logical operator are flattened into one operator
 * node with several children.
 */
class Parser {
  #tokens: Token[] = [];
  #position = 0;

  /** Token at the cursor; once the cursor runs past the end this keeps returning the final (`EOF`) token. */
  #current = (): Token => {
    const token = this.#tokens[this.#position] ?? this.#tokens[this.#tokens.length - 1];
    if (token === undefined) {
      throw new Error('No tokens to parse');
    }
    return token;
  };

  /** Returns the token at the cursor and moves the cursor one token forward. */
  #advance = (): Token => {
    const token = this.#current();
    this.#position++;
    return token;
  };

  /**
   * Consumes the current token if it has the given type.
   *
   * @throws Error naming the expected and actual token types otherwise.
   */
  #expect = (type: TokenType): Token => {
    const token = this.#current();
    if (token.type !== type) {
      throw new Error(`Expected ${type} but got ${token.type} at position ${token.position}`);
    }
    return this.#advance();
  };

  /**
   * Converts a NUMBER token to a number.
   *
   * `Number()` is used instead of `parseFloat` because `parseFloat` stops at
   * the first character it cannot use and would quietly turn a token such as
   * `1e` into `1`. Non-finite results (`1e999`) are rejected as well.
   *
   * @throws Error when the token text is not a finite number.
   */
  #toNumber = (token: Token): number => {
    const value = Number(token.value);
    if (!Number.isFinite(value)) {
      throw new Error(`Invalid number '${token.value}' at position ${token.position}`);
    }
    return value;
  };

  #parseExpression = (): QueryFilter => {
    return this.#parseOr();
  };

  /** `and_expr ( OR and_expr )*` */
  #parseOr = (): QueryFilter => {
    let left = this.#parseAnd();

    while (this.#current().type === 'OR') {
      this.#advance();
      left = this.#combineWithOperator(left, this.#parseAnd(), 'or');
    }

    return left;
  };

  /** `primary ( AND primary )*` */
  #parseAnd = (): QueryFilter => {
    let left = this.#parsePrimary();

    while (this.#current().type === 'AND') {
      this.#advance();
      left = this.#combineWithOperator(left, this.#parsePrimary(), 'and');
    }

    return left;
  };

  /**
   * Joins two filters under `operator`. When `left` is already a node of the
   * same operator, `right` is appended to a copy of its children instead of
   * nesting a new node.
   */
  #combineWithOperator = (left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter => {
    const conditions =
      left.type === 'operator' && left.operator === operator ? [...left.conditions, right] : [left, right];

    return { type: 'operator', operator, conditions };
  };

  /** A parenthesised expression or a single condition. */
  #parsePrimary = (): QueryFilter => {
    if (this.#current().type !== 'LPAREN') {
      return this.#parseCondition();
    }

    this.#advance();
    const expression = this.#parseExpression();
    this.#expect('RPAREN');
    return expression;
  };

  /**
   * Parses `field <operator> <operand>`.
   *
   * @throws Error when the token after the field is not a supported operator.
   */
  #parseCondition = (): QueryCondition => {
    const field = this.#parseField();
    const token = this.#current();

    switch (token.type) {
      case 'IS':
        return this.#parseNullCheck(field);
      case 'NOT':
        return this.#parseNegatedCondition(field);
      case 'IN':
        this.#advance();
        return this.#parseInCondition(field, false);
      case 'LIKE':
        this.#advance();
        return { type: 'text', field, conditions: { like: this.#expect('STRING').value } };
      case 'EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'equals');
      case 'NOT_EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'notEquals');
      case 'GREATER_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThan: this.#parseNumber() } };
      case 'GREATER_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThanOrEqual: this.#parseNumber() } };
      case 'LESS_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { lessThan: this.#parseNumber() } };
      case 'LESS_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { lessThanOrEqual: this.#parseNumber() } };
      default:
        throw new Error(`Unexpected token '${token.value}' at position ${token.position}`);
    }
  };

  /** `IS NULL` / `IS NOT NULL`; the cursor is on the `IS` token. Always produces a text condition. */
  #parseNullCheck = (field: string[]): QueryCondition => {
    this.#advance(); // IS
    const isNot = this.#current().type === 'NOT';
    if (isNot) {
      this.#advance();
    }
    this.#expect('NULL');

    // NOTE(review): for IS NOT NULL both keys are present but undefined, so the
    // resulting condition carries no constraint at all. Confirm how
    // QueryConditionText is meant to express "not null" and emit that instead.
    return {
      type: 'text',
      field,
      conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null },
    } satisfies QueryConditionText;
  };

  /** `NOT IN ( ... )` / `NOT LIKE '...'`; the cursor is on the `NOT` token. */
  #parseNegatedCondition = (field: string[]): QueryCondition => {
    this.#advance(); // NOT
    const next = this.#current();

    if (next.type === 'IN') {
      this.#advance();
      return this.#parseInCondition(field, true);
    }

    if (next.type === 'LIKE') {
      this.#advance();
      return { type: 'text', field, conditions: { notLike: this.#expect('STRING').value } };
    }

    throw new Error(`Expected IN or LIKE after NOT at position ${next.position}`);
  };

  /** `identifier ( . identifier )*`, returned as the list of path segments. */
  #parseField = (): string[] => {
    const parts = [this.#expect('IDENTIFIER').value];

    while (this.#current().type === 'DOT') {
      this.#advance();
      parts.push(this.#expect('IDENTIFIER').value);
    }

    return parts;
  };

  /**
   * Operand of `=` / `!=`. The literal decides the condition type: a string
   * gives a text condition, a number gives a number condition, and `NULL`
   * gives a text condition.
   *
   * @throws Error when the operand is not a string, number or `NULL`.
   */
  #parseValueCondition = (field: string[], operator: 'equals' | 'notEquals'): QueryCondition => {
    const token = this.#current();
    const isEquals = operator === 'equals';

    if (token.type === 'STRING') {
      this.#advance();
      const textCondition: QueryConditionText = {
        type: 'text',
        field,
        conditions: isEquals ? { equal: token.value } : { notEqual: token.value },
      };
      return textCondition;
    }

    if (token.type === 'NUMBER') {
      this.#advance();
      const value = this.#toNumber(token);
      const numCondition: QueryConditionNumber = {
        type: 'number',
        field,
        conditions: isEquals ? { equals: value } : { notEquals: value },
      };
      return numCondition;
    }

    if (token.type === 'NULL') {
      this.#advance();
      // NOTE(review): `!= NULL` produces an empty conditions object, i.e. no
      // constraint. Confirm the intended representation before relying on it.
      return {
        type: 'text',
        field,
        conditions: isEquals ? { equal: null } : {},
      } as QueryConditionText;
    }

    throw new Error(`Expected value but got ${token.type} at position ${token.position}`);
  };

  /** Consumes a NUMBER token and returns its numeric value. */
  #parseNumber = (): number => {
    return this.#toNumber(this.#expect('NUMBER'));
  };

  /** Reads `literal ( , literal )* )` where every literal has the given token type. */
  #parseList = <T>(type: 'STRING' | 'NUMBER', convert: (token: Token) => T): T[] => {
    const values = [convert(this.#expect(type))];

    while (this.#current().type === 'COMMA') {
      this.#advance();
      values.push(convert(this.#expect(type)));
    }

    this.#expect('RPAREN');
    return values;
  };

  /**
   * Parses the parenthesised list after `IN` / `NOT IN`. The first literal
   * fixes the list type; every later literal must have the same type.
   *
   * @throws Error when the list is empty or starts with something other than a string or number.
   */
  #parseInCondition = (field: string[], isNot: boolean): QueryCondition => {
    this.#expect('LPAREN');
    const firstToken = this.#current();

    if (firstToken.type === 'STRING') {
      const values = this.#parseList('STRING', (token) => token.value);
      return { type: 'text', field, conditions: isNot ? { notIn: values } : { in: values } };
    }

    if (firstToken.type === 'NUMBER') {
      const values = this.#parseList('NUMBER', this.#toNumber);
      return { type: 'number', field, conditions: isNot ? { notIn: values } : { in: values } };
    }

    throw new Error(`Expected STRING or NUMBER in IN list at position ${firstToken.position}`);
  };

  /**
   * Parses a complete query string.
   *
   * @param input Query text, e.g. `type = 'post' AND views > 100`.
   * @returns The filter tree for the query.
   * @throws Error on lexical errors, syntax errors, or trailing tokens after a complete expression.
   */
  public parse(input: string): QueryFilter {
    this.#tokens = new Lexer(input).tokenize();
    this.#position = 0;

    const result = this.#parseExpression();

    const trailing = this.#current();
    if (trailing.type !== 'EOF') {
      throw new Error(`Unexpected token '${trailing.value}' at position ${trailing.position}`);
    }

    return result;
  }
}

export { Parser };
import type {
  QueryFilter,
  QueryOperator,
  QueryCondition,
  QueryConditionText,
  QueryConditionNumber,
} from '#root/utils/utils.query.ts';

/**
 * Renders a `QueryFilter` tree as query-language text.
 *
 * Parentheses are added only where an operator group sits directly inside a
 * group of the other operator, so `AND`/`OR` precedence is preserved when the
 * text is parsed again.
 */
class Stringifier {
  /**
   * @param parentOperator Operator of the nearest enclosing group that is
   *   actually rendered, or `undefined` at the top level.
   */
  #stringifyFilter = (filter: QueryFilter, parentOperator?: QueryOperator['operator']): string => {
    if (filter.type === 'operator') {
      return this.#stringifyOperator(filter, parentOperator);
    }
    return this.#stringifyCondition(filter);
  };

  /**
   * Renders an operator group.
   *
   * - A group with no children renders as `''`.
   * - A group with one child renders as that child; the child is judged
   *   against the enclosing operator, so a wrapper node can never hide a
   *   precedence change from its parent.
   * - Children that render as `''` are dropped, so no dangling `AND` / `OR`
   *   is left behind.
   */
  #stringifyOperator = (op: QueryOperator, parentOperator?: QueryOperator['operator']): string => {
    const [onlyChild] = op.conditions;
    if (op.conditions.length === 1 && onlyChild !== undefined) {
      return this.#stringifyFilter(onlyChild, parentOperator);
    }

    const parts = op.conditions
      .map((condition) => this.#stringifyFilter(condition, op.operator))
      .filter((part) => part !== '');

    if (parts.length <= 1) {
      return parts[0] ?? '';
    }

    const result = parts.join(` ${op.operator.toUpperCase()} `);
    const needsParens = parentOperator !== undefined && parentOperator !== op.operator;

    return needsParens ? `(${result})` : result;
  };

  /** Renders one leaf condition; the field path is joined with dots. */
  #stringifyCondition = (condition: QueryCondition): string => {
    const fieldStr = condition.field.join('.');

    if (condition.type === 'text') {
      return this.#stringifyTextCondition(fieldStr, condition.conditions);
    }

    return this.#stringifyNumberCondition(fieldStr, condition.conditions);
  };

  /**
   * Renders the first text constraint that is set, checked in the order
   * equal, notEqual, like, notLike, in, notIn.
   *
   * @throws Error when none of the constraints is set.
   */
  #stringifyTextCondition = (field: string, conditions: QueryConditionText['conditions']): string => {
    if (conditions.equal !== undefined) {
      return conditions.equal === null ? `${field} IS NULL` : `${field} = ${this.#escapeString(conditions.equal)}`;
    }

    if (conditions.notEqual !== undefined) {
      // Same null handling as the number branch; without it a null value would
      // reach #escapeString and fail on `.replace`.
      return conditions.notEqual === null
        ? `${field} IS NOT NULL`
        : `${field} != ${this.#escapeString(conditions.notEqual)}`;
    }

    if (conditions.like !== undefined) {
      return `${field} LIKE ${this.#escapeString(conditions.like)}`;
    }

    if (conditions.notLike !== undefined) {
      return `${field} NOT LIKE ${this.#escapeString(conditions.notLike)}`;
    }

    if (conditions.in !== undefined) {
      return `${field} IN (${this.#stringList(conditions.in)})`;
    }

    if (conditions.notIn !== undefined) {
      return `${field} NOT IN (${this.#stringList(conditions.notIn)})`;
    }

    throw new Error('Invalid text condition: no condition specified');
  };

  /**
   * Renders the first numeric constraint that is set, checked in the order
   * equals, notEquals, greaterThan, greaterThanOrEqual, lessThan,
   * lessThanOrEqual, in, notIn.
   *
   * @throws Error when none of the constraints is set.
   */
  #stringifyNumberCondition = (field: string, conditions: QueryConditionNumber['conditions']): string => {
    if (conditions.equals !== undefined) {
      return conditions.equals === null ? `${field} IS NULL` : `${field} = ${conditions.equals}`;
    }

    if (conditions.notEquals !== undefined) {
      return conditions.notEquals === null ? `${field} IS NOT NULL` : `${field} != ${conditions.notEquals}`;
    }

    if (conditions.greaterThan !== undefined) {
      return `${field} > ${conditions.greaterThan}`;
    }

    if (conditions.greaterThanOrEqual !== undefined) {
      return `${field} >= ${conditions.greaterThanOrEqual}`;
    }

    if (conditions.lessThan !== undefined) {
      return `${field} < ${conditions.lessThan}`;
    }

    if (conditions.lessThanOrEqual !== undefined) {
      return `${field} <= ${conditions.lessThanOrEqual}`;
    }

    if (conditions.in !== undefined) {
      return `${field} IN (${conditions.in.join(', ')})`;
    }

    if (conditions.notIn !== undefined) {
      return `${field} NOT IN (${conditions.notIn.join(', ')})`;
    }

    throw new Error('Invalid number condition: no condition specified');
  };

  /** Quoted, comma-separated rendering of a list of string values. */
  #stringList = (values: readonly string[]): string => values.map((value) => this.#escapeString(value)).join(', ');

  /** Wraps `value` in single quotes, doubling any single quote inside it. */
  #escapeString = (value: string): string => {
    const escaped = value.replace(/'/g, "''");
    return `'${escaped}'`;
  };

  /**
   * Converts a filter tree to query-language text.
   *
   * @param filter Root of the filter tree.
   * @returns The query text; `''` when the tree contains no conditions.
   * @throws Error when a leaf condition has no constraint set.
   */
  public stringify = (filter: QueryFilter): string => {
    return this.#stringifyFilter(filter);
  };
}

export { Stringifier };
= 'John'"); + expect(result).toEqual({ + type: 'text', + field: ['metadata', 'author'], + conditions: { equal: 'John' }, + }); + }); + + it('should parse deeply nested field', () => { + const result = parser.parse("metadata.nested.deep.field = 'value'"); + expect(result).toEqual({ + type: 'text', + field: ['metadata', 'nested', 'deep', 'field'], + conditions: { equal: 'value' }, + }); + }); + + it('should parse text not equal', () => { + const result = parser.parse("type != 'draft'"); + expect(result).toEqual({ + type: 'text', + field: ['type'], + conditions: { notEqual: 'draft' }, + }); + }); + + it('should parse LIKE pattern', () => { + const result = parser.parse("title LIKE '%cat%'"); + expect(result).toEqual({ + type: 'text', + field: ['title'], + conditions: { like: '%cat%' }, + }); + }); + + it('should parse NOT LIKE pattern', () => { + const result = parser.parse("author NOT LIKE '%admin%'"); + expect(result).toEqual({ + type: 'text', + field: ['author'], + conditions: { notLike: '%admin%' }, + }); + }); + + it('should parse text IN list', () => { + const result = parser.parse("status IN ('published', 'archived', 'draft')"); + expect(result).toEqual({ + type: 'text', + field: ['status'], + conditions: { in: ['published', 'archived', 'draft'] }, + }); + }); + + it('should parse text NOT IN list', () => { + const result = parser.parse("category NOT IN ('deleted', 'hidden')"); + expect(result).toEqual({ + type: 'text', + field: ['category'], + conditions: { notIn: ['deleted', 'hidden'] }, + }); + }); + + it('should parse IS NULL', () => { + const result = parser.parse('source IS NULL'); + expect(result).toEqual({ + type: 'text', + field: ['source'], + conditions: { equal: null }, + }); + }); + + it('should handle escaped quotes in strings', () => { + const result = parser.parse("name = 'O''Brien'"); + expect(result).toEqual({ + type: 'text', + field: ['name'], + conditions: { equal: "O'Brien" }, + }); + }); + + it('should handle empty string', () => { + const 
result = parser.parse("name = ''"); + expect(result).toEqual({ + type: 'text', + field: ['name'], + conditions: { equal: '' }, + }); + }); + }); + + describe('numeric conditions', () => { + it('should parse numeric equality', () => { + const result = parser.parse('age = 30'); + expect(result).toEqual({ + type: 'number', + field: ['age'], + conditions: { equals: 30 }, + }); + }); + + it('should parse numeric not equal', () => { + const result = parser.parse('count != 0'); + expect(result).toEqual({ + type: 'number', + field: ['count'], + conditions: { notEquals: 0 }, + }); + }); + + it('should parse greater than', () => { + const result = parser.parse('views > 100'); + expect(result).toEqual({ + type: 'number', + field: ['views'], + conditions: { greaterThan: 100 }, + }); + }); + + it('should parse greater than or equal', () => { + const result = parser.parse('views >= 100'); + expect(result).toEqual({ + type: 'number', + field: ['views'], + conditions: { greaterThanOrEqual: 100 }, + }); + }); + + it('should parse less than', () => { + const result = parser.parse('priority < 5'); + expect(result).toEqual({ + type: 'number', + field: ['priority'], + conditions: { lessThan: 5 }, + }); + }); + + it('should parse less than or equal', () => { + const result = parser.parse('age <= 65'); + expect(result).toEqual({ + type: 'number', + field: ['age'], + conditions: { lessThanOrEqual: 65 }, + }); + }); + + it('should parse decimal numbers', () => { + const result = parser.parse('score > 0.5'); + expect(result).toEqual({ + type: 'number', + field: ['score'], + conditions: { greaterThan: 0.5 }, + }); + }); + + it('should parse negative numbers', () => { + const result = parser.parse('temperature > -10'); + expect(result).toEqual({ + type: 'number', + field: ['temperature'], + conditions: { greaterThan: -10 }, + }); + }); + + it('should parse numeric IN list', () => { + const result = parser.parse('priority IN (1, 2, 3)'); + expect(result).toEqual({ + type: 'number', + field: 
['priority'], + conditions: { in: [1, 2, 3] }, + }); + }); + + it('should parse numeric NOT IN list', () => { + const result = parser.parse('count NOT IN (0, -1)'); + expect(result).toEqual({ + type: 'number', + field: ['count'], + conditions: { notIn: [0, -1] }, + }); + }); + + it('should parse nested field numeric condition', () => { + const result = parser.parse('metadata.score >= 0.8'); + expect(result).toEqual({ + type: 'number', + field: ['metadata', 'score'], + conditions: { greaterThanOrEqual: 0.8 }, + }); + }); + }); + + describe('logical operators', () => { + it('should parse AND operator', () => { + const result = parser.parse("type = 'article' AND status = 'published'"); + expect(result).toEqual({ + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['type'], conditions: { equal: 'article' } }, + { type: 'text', field: ['status'], conditions: { equal: 'published' } }, + ], + }); + }); + + it('should parse OR operator', () => { + const result = parser.parse("category = 'tech' OR category = 'science'"); + expect(result).toEqual({ + type: 'operator', + operator: 'or', + conditions: [ + { type: 'text', field: ['category'], conditions: { equal: 'tech' } }, + { type: 'text', field: ['category'], conditions: { equal: 'science' } }, + ], + }); + }); + + it('should parse multiple AND conditions', () => { + const result = parser.parse("type = 'article' AND status = 'published' AND views > 100"); + expect(result).toEqual({ + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['type'], conditions: { equal: 'article' } }, + { type: 'text', field: ['status'], conditions: { equal: 'published' } }, + { type: 'number', field: ['views'], conditions: { greaterThan: 100 } }, + ], + }); + }); + + it('should parse multiple OR conditions', () => { + const result = parser.parse("type = 'a' OR type = 'b' OR type = 'c'"); + expect(result).toEqual({ + type: 'operator', + operator: 'or', + conditions: [ + { type: 'text', field: 
['type'], conditions: { equal: 'a' } }, + { type: 'text', field: ['type'], conditions: { equal: 'b' } }, + { type: 'text', field: ['type'], conditions: { equal: 'c' } }, + ], + }); + }); + + it('should respect AND precedence over OR', () => { + // A AND B OR C should be parsed as (A AND B) OR C + const result = parser.parse("a = '1' AND b = '2' OR c = '3'"); + expect(result).toEqual({ + type: 'operator', + operator: 'or', + conditions: [ + { + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['a'], conditions: { equal: '1' } }, + { type: 'text', field: ['b'], conditions: { equal: '2' } }, + ], + }, + { type: 'text', field: ['c'], conditions: { equal: '3' } }, + ], + }); + }); + + it('should parse parenthesized expressions', () => { + const result = parser.parse("(type = 'post' OR type = 'page') AND views > 100"); + expect(result).toEqual({ + type: 'operator', + operator: 'and', + conditions: [ + { + type: 'operator', + operator: 'or', + conditions: [ + { type: 'text', field: ['type'], conditions: { equal: 'post' } }, + { type: 'text', field: ['type'], conditions: { equal: 'page' } }, + ], + }, + { type: 'number', field: ['views'], conditions: { greaterThan: 100 } }, + ], + }); + }); + + it('should parse nested parentheses', () => { + const result = parser.parse( + "((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news'", + ); + expect(result).toEqual({ + type: 'operator', + operator: 'and', + conditions: [ + { + type: 'operator', + operator: 'or', + conditions: [ + { + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['status'], conditions: { equal: 'active' } }, + { type: 'number', field: ['views'], conditions: { greaterThan: 100 } }, + ], + }, + { + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['status'], conditions: { equal: 'featured' } }, + { type: 'number', field: ['views'], conditions: { greaterThan: 50 } }, + ], + }, + ], + 
}, + { type: 'text', field: ['category'], conditions: { equal: 'news' } }, + ], + }); + }); + }); + + describe('case insensitivity', () => { + it('should parse lowercase AND', () => { + const result = parser.parse("a = '1' and b = '2'"); + expect(result.type).toBe('operator'); + expect((result as QueryOperator).operator).toBe('and'); + }); + + it('should parse lowercase OR', () => { + const result = parser.parse("a = '1' or b = '2'"); + expect(result.type).toBe('operator'); + expect((result as QueryOperator).operator).toBe('or'); + }); + + it('should parse mixed case LIKE', () => { + const result = parser.parse("title Like '%test%'"); + expect(result).toEqual({ + type: 'text', + field: ['title'], + conditions: { like: '%test%' }, + }); + }); + + it('should parse mixed case IS NULL', () => { + const result = parser.parse('field Is Null'); + expect(result).toEqual({ + type: 'text', + field: ['field'], + conditions: { equal: null }, + }); + }); + + it('should parse mixed case IN', () => { + const result = parser.parse("status In ('a', 'b')"); + expect(result).toEqual({ + type: 'text', + field: ['status'], + conditions: { in: ['a', 'b'] }, + }); + }); + }); + + describe('whitespace handling', () => { + it('should handle extra whitespace', () => { + const result = parser.parse(" name = 'John' "); + expect(result).toEqual({ + type: 'text', + field: ['name'], + conditions: { equal: 'John' }, + }); + }); + + it('should handle no whitespace around operators', () => { + const result = parser.parse("name='John'"); + expect(result).toEqual({ + type: 'text', + field: ['name'], + conditions: { equal: 'John' }, + }); + }); + + it('should handle tabs and newlines', () => { + const result = parser.parse("name\t=\n'John'"); + expect(result).toEqual({ + type: 'text', + field: ['name'], + conditions: { equal: 'John' }, + }); + }); + }); + + describe('error handling', () => { + it('should throw on invalid syntax', () => { + expect(() => parser.parse('invalid')).toThrow(); + }); + + 
it('should throw on mismatched parentheses', () => { + expect(() => parser.parse("(type = 'a'")).toThrow(); + }); + + it('should throw on unterminated string', () => { + expect(() => parser.parse("name = 'unterminated")).toThrow(/Unterminated string/); + }); + + it('should throw on unexpected token', () => { + expect(() => parser.parse("name = 'a' INVALID")).toThrow(); + }); + + it('should throw on missing value after operator', () => { + expect(() => parser.parse('name =')).toThrow(); + }); + }); + }); + + describe('stringify', () => { + describe('text conditions', () => { + it('should stringify text equality', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['name'], + conditions: { equal: 'John' }, + }; + expect(parser.stringify(filter)).toBe("name = 'John'"); + }); + + it('should stringify nested field', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['metadata', 'author'], + conditions: { equal: 'John' }, + }; + expect(parser.stringify(filter)).toBe("metadata.author = 'John'"); + }); + + it('should stringify text not equal', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['type'], + conditions: { notEqual: 'draft' }, + }; + expect(parser.stringify(filter)).toBe("type != 'draft'"); + }); + + it('should stringify LIKE', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['title'], + conditions: { like: '%cat%' }, + }; + expect(parser.stringify(filter)).toBe("title LIKE '%cat%'"); + }); + + it('should stringify NOT LIKE', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['author'], + conditions: { notLike: '%admin%' }, + }; + expect(parser.stringify(filter)).toBe("author NOT LIKE '%admin%'"); + }); + + it('should stringify text IN', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['status'], + conditions: { in: ['published', 'archived'] }, + }; + expect(parser.stringify(filter)).toBe("status IN ('published', 'archived')"); 
+ }); + + it('should stringify text NOT IN', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['category'], + conditions: { notIn: ['deleted', 'hidden'] }, + }; + expect(parser.stringify(filter)).toBe("category NOT IN ('deleted', 'hidden')"); + }); + + it('should stringify IS NULL', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['source'], + conditions: { equal: null }, + }; + expect(parser.stringify(filter)).toBe('source IS NULL'); + }); + + it('should escape quotes in strings', () => { + const filter: QueryConditionText = { + type: 'text', + field: ['name'], + conditions: { equal: "O'Brien" }, + }; + expect(parser.stringify(filter)).toBe("name = 'O''Brien'"); + }); + }); + + describe('numeric conditions', () => { + it('should stringify numeric equality', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['age'], + conditions: { equals: 30 }, + }; + expect(parser.stringify(filter)).toBe('age = 30'); + }); + + it('should stringify numeric not equal', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['count'], + conditions: { notEquals: 0 }, + }; + expect(parser.stringify(filter)).toBe('count != 0'); + }); + + it('should stringify greater than', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['views'], + conditions: { greaterThan: 100 }, + }; + expect(parser.stringify(filter)).toBe('views > 100'); + }); + + it('should stringify greater than or equal', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['views'], + conditions: { greaterThanOrEqual: 100 }, + }; + expect(parser.stringify(filter)).toBe('views >= 100'); + }); + + it('should stringify less than', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['priority'], + conditions: { lessThan: 5 }, + }; + expect(parser.stringify(filter)).toBe('priority < 5'); + }); + + it('should stringify less than or equal', () => { + const filter: 
QueryConditionNumber = { + type: 'number', + field: ['age'], + conditions: { lessThanOrEqual: 65 }, + }; + expect(parser.stringify(filter)).toBe('age <= 65'); + }); + + it('should stringify decimal numbers', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['score'], + conditions: { greaterThan: 0.5 }, + }; + expect(parser.stringify(filter)).toBe('score > 0.5'); + }); + + it('should stringify numeric IN', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['priority'], + conditions: { in: [1, 2, 3] }, + }; + expect(parser.stringify(filter)).toBe('priority IN (1, 2, 3)'); + }); + + it('should stringify numeric NOT IN', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['count'], + conditions: { notIn: [0, -1] }, + }; + expect(parser.stringify(filter)).toBe('count NOT IN (0, -1)'); + }); + + it('should stringify numeric IS NULL', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['score'], + conditions: { equals: null }, + }; + expect(parser.stringify(filter)).toBe('score IS NULL'); + }); + + it('should stringify numeric IS NOT NULL', () => { + const filter: QueryConditionNumber = { + type: 'number', + field: ['score'], + conditions: { notEquals: null }, + }; + expect(parser.stringify(filter)).toBe('score IS NOT NULL'); + }); + }); + + describe('logical operators', () => { + it('should stringify AND operator', () => { + const filter: QueryFilter = { + type: 'operator', + operator: 'and', + conditions: [ + { type: 'text', field: ['type'], conditions: { equal: 'article' } }, + { type: 'text', field: ['status'], conditions: { equal: 'published' } }, + ], + }; + expect(parser.stringify(filter)).toBe("type = 'article' AND status = 'published'"); + }); + + it('should stringify OR operator', () => { + const filter: QueryFilter = { + type: 'operator', + operator: 'or', + conditions: [ + { type: 'text', field: ['category'], conditions: { equal: 'tech' } }, + { type: 'text', 
field: ['category'], conditions: { equal: 'science' } }, + ], + }; + expect(parser.stringify(filter)).toBe("category = 'tech' OR category = 'science'"); + }); + + it('should stringify nested operators with parentheses', () => { + const filter: QueryFilter = { + type: 'operator', + operator: 'and', + conditions: [ + { + type: 'operator', + operator: 'or', + conditions: [ + { type: 'text', field: ['type'], conditions: { equal: 'post' } }, + { type: 'text', field: ['type'], conditions: { equal: 'page' } }, + ], + }, + { type: 'number', field: ['views'], conditions: { greaterThan: 100 } }, + ], + }; + expect(parser.stringify(filter)).toBe("(type = 'post' OR type = 'page') AND views > 100"); + }); + + it('should stringify empty operator', () => { + const filter: QueryFilter = { + type: 'operator', + operator: 'and', + conditions: [], + }; + expect(parser.stringify(filter)).toBe(''); + }); + + it('should stringify single-condition operator', () => { + const filter: QueryFilter = { + type: 'operator', + operator: 'and', + conditions: [{ type: 'text', field: ['name'], conditions: { equal: 'test' } }], + }; + expect(parser.stringify(filter)).toBe("name = 'test'"); + }); + }); + }); + + describe('roundtrip', () => { + const testCases = [ + "name = 'John'", + "metadata.author = 'Jane'", + 'views > 100', + 'score >= 0.5', + "title LIKE '%cat%'", + "author NOT LIKE '%admin%'", + "status IN ('published', 'archived')", + 'priority IN (1, 2, 3)', + "type = 'article' AND status = 'published'", + "category = 'tech' OR category = 'science'", + "(type = 'post' OR type = 'page') AND views > 100", + ]; + + testCases.forEach((query) => { + it(`should roundtrip: ${query}`, () => { + const parsed = parser.parse(query); + const stringified = parser.stringify(parsed); + const reparsed = parser.parse(stringified); + expect(reparsed).toEqual(parsed); + }); + }); + }); + + describe('complex real-world queries', () => { + it('should handle complex query with multiple field types', () => { + 
const query = "type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100"; + const result = parser.parse(query); + + expect(result.type).toBe('operator'); + const operator = result as QueryOperator; + expect(operator.operator).toBe('and'); + expect(operator.conditions).toHaveLength(3); + }); + + it('should handle nested JSON paths with conditions', () => { + const query = "metadata.nested.deep.value = 'test' AND metadata.nested.count > 10"; + const result = parser.parse(query); + + expect(result.type).toBe('operator'); + const operator = result as QueryOperator; + const condition1 = operator.conditions[0] as QueryConditionText; + const condition2 = operator.conditions[1] as QueryConditionNumber; + expect(condition1.field).toEqual(['metadata', 'nested', 'deep', 'value']); + expect(condition2.field).toEqual(['metadata', 'nested', 'count']); + }); + + it('should handle query from documentation example', () => { + // From the JSON format in docs + const expectedJson: QueryFilter = { + type: 'operator', + operator: 'and', + conditions: [ + { + type: 'text', + field: ['metadata', 'foo'], + conditions: { equal: 'bar' }, + }, + { + type: 'text', + field: ['type'], + conditions: { equal: 'demo' }, + }, + ], + }; + + const sql = "metadata.foo = 'bar' AND type = 'demo'"; + const parsed = parser.parse(sql); + + expect(parsed).toEqual(expectedJson); + }); + }); +}); diff --git a/packages/server/src/query-parser/query-parser.ts b/packages/server/src/query-parser/query-parser.ts new file mode 100644 index 0000000..de02786 --- /dev/null +++ b/packages/server/src/query-parser/query-parser.ts @@ -0,0 +1,19 @@ +import { Stringifier } from './query-parser.stringifier.ts'; +import { Parser } from './query-parser.parser.ts'; + +import type { QueryFilter } from '#root/utils/utils.query.ts'; + +class QueryParser { + private parser = new Parser(); + private stringifier = new Stringifier(); + + public parse = (input: string): QueryFilter => { + return 
this.parser.parse(input); + }; + + public stringify = (filter: QueryFilter): string => { + return this.stringifier.stringify(filter); + }; +} + +export { QueryParser }; diff --git a/packages/server/src/query-parser/query-parser.types.ts b/packages/server/src/query-parser/query-parser.types.ts new file mode 100644 index 0000000..c29ce4f --- /dev/null +++ b/packages/server/src/query-parser/query-parser.types.ts @@ -0,0 +1,30 @@ +type TokenType = + | 'IDENTIFIER' + | 'STRING' + | 'NUMBER' + | 'AND' + | 'OR' + | 'LIKE' + | 'NOT' + | 'IN' + | 'IS' + | 'NULL' + | 'EQUALS' + | 'NOT_EQUALS' + | 'GREATER_THAN' + | 'GREATER_THAN_OR_EQUAL' + | 'LESS_THAN' + | 'LESS_THAN_OR_EQUAL' + | 'LPAREN' + | 'RPAREN' + | 'COMMA' + | 'DOT' + | 'EOF'; + +type Token = { + type: TokenType; + value: string; + position: number; +}; + +export type { TokenType, Token }; diff --git a/packages/server/src/services/document-chunks/document-chunks.schemas.ts b/packages/server/src/services/document-chunks/document-chunks.schemas.ts index 20ba079..81e344d 100644 --- a/packages/server/src/services/document-chunks/document-chunks.schemas.ts +++ b/packages/server/src/services/document-chunks/document-chunks.schemas.ts @@ -16,7 +16,7 @@ const documentChunkFilterSchema = z.object({ limit: z.number().default(20), offset: z.number().default(0), semanticText: z.string().optional(), - conditions: queryFilterSchema.optional(), + conditions: z.union([queryFilterSchema, z.string()]).optional(), }); type DocumentChunkFilter = z.infer; diff --git a/packages/server/src/services/document-chunks/document-chunks.ts b/packages/server/src/services/document-chunks/document-chunks.ts index a26aceb..1ab713d 100644 --- a/packages/server/src/services/document-chunks/document-chunks.ts +++ b/packages/server/src/services/document-chunks/document-chunks.ts @@ -8,6 +8,7 @@ import type { Services } from '#root/utils/utils.services.ts'; import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts'; import type { ExplicitAny } from 
'#root/global.js'; import { applyQueryFilter } from '#root/utils/utils.query.ts'; +import { QueryParser } from '#root/query-parser/query-parser.ts'; const baseFields = [ `${tableNames.documentChunks}.*`, @@ -43,7 +44,11 @@ class DocumentChunksService { query = query.orderBy('createdAt', 'desc'); } if (filter.conditions) { - query = applyQueryFilter(query, filter.conditions); + const parser = this.#services.get(QueryParser); + query = applyQueryFilter( + query, + typeof filter.conditions === 'string' ? parser.parse(filter.conditions) : filter.conditions, + ); } query = query.limit(filter.limit).offset(filter.offset); diff --git a/packages/server/src/services/documents/documents.schemas.ts b/packages/server/src/services/documents/documents.schemas.ts index eeb3e51..cfd708d 100644 --- a/packages/server/src/services/documents/documents.schemas.ts +++ b/packages/server/src/services/documents/documents.schemas.ts @@ -21,18 +21,32 @@ const documentSchema = z.object({ type Document = z.infer; -const documentUpsertSchema = z.object({ - id: z.string().nullish(), - owner: z.string().nullish(), - contentType: z.string().nullish(), - content: z.string().nullish(), - source: z.string().nullish(), - sourceId: z.string().nullish(), - type: z.string(), - typeVersion: z.int().nullish(), - searchText: z.string().nullish(), - metadata: z.unknown().nullish(), -}); +const documentUpsertSchema = z + .object({ + id: z.string().nullish(), + owner: z.string().nullish(), + contentType: z.string().nullish(), + content: z.string().nullish(), + source: z.string().nullish(), + sourceId: z.string().nullish(), + type: z.string().optional(), + typeVersion: z.int().nullish(), + searchText: z.string().nullish(), + metadata: z.unknown().nullish(), + }) + .meta({ + example: { + content: 'the cat is yellow', + contentType: 'text/plain', + source: 'test', + sourceId: 'test', + type: 'raw', + metadata: { + foo: 'bar', + bar: 'baz', + }, + }, + }); type DocumentUpsert = z.infer; @@ -47,7 +61,7 @@ type 
DocumentUpsertResult = z.infer; const documentFilterSchema = z.object({ offset: z.number().default(0), limit: z.number().default(20), - condition: queryFilterSchema, + condition: z.union([queryFilterSchema, z.string()]), }); type DocumentFilter = z.infer; diff --git a/packages/server/src/services/documents/documents.ts b/packages/server/src/services/documents/documents.ts index c93afe4..1541676 100644 --- a/packages/server/src/services/documents/documents.ts +++ b/packages/server/src/services/documents/documents.ts @@ -14,6 +14,7 @@ import { EventEmitter } from '#root/utils/utils.event-emitter.ts'; import type { Services } from '#root/utils/utils.services.ts'; import { compareObjectKeys } from '#root/utils/utils.compare.ts'; import { applyQueryFilter } from '#root/utils/utils.query.ts'; +import { QueryParser } from '#root/query-parser/query-parser.ts'; type DocumentsServiceEvents = { upserted: (document: Document) => void; @@ -34,7 +35,11 @@ class DocumentsService extends EventEmitter { const db = await databaseService.getInstance(); let query = db(tableNames.documents); if (filter) { - query = applyQueryFilter(query, filter.condition); + const parser = this.#services.get(QueryParser); + query = applyQueryFilter( + query, + typeof filter.condition === 'string' ? 
parser.parse(filter.condition) : filter.condition, + ); } query = query.limit(filter.limit).offset(filter.offset); const items = await query; @@ -113,13 +118,15 @@ class DocumentsService extends EventEmitter { } as const; } else { await trx(tableNames.documents).insert({ + metadata: {}, + type: 'raw', ...document, id, createdAt: now, updatedAt: now, - metadata: document.metadata || {}, }); const resultDocument: Document = mapFromDocumentRow({ + type: 'raw', owner: null, contentType: null, content: null, diff --git a/packages/server/src/utils/utils.query.ts b/packages/server/src/utils/utils.query.ts index eb99862..34905ac 100644 --- a/packages/server/src/utils/utils.query.ts +++ b/packages/server/src/utils/utils.query.ts @@ -545,4 +545,5 @@ const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => { } }; +export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter }; export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter };