update
This commit is contained in:
@@ -1,336 +0,0 @@
|
||||
# Query Language Specification
|
||||
|
||||
This document describes the SQL-like query language syntax for building database queries. The language supports filtering on both text and numeric fields, including nested JSON fields, with logical operators for complex queries.
|
||||
|
||||
## Overview
|
||||
|
||||
The query language provides a human-readable, SQL-like syntax that can be parsed into the internal JSON query format used by the system. It supports:
|
||||
|
||||
- Text field conditions (equality, pattern matching, membership)
|
||||
- Numeric field conditions (comparison operators, membership)
|
||||
- Nested JSON field access using dot notation
|
||||
- Logical operators (AND, OR) with grouping
|
||||
- NULL value checks
|
||||
|
||||
## Syntax
|
||||
|
||||
### Field References
|
||||
|
||||
Fields are referenced using dot notation for nested JSON paths:
|
||||
|
||||
```
|
||||
field_name
|
||||
metadata.foo
|
||||
metadata.nested.deep.field
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
- `content` - top-level field
|
||||
- `metadata.author` - nested field in metadata object
|
||||
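- `metadata.tags.0` - array element (if needed; note that the grammar below only permits identifier segments, so numeric indices require dedicated parser support)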
|
||||
|
||||
### Text Conditions
|
||||
|
||||
Text conditions operate on string values:
|
||||
|
||||
| Operator | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| Equality | `field = 'value'` | Exact match |
|
||||
| Inequality | `field != 'value'` | Not equal |
|
||||
| NULL check | `field IS NULL` | Field is null |
|
||||
| NOT NULL | `field IS NOT NULL` | Field is not null |
|
||||
| Pattern match | `field LIKE 'pattern'` | SQL LIKE pattern matching |
|
||||
| Not like | `field NOT LIKE 'pattern'` | Negated pattern matching |
|
||||
| In list | `field IN ('val1', 'val2', 'val3')` | Value in list |
|
||||
| Not in list | `field NOT IN ('val1', 'val2')` | Value not in list |
|
||||
|
||||
**String Literals:**
|
||||
- Single quotes: `'value'`
|
||||
- Escaped quotes: `'O''Brien'` (double single quote)
|
||||
- Empty string: `''`
|
||||
|
||||
**LIKE Patterns:**
|
||||
- `%` matches any sequence of characters
|
||||
- `_` matches any single character
|
||||
- Examples: `'%cat%'`, `'test_%'`, `'exact'`
|
||||
|
||||
**Examples:**
|
||||
```sql
|
||||
content = 'hello world'
|
||||
metadata.foo = 'bar'
|
||||
type != 'draft'
|
||||
source IS NULL
|
||||
title LIKE '%cat%'
|
||||
author NOT LIKE '%admin%'
|
||||
status IN ('published', 'archived')
|
||||
category NOT IN ('deleted', 'hidden')
|
||||
```
|
||||
|
||||
### Numeric Conditions
|
||||
|
||||
Numeric conditions operate on number values:
|
||||
|
||||
| Operator | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| Equality | `field = 123` | Exact match |
|
||||
| Inequality | `field != 123` | Not equal |
|
||||
| NULL check | `field IS NULL` | Field is null |
|
||||
| NOT NULL | `field IS NOT NULL` | Field is not null |
|
||||
| Greater than | `field > 10` | Greater than |
|
||||
| Greater or equal | `field >= 10` | Greater than or equal |
|
||||
| Less than | `field < 10` | Less than |
|
||||
| Less or equal | `field <= 10` | Less than or equal |
|
||||
| In list | `field IN (1, 2, 3)` | Value in list |
|
||||
| Not in list | `field NOT IN (1, 2, 3)` | Value not in list |
|
||||
|
||||
**Numeric Literals:**
|
||||
- Integers: `123`, `-45`, `0`
|
||||
- Decimals: `123.45`, `-0.5`, `3.14159`
|
||||
- Scientific notation: `1e10`, `2.5e-3` (if supported)
|
||||
|
||||
**Examples:**
|
||||
```sql
|
||||
typeVersion = 1
|
||||
score > 0.5
|
||||
views >= 100
|
||||
priority < 5
|
||||
age <= 65
|
||||
rating IN (1, 2, 3, 4, 5)
|
||||
count NOT IN (0, -1)
|
||||
```
|
||||
|
||||
### Logical Operators
|
||||
|
||||
Combine conditions using `AND` and `OR` operators:
|
||||
|
||||
| Operator | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| AND | `condition1 AND condition2` | Both conditions must be true |
|
||||
| OR | `condition1 OR condition2` | At least one condition must be true |
|
||||
|
||||
**Grouping:**
|
||||
Use parentheses `()` to group conditions and control operator precedence:
|
||||
|
||||
```sql
|
||||
(condition1 AND condition2) OR condition3
|
||||
condition1 AND (condition2 OR condition3)
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
```sql
|
||||
type = 'article' AND status = 'published'
|
||||
metadata.foo = 'bar' OR metadata.foo = 'baz'
|
||||
(type = 'post' OR type = 'page') AND views > 100
|
||||
```
|
||||
|
||||
### Operator Precedence
|
||||
|
||||
1. Parentheses `()` - highest precedence
|
||||
2. `AND` - evaluated before OR
|
||||
3. `OR` - lowest precedence
|
||||
|
||||
**Examples:**
|
||||
```sql
|
||||
-- Equivalent to: (A AND B) OR C
|
||||
A AND B OR C
|
||||
|
||||
-- Equivalent to: A AND (B OR C)
|
||||
A AND (B OR C)
|
||||
|
||||
-- Explicit grouping
|
||||
(A OR B) AND (C OR D)
|
||||
```
|
||||
|
||||
## Complete Examples
|
||||
|
||||
### Simple Conditions
|
||||
|
||||
```sql
|
||||
-- Text equality
|
||||
metadata.author = 'John Doe'
|
||||
|
||||
-- Numeric comparison
|
||||
views >= 1000
|
||||
|
||||
-- Pattern matching
|
||||
title LIKE '%tutorial%'
|
||||
|
||||
-- NULL check
|
||||
source IS NULL
|
||||
```
|
||||
|
||||
### Multiple Conditions
|
||||
|
||||
```sql
|
||||
-- AND operator
|
||||
type = 'article' AND status = 'published' AND views > 100
|
||||
|
||||
-- OR operator
|
||||
category = 'tech' OR category = 'science'
|
||||
|
||||
-- Mixed operators
|
||||
(type = 'post' OR type = 'page') AND status = 'published'
|
||||
```
|
||||
|
||||
### Complex Nested Queries
|
||||
|
||||
```sql
|
||||
-- Nested AND within OR
|
||||
(metadata.foo = 'bar' AND type = 'demo') OR metadata.foo = 'baz'
|
||||
|
||||
-- Multiple levels of nesting
|
||||
((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news'
|
||||
|
||||
-- Complex query with multiple field types
|
||||
type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100 AND rating IN (4, 5)
|
||||
```
|
||||
|
||||
### Array/List Operations
|
||||
|
||||
```sql
|
||||
-- Text IN
|
||||
status IN ('published', 'archived', 'draft')
|
||||
|
||||
-- Numeric IN
|
||||
priority IN (1, 2, 3)
|
||||
|
||||
-- NOT IN
|
||||
category NOT IN ('deleted', 'hidden')
|
||||
```
|
||||
|
||||
## Type Inference
|
||||
|
||||
The parser will infer the condition type (text vs number) based on:
|
||||
|
||||
1. **Operator context**: Operators like `>`, `<`, `>=`, `<=` imply numeric
|
||||
2. **Value type**:
|
||||
- Quoted strings (`'value'`) → text condition
|
||||
- Unquoted numbers (`123`, `45.6`) → numeric condition
|
||||
- `NULL` → can be either (context-dependent)
|
||||
3. **Field name**: If a field is known to be numeric, numeric operators are used
|
||||
|
||||
**Examples:**
|
||||
```sql
|
||||
-- Text condition (quoted string)
|
||||
author = 'John'
|
||||
|
||||
-- Numeric condition (unquoted number)
|
||||
age = 30
|
||||
|
||||
-- Numeric comparison
|
||||
score > 0.5
|
||||
|
||||
-- Text pattern
|
||||
title LIKE '%test%'
|
||||
```
|
||||
|
||||
## Escaping and Special Characters
|
||||
|
||||
### String Escaping
|
||||
|
||||
- Single quotes in strings: `'O''Brien'` → `O'Brien`
|
||||
- Empty string: `''`
|
||||
|
||||
### Field Name Escaping
|
||||
|
||||
If field names contain special characters or reserved words, they can be quoted (implementation-dependent):
|
||||
|
||||
```sql
|
||||
-- Reserved words or special characters (if supported)
|
||||
"order" = 'asc'
|
||||
"metadata.field-name" = 'value'
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The parser should provide clear error messages for:
|
||||
|
||||
- Invalid syntax
|
||||
- Mismatched parentheses
|
||||
- Invalid operators for field types
|
||||
- Missing values
|
||||
- Invalid escape sequences
|
||||
|
||||
## Grammar (BNF-like)
|
||||
|
||||
```
|
||||
query ::= expression
|
||||
expression ::= condition | group
|
||||
group ::= '(' expression ')'
|
||||
| expression AND expression
|
||||
| expression OR expression
|
||||
condition ::= text_condition | numeric_condition
|
||||
text_condition ::= field ( '=' | '!=' | 'LIKE' | 'NOT LIKE' ) string_literal
|
||||
| field 'IS' ( 'NULL' | 'NOT NULL' )
|
||||
| field 'IN' '(' string_list ')'
|
||||
| field 'NOT IN' '(' string_list ')'
|
||||
numeric_condition ::= field ( '=' | '!=' | '>' | '>=' | '<' | '<=' ) number
|
||||
| field 'IS' ( 'NULL' | 'NOT NULL' )
|
||||
| field 'IN' '(' number_list ')'
|
||||
| field 'NOT IN' '(' number_list ')'
|
||||
field ::= identifier ( '.' identifier )*
|
||||
identifier ::= [a-zA-Z_][a-zA-Z0-9_]*
|
||||
string_literal ::= "'" ( escaped_char | [^'] )* "'"
|
||||
escaped_char ::= "''"
|
||||
string_list ::= string_literal ( ',' string_literal )*
|
||||
number ::= '-'? [0-9]+ ( '.' [0-9]+ )? ( [eE] [+-]? [0-9]+ )?
|
||||
number_list ::= number ( ',' number )*
|
||||
```
|
||||
|
||||
## Migration from JSON Format
|
||||
|
||||
The SQL-like syntax maps to the JSON format as follows:
|
||||
|
||||
**JSON:**
|
||||
```json
|
||||
{
|
||||
"type": "text",
|
||||
"field": ["metadata", "foo"],
|
||||
"conditions": {
|
||||
"equal": "bar"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**SQL:**
|
||||
```sql
|
||||
metadata.foo = 'bar'
|
||||
```
|
||||
|
||||
**JSON (with operator):**
|
||||
```json
|
||||
{
|
||||
"type": "operator",
|
||||
"operator": "and",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "text",
|
||||
"field": ["metadata", "foo"],
|
||||
"conditions": {
|
||||
"equal": "bar"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"field": ["type"],
|
||||
"conditions": {
|
||||
"equal": "demo"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**SQL:**
|
||||
```sql
|
||||
metadata.foo = 'bar' AND type = 'demo'
|
||||
```
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
1. **Whitespace**: Whitespace is generally ignored except within string literals
|
||||
2. **Case sensitivity**:
|
||||
- Operators (`AND`, `OR`, `LIKE`, etc.) are case-insensitive
|
||||
- Field names and string values are case-sensitive
|
||||
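3. **Comments**: Not supported in the initial version (can be added later); the `--` comments in the examples above are annotations for the reader, not part of the query syntax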
|
||||
4. **Table prefixes**: The parser may support optional table name prefixes (e.g., `documents.metadata.foo`) if needed
|
||||
@@ -37,6 +37,7 @@
|
||||
"@fastify/websocket": "11.2.0",
|
||||
"@huggingface/transformers": "^3.8.1",
|
||||
"@langchain/textsplitters": "^1.0.1",
|
||||
"@morten-olsen/stash-query-dsl": "workspace:*",
|
||||
"@scalar/fastify-api-reference": "1.40.2",
|
||||
"better-sqlite3": "^12.5.0",
|
||||
"deep-equal": "^2.2.3",
|
||||
|
||||
@@ -10,12 +10,12 @@ import {
|
||||
type ZodTypeProvider,
|
||||
} from 'fastify-type-provider-zod';
|
||||
|
||||
import { Services } from './utils/utils.services.ts';
|
||||
import { systemEndpoints } from './endpoints/system/system.ts';
|
||||
import { WarmupService } from './services/warmup/warmup.ts';
|
||||
import { documentEndpoints } from './endpoints/documents/documents.ts';
|
||||
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.ts';
|
||||
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.ts';
|
||||
import { Services } from './utils/utils.services.js';
|
||||
import { systemEndpoints } from './endpoints/system/system.js';
|
||||
import { WarmupService } from './services/warmup/warmup.js';
|
||||
import { documentEndpoints } from './endpoints/documents/documents.js';
|
||||
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.js';
|
||||
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.js';
|
||||
|
||||
class BaseError extends Error {
|
||||
public statusCode: number;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { createApi } from './api.js';
|
||||
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.ts';
|
||||
import { Services } from './utils/utils.services.ts';
|
||||
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.js';
|
||||
import { Services } from './utils/utils.services.js';
|
||||
|
||||
const services = new Services();
|
||||
const server = await createApi(services);
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
documentChunkFilterSchema,
|
||||
documentChunksFindResultSchema,
|
||||
DocumentChunksService,
|
||||
} from '#root/services/document-chunks/document-chunks.ts';
|
||||
} from '#root/services/document-chunks/document-chunks.js';
|
||||
|
||||
const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
|
||||
instance.route({
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
documentFilterSchema,
|
||||
documentFindResultSchema,
|
||||
DocumentsService,
|
||||
} from '#root/services/documents/documents.ts';
|
||||
} from '#root/services/documents/documents.js';
|
||||
|
||||
const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
|
||||
instance.route({
|
||||
|
||||
@@ -4,7 +4,7 @@ import {
|
||||
DocumentsService,
|
||||
documentUpsertResultSchema,
|
||||
documentUpsertSchema,
|
||||
} from '#root/services/documents/documents.ts';
|
||||
} from '#root/services/documents/documents.js';
|
||||
|
||||
const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
|
||||
instance.route({
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { DatabaseService } from '#root/services/database/database.ts';
|
||||
import { DatabaseService } from '#root/services/database/database.js';
|
||||
|
||||
const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
|
||||
instance.route({
|
||||
|
||||
@@ -1,202 +0,0 @@
|
||||
import type { Token } from './query-parser.types.ts';
|
||||
|
||||
/**
 * Splits a query written in the SQL-like filter language into a flat token list.
 *
 * Recognised tokens: parentheses, comma, dot, the comparison operators
 * (`=`, `!=`, `>`, `>=`, `<`, `<=`), single-quoted strings (a doubled `''`
 * stands for one literal quote), numbers (optional leading `-`, optional
 * fraction, optional exponent), identifiers, and the case-insensitive keywords
 * `AND`, `OR`, `LIKE`, `NOT`, `IN`, `IS` and `NULL`. The returned list always
 * ends with an `EOF` token.
 */
class Lexer {
  #input: string;
  #position = 0;
  #tokens: Token[] = [];

  /**
   * @param input Raw query text to tokenize.
   */
  constructor(input: string) {
    this.#input = input;
  }

  /** True for an ASCII digit; `undefined` (look-ahead past the end) is never a digit. */
  #isDigit = (char: string | undefined): boolean => char !== undefined && char >= '0' && char <= '9';

  /** True for a character that may start an identifier: ASCII letter or underscore. */
  #isIdentifierStart = (char: string | undefined): boolean =>
    char !== undefined && ((char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || char === '_');

  /** Builds a token of `value.length` characters at the cursor and moves the cursor past it. */
  #emit = (type: Token['type'], value: string): Token => {
    const position = this.#position;
    this.#position += value.length;
    return { type, value, position };
  };

  /** Moves the cursor past any run of whitespace. */
  #skipWhitespace = (): void => {
    while (this.#position < this.#input.length && /\s/.test(this.#input[this.#position])) {
      this.#position++;
    }
  };

  /** Consumes a (possibly empty) run of digits at the cursor and returns it. */
  #readDigits = (): string => {
    const start = this.#position;
    while (this.#isDigit(this.#input[this.#position])) {
      this.#position++;
    }
    return this.#input.slice(start, this.#position);
  };

  /**
   * Reads the token that starts at the cursor. The caller must have skipped
   * whitespace and checked that input remains.
   *
   * @throws Error when the character at the cursor cannot start any token.
   */
  #nextToken = (): Token | null => {
    const char = this.#input[this.#position];
    const next = this.#input[this.#position + 1];

    // Two-character operators are tried before their one-character prefixes.
    if (next === '=') {
      if (char === '!') return this.#emit('NOT_EQUALS', '!=');
      if (char === '>') return this.#emit('GREATER_THAN_OR_EQUAL', '>=');
      if (char === '<') return this.#emit('LESS_THAN_OR_EQUAL', '<=');
    }

    switch (char) {
      case '(':
        return this.#emit('LPAREN', '(');
      case ')':
        return this.#emit('RPAREN', ')');
      case ',':
        return this.#emit('COMMA', ',');
      case '.':
        return this.#emit('DOT', '.');
      case '=':
        return this.#emit('EQUALS', '=');
      case '>':
        return this.#emit('GREATER_THAN', '>');
      case '<':
        return this.#emit('LESS_THAN', '<');
      case "'":
        return this.#readString();
      default:
        break;
    }

    // A minus sign only starts a number when a digit follows immediately.
    if (this.#isDigit(char) || (char === '-' && this.#isDigit(next))) {
      return this.#readNumber();
    }

    if (this.#isIdentifierStart(char)) {
      return this.#readIdentifierOrKeyword();
    }

    throw new Error(`Unexpected character '${char}' at position ${this.#position}`);
  };

  /**
   * Reads a single-quoted string literal; the token value is the unescaped content.
   *
   * @throws Error when the input ends before the closing quote.
   */
  #readString = (): Token => {
    const startPosition = this.#position;
    this.#position++; // Skip opening quote
    let value = '';

    while (this.#position < this.#input.length) {
      const char = this.#input[this.#position];
      if (char !== "'") {
        value += char;
        this.#position++;
        continue;
      }
      if (this.#input[this.#position + 1] === "'") {
        // Doubled quote: one literal quote character inside the string.
        value += "'";
        this.#position += 2;
        continue;
      }
      this.#position++; // Skip closing quote
      return { type: 'STRING', value, position: startPosition };
    }

    throw new Error(`Unterminated string starting at position ${startPosition}`);
  };

  /**
   * Reads a numeric literal: optional `-`, integer digits, optional `.digits`,
   * optional exponent. The token value is the literal's source text.
   */
  #readNumber = (): Token => {
    const startPosition = this.#position;
    let value = '';

    if (this.#input[this.#position] === '-') {
      value += '-';
      this.#position++;
    }

    value += this.#readDigits();

    // A dot belongs to the number only when a digit follows it.
    if (this.#input[this.#position] === '.' && this.#isDigit(this.#input[this.#position + 1])) {
      value += '.';
      this.#position++;
      value += this.#readDigits();
    }

    // Only take `e`/`E` (and an optional sign) as an exponent when at least one
    // digit follows; otherwise leave those characters for the next token.
    const marker = this.#input[this.#position];
    if (marker === 'e' || marker === 'E') {
      const sign = this.#input[this.#position + 1];
      const digitsAt = this.#position + (sign === '+' || sign === '-' ? 2 : 1);
      if (this.#isDigit(this.#input[digitsAt])) {
        value += this.#input.slice(this.#position, digitsAt);
        this.#position = digitsAt;
        value += this.#readDigits();
      }
    }

    return { type: 'NUMBER', value, position: startPosition };
  };

  /**
   * Reads a run of letters, digits and underscores. Keywords are matched
   * case-insensitively, and the token value keeps the casing used in the input.
   */
  #readIdentifierOrKeyword = (): Token => {
    const startPosition = this.#position;

    while (this.#isIdentifierStart(this.#input[this.#position]) || this.#isDigit(this.#input[this.#position])) {
      this.#position++;
    }
    const value = this.#input.slice(startPosition, this.#position);

    switch (value.toUpperCase()) {
      case 'AND':
        return { type: 'AND', value, position: startPosition };
      case 'OR':
        return { type: 'OR', value, position: startPosition };
      case 'LIKE':
        return { type: 'LIKE', value, position: startPosition };
      case 'NOT':
        return { type: 'NOT', value, position: startPosition };
      case 'IN':
        return { type: 'IN', value, position: startPosition };
      case 'IS':
        return { type: 'IS', value, position: startPosition };
      case 'NULL':
        return { type: 'NULL', value, position: startPosition };
      default:
        return { type: 'IDENTIFIER', value, position: startPosition };
    }
  };

  /**
   * Tokenizes the whole input.
   *
   * State is reset on entry, so calling this more than once returns the same
   * token sequence each time.
   *
   * @returns All tokens in source order, followed by one `EOF` token.
   * @throws Error on an unexpected character or an unterminated string.
   */
  public tokenize = (): Token[] => {
    this.#position = 0;
    this.#tokens = [];

    while (this.#position < this.#input.length) {
      this.#skipWhitespace();
      if (this.#position >= this.#input.length) break;

      const token = this.#nextToken();
      if (token) {
        this.#tokens.push(token);
      }
    }

    this.#tokens.push({ type: 'EOF', value: '', position: this.#position });
    return this.#tokens;
  };
}
|
||||
|
||||
export { Lexer };
|
||||
@@ -1,317 +0,0 @@
|
||||
import { Lexer } from './query-parser.lexer.ts';
|
||||
import type { Token, TokenType } from './query-parser.types.ts';
|
||||
|
||||
import type { QueryConditionText, QueryConditionNumber, QueryFilter, QueryCondition } from '#root/utils/utils.query.ts';
|
||||
|
||||
/**
 * Recursive-descent parser that turns a query string into a `QueryFilter` tree.
 *
 * Precedence, lowest to highest: `OR`, `AND`, then a parenthesised expression
 * or a single field condition. Consecutive uses of the same logical operator
 * are collected into one operator node instead of a nested chain.
 */
class Parser {
  #tokens: Token[] = [];
  #position = 0;

  /** Token under the cursor, without consuming it. */
  #current = (): Token => this.#tokens[this.#position];

  /** Consumes the token under the cursor and returns it. */
  #advance = (): Token => {
    const consumed = this.#tokens[this.#position];
    this.#position += 1;
    return consumed;
  };

  /** Consumes the current token only when it has the given type; reports whether it did. */
  #consumeIf = (type: TokenType): boolean => {
    if (this.#current().type !== type) {
      return false;
    }
    this.#position += 1;
    return true;
  };

  /**
   * Consumes and returns the current token, which must have the given type.
   *
   * @throws Error naming the expected type, the actual type and its position.
   */
  #expect = (type: TokenType): Token => {
    const found = this.#current();
    if (found.type === type) {
      return this.#advance();
    }
    throw new Error(`Expected ${type} but got ${found.type} at position ${found.position}`);
  };

  /** Entry rule: an expression is an OR-chain. */
  #parseExpression = (): QueryFilter => this.#parseOr();

  /** Parses `and-expr ( OR and-expr )*`. */
  #parseOr = (): QueryFilter => {
    let result = this.#parseAnd();
    while (this.#consumeIf('OR')) {
      result = this.#combineWithOperator(result, this.#parseAnd(), 'or');
    }
    return result;
  };

  /** Parses `primary ( AND primary )*`. */
  #parseAnd = (): QueryFilter => {
    let result = this.#parsePrimary();
    while (this.#consumeIf('AND')) {
      result = this.#combineWithOperator(result, this.#parsePrimary(), 'and');
    }
    return result;
  };

  /**
   * Joins two filters under `operator`. When `left` is already an operator node
   * of the same kind, `right` is appended to a copy of its conditions so that
   * chains stay flat.
   */
  #combineWithOperator = (left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter => {
    const conditions =
      left.type === 'operator' && left.operator === operator ? [...left.conditions, right] : [left, right];
    return { type: 'operator', operator, conditions };
  };

  /** Parses either a parenthesised expression or a single condition. */
  #parsePrimary = (): QueryFilter => {
    if (this.#current().type !== 'LPAREN') {
      return this.#parseCondition();
    }
    this.#advance();
    const inner = this.#parseExpression();
    this.#expect('RPAREN');
    return inner;
  };

  /**
   * Parses `field <operator> ...` into a text or number condition.
   *
   * @throws Error when the token after the field is not a supported operator.
   */
  #parseCondition = (): QueryCondition => {
    const field = this.#parseField();
    const operatorToken = this.#current();

    switch (operatorToken.type) {
      case 'IS': {
        this.#advance();
        const isNot = this.#consumeIf('NOT');
        this.#expect('NULL');
        // Null checks carry no value to infer a type from, so they are emitted as text.
        // NOTE(review): the IS NOT NULL shape holds only undefined members, i.e. no
        // effective constraint is recorded — confirm this is what consumers expect.
        return {
          type: 'text',
          field,
          conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null },
        } satisfies QueryConditionText;
      }
      case 'NOT':
        this.#advance();
        return this.#parseNegated(field);
      case 'IN':
        this.#advance();
        return this.#parseInCondition(field, false);
      case 'LIKE':
        this.#advance();
        return { type: 'text', field, conditions: { like: this.#expect('STRING').value } };
      case 'EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'equals');
      case 'NOT_EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'notEquals');
      case 'GREATER_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThan: this.#parseNumber() } };
      case 'GREATER_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThanOrEqual: this.#parseNumber() } };
      case 'LESS_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { lessThan: this.#parseNumber() } };
      case 'LESS_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { lessThanOrEqual: this.#parseNumber() } };
      default:
        throw new Error(`Unexpected token '${operatorToken.value}' at position ${operatorToken.position}`);
    }
  };

  /** Parses what follows `field NOT`: either an `IN (...)` list or a `LIKE` pattern. */
  #parseNegated = (field: string[]): QueryCondition => {
    const following = this.#current();

    if (following.type === 'IN') {
      this.#advance();
      return this.#parseInCondition(field, true);
    }

    if (following.type === 'LIKE') {
      this.#advance();
      return { type: 'text', field, conditions: { notLike: this.#expect('STRING').value } };
    }

    throw new Error(`Expected IN or LIKE after NOT at position ${following.position}`);
  };

  /** Parses a dotted field path such as `metadata.foo` into its segments. */
  #parseField = (): string[] => {
    const segments = [this.#expect('IDENTIFIER').value];
    while (this.#consumeIf('DOT')) {
      segments.push(this.#expect('IDENTIFIER').value);
    }
    return segments;
  };

  /**
   * Parses the right-hand side of `=` / `!=`. The literal decides the condition
   * type: a string gives text, a number gives number, and `NULL` gives text.
   *
   * @throws Error when the next token is not a string, number or `NULL`.
   */
  #parseValueCondition = (field: string[], operator: 'equals' | 'notEquals'): QueryCondition => {
    const literal = this.#current();
    const isEquality = operator === 'equals';

    switch (literal.type) {
      case 'STRING': {
        this.#advance();
        const text: QueryConditionText = {
          type: 'text',
          field,
          conditions: isEquality ? { equal: literal.value } : { notEqual: literal.value },
        };
        return text;
      }
      case 'NUMBER': {
        this.#advance();
        const parsed = parseFloat(literal.value);
        const numeric: QueryConditionNumber = {
          type: 'number',
          field,
          conditions: isEquality ? { equals: parsed } : { notEquals: parsed },
        };
        return numeric;
      }
      case 'NULL':
        this.#advance();
        // NOTE(review): `!= NULL` yields an empty conditions object — confirm intended.
        return {
          type: 'text',
          field,
          conditions: isEquality ? { equal: null } : {},
        } as QueryConditionText;
      default:
        throw new Error(`Expected value but got ${literal.type} at position ${literal.position}`);
    }
  };

  /** Consumes a NUMBER token and returns it as a float. */
  #parseNumber = (): number => parseFloat(this.#expect('NUMBER').value);

  /**
   * Parses the parenthesised list after `IN` / `NOT IN`. The first element
   * fixes the list type, and every later element must be of that same type.
   *
   * @throws Error when the list does not start with a string or a number.
   */
  #parseInCondition = (field: string[], isNot: boolean): QueryCondition => {
    this.#expect('LPAREN');
    const head = this.#current();

    if (head.type === 'STRING') {
      const values: string[] = [];
      do {
        values.push(this.#expect('STRING').value);
      } while (this.#consumeIf('COMMA'));
      this.#expect('RPAREN');

      return { type: 'text', field, conditions: isNot ? { notIn: values } : { in: values } };
    }

    if (head.type === 'NUMBER') {
      const values: number[] = [];
      do {
        values.push(parseFloat(this.#expect('NUMBER').value));
      } while (this.#consumeIf('COMMA'));
      this.#expect('RPAREN');

      return { type: 'number', field, conditions: isNot ? { notIn: values } : { in: values } };
    }

    throw new Error(`Expected STRING or NUMBER in IN list at position ${head.position}`);
  };

  /**
   * Parses a complete query string.
   *
   * @param input Query text in the SQL-like filter language.
   * @returns The filter tree for the query.
   * @throws Error on lexing errors, syntax errors, or tokens left over after
   *   the expression.
   */
  public parse(input: string): QueryFilter {
    this.#tokens = new Lexer(input).tokenize();
    this.#position = 0;

    const filter = this.#parseExpression();

    const trailing = this.#current();
    if (trailing.type !== 'EOF') {
      throw new Error(`Unexpected token '${trailing.value}' at position ${trailing.position}`);
    }

    return filter;
  }
}
|
||||
|
||||
export { Parser };
|
||||
@@ -1,135 +0,0 @@
|
||||
import type {
|
||||
QueryFilter,
|
||||
QueryOperator,
|
||||
QueryCondition,
|
||||
QueryConditionText,
|
||||
QueryConditionNumber,
|
||||
} from '#root/utils/utils.query.ts';
|
||||
|
||||
class Stringifier {
|
||||
#stringifyFilter = (filter: QueryFilter, needsParens: boolean): string => {
|
||||
if (filter.type === 'operator') {
|
||||
return this.#stringifyOperator(filter, needsParens);
|
||||
}
|
||||
return this.#stringifyCondition(filter);
|
||||
};
|
||||
|
||||
#stringifyOperator = (op: QueryOperator, needsParens: boolean): string => {
|
||||
if (op.conditions.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
if (op.conditions.length === 1) {
|
||||
return this.#stringifyFilter(op.conditions[0], needsParens);
|
||||
}
|
||||
|
||||
const operator = op.operator.toUpperCase();
|
||||
const parts = op.conditions.map((condition) => {
|
||||
// Child operators need parens if they have a different operator
|
||||
const childNeedsParens = condition.type === 'operator' && condition.operator !== op.operator;
|
||||
return this.#stringifyFilter(condition, childNeedsParens);
|
||||
});
|
||||
|
||||
const result = parts.join(` ${operator} `);
|
||||
|
||||
return needsParens ? `(${result})` : result;
|
||||
};
|
||||
|
||||
#stringifyCondition = (condition: QueryCondition): string => {
|
||||
const fieldStr = condition.field.join('.');
|
||||
|
||||
if (condition.type === 'text') {
|
||||
return this.#stringifyTextCondition(fieldStr, condition.conditions);
|
||||
}
|
||||
|
||||
return this.#stringifyNumberCondition(fieldStr, condition.conditions);
|
||||
};
|
||||
|
||||
#stringifyTextCondition = (field: string, conditions: QueryConditionText['conditions']): string => {
|
||||
if (conditions.equal !== undefined) {
|
||||
if (conditions.equal === null) {
|
||||
return `${field} IS NULL`;
|
||||
}
|
||||
return `${field} = ${this.#escapeString(conditions.equal)}`;
|
||||
}
|
||||
|
||||
if (conditions.notEqual !== undefined) {
|
||||
return `${field} != ${this.#escapeString(conditions.notEqual)}`;
|
||||
}
|
||||
|
||||
if (conditions.like !== undefined) {
|
||||
return `${field} LIKE ${this.#escapeString(conditions.like)}`;
|
||||
}
|
||||
|
||||
if (conditions.notLike !== undefined) {
|
||||
return `${field} NOT LIKE ${this.#escapeString(conditions.notLike)}`;
|
||||
}
|
||||
|
||||
if (conditions.in !== undefined) {
|
||||
const values = conditions.in.map((v) => this.#escapeString(v)).join(', ');
|
||||
return `${field} IN (${values})`;
|
||||
}
|
||||
|
||||
if (conditions.notIn !== undefined) {
|
||||
const values = conditions.notIn.map((v) => this.#escapeString(v)).join(', ');
|
||||
return `${field} NOT IN (${values})`;
|
||||
}
|
||||
|
||||
throw new Error('Invalid text condition: no condition specified');
|
||||
};
|
||||
|
||||
/**
 * Renders a single numeric-field condition as query-language source.
 *
 * Keys are inspected in the order `equals`, `notEquals`, `greaterThan`,
 * `greaterThanOrEqual`, `lessThan`, `lessThanOrEqual`, `in`, `notIn`; the
 * first one that is not `undefined` is rendered and the rest are ignored.
 *
 * @param field - Field path exactly as it should appear in the output.
 * @param conditions - Condition bag of the number node.
 * @returns The rendered condition, e.g. `views >= 100` or `score IS NULL`.
 * @throws Error when none of the supported keys is set.
 */
#stringifyNumberCondition = (field: string, conditions: QueryConditionNumber['conditions']): string => {
  const { equals, notEquals, greaterThan, greaterThanOrEqual, lessThan, lessThanOrEqual, notIn } = conditions;
  // `in` is a reserved word, so it is read separately under a different name.
  const included = conditions.in;

  if (equals !== undefined) {
    // null is written with the dedicated NULL syntax.
    return equals === null ? `${field} IS NULL` : `${field} = ${equals}`;
  }

  if (notEquals !== undefined) {
    return notEquals === null ? `${field} IS NOT NULL` : `${field} != ${notEquals}`;
  }

  if (greaterThan !== undefined) {
    return `${field} > ${greaterThan}`;
  }

  if (greaterThanOrEqual !== undefined) {
    return `${field} >= ${greaterThanOrEqual}`;
  }

  if (lessThan !== undefined) {
    return `${field} < ${lessThan}`;
  }

  if (lessThanOrEqual !== undefined) {
    return `${field} <= ${lessThanOrEqual}`;
  }

  if (included !== undefined) {
    return `${field} IN (${included.join(', ')})`;
  }

  if (notIn !== undefined) {
    return `${field} NOT IN (${notIn.join(', ')})`;
  }

  throw new Error('Invalid number condition: no condition specified');
};
|
||||
|
||||
/**
 * Wraps a value in single quotes, doubling every single quote inside it
 * (`O'Brien` becomes `'O''Brien'`).
 *
 * @param value - Raw string operand.
 * @returns The quoted string literal.
 */
#escapeString = (value: string): string => `'${value.replace(/'/g, "''")}'`;
|
||||
|
||||
/**
 * Renders a filter tree as query-language source.
 *
 * Delegates to `#stringifyFilter`, passing `false` as its second argument
 * (the parentheses flag) for the root node.
 *
 * @param filter - Root of the filter tree.
 * @returns The rendered query string.
 */
public stringify = (filter: QueryFilter): string => this.#stringifyFilter(filter, false);
|
||||
}
|
||||
|
||||
export { Stringifier };
|
||||
@@ -1,754 +0,0 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
import { QueryParser } from './query-parser.ts';
|
||||
|
||||
import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from '#root/utils/utils.query.ts';
|
||||
|
||||
describe('QueryParser', () => {
|
||||
const parser = new QueryParser();
|
||||
|
||||
describe('parse', () => {
|
||||
describe('text conditions', () => {
  // One row per scenario: the query text plus the field path and condition
  // bag expected on the resulting text node.
  const scenarios = [
    { title: 'should parse simple text equality', query: "name = 'John'", field: ['name'], conditions: { equal: 'John' } },
    {
      title: 'should parse nested field text equality',
      query: "metadata.author = 'John'",
      field: ['metadata', 'author'],
      conditions: { equal: 'John' },
    },
    {
      title: 'should parse deeply nested field',
      query: "metadata.nested.deep.field = 'value'",
      field: ['metadata', 'nested', 'deep', 'field'],
      conditions: { equal: 'value' },
    },
    { title: 'should parse text not equal', query: "type != 'draft'", field: ['type'], conditions: { notEqual: 'draft' } },
    { title: 'should parse LIKE pattern', query: "title LIKE '%cat%'", field: ['title'], conditions: { like: '%cat%' } },
    {
      title: 'should parse NOT LIKE pattern',
      query: "author NOT LIKE '%admin%'",
      field: ['author'],
      conditions: { notLike: '%admin%' },
    },
    {
      title: 'should parse text IN list',
      query: "status IN ('published', 'archived', 'draft')",
      field: ['status'],
      conditions: { in: ['published', 'archived', 'draft'] },
    },
    {
      title: 'should parse text NOT IN list',
      query: "category NOT IN ('deleted', 'hidden')",
      field: ['category'],
      conditions: { notIn: ['deleted', 'hidden'] },
    },
    { title: 'should parse IS NULL', query: 'source IS NULL', field: ['source'], conditions: { equal: null } },
    {
      title: 'should handle escaped quotes in strings',
      query: "name = 'O''Brien'",
      field: ['name'],
      conditions: { equal: "O'Brien" },
    },
    { title: 'should handle empty string', query: "name = ''", field: ['name'], conditions: { equal: '' } },
  ];

  for (const { title, query, field, conditions } of scenarios) {
    it(title, () => {
      expect(parser.parse(query)).toEqual({ type: 'text', field, conditions });
    });
  }
});
|
||||
|
||||
describe('numeric conditions', () => {
  // One row per scenario: the query text plus the field path and condition
  // bag expected on the resulting number node.
  const scenarios = [
    { title: 'should parse numeric equality', query: 'age = 30', field: ['age'], conditions: { equals: 30 } },
    { title: 'should parse numeric not equal', query: 'count != 0', field: ['count'], conditions: { notEquals: 0 } },
    { title: 'should parse greater than', query: 'views > 100', field: ['views'], conditions: { greaterThan: 100 } },
    {
      title: 'should parse greater than or equal',
      query: 'views >= 100',
      field: ['views'],
      conditions: { greaterThanOrEqual: 100 },
    },
    { title: 'should parse less than', query: 'priority < 5', field: ['priority'], conditions: { lessThan: 5 } },
    { title: 'should parse less than or equal', query: 'age <= 65', field: ['age'], conditions: { lessThanOrEqual: 65 } },
    { title: 'should parse decimal numbers', query: 'score > 0.5', field: ['score'], conditions: { greaterThan: 0.5 } },
    {
      title: 'should parse negative numbers',
      query: 'temperature > -10',
      field: ['temperature'],
      conditions: { greaterThan: -10 },
    },
    { title: 'should parse numeric IN list', query: 'priority IN (1, 2, 3)', field: ['priority'], conditions: { in: [1, 2, 3] } },
    { title: 'should parse numeric NOT IN list', query: 'count NOT IN (0, -1)', field: ['count'], conditions: { notIn: [0, -1] } },
    {
      title: 'should parse nested field numeric condition',
      query: 'metadata.score >= 0.8',
      field: ['metadata', 'score'],
      conditions: { greaterThanOrEqual: 0.8 },
    },
  ];

  for (const { title, query, field, conditions } of scenarios) {
    it(title, () => {
      expect(parser.parse(query)).toEqual({ type: 'number', field, conditions });
    });
  }
});
|
||||
|
||||
describe('logical operators', () => {
  // Small builders for the expected tree nodes, to keep the table readable.
  const textEq = (name: string, value: string) => ({ type: 'text', field: [name], conditions: { equal: value } });
  const viewsOver = (limit: number) => ({ type: 'number', field: ['views'], conditions: { greaterThan: limit } });
  const and = (...conditions: object[]) => ({ type: 'operator', operator: 'and', conditions });
  const or = (...conditions: object[]) => ({ type: 'operator', operator: 'or', conditions });

  const scenarios = [
    {
      title: 'should parse AND operator',
      query: "type = 'article' AND status = 'published'",
      expected: and(textEq('type', 'article'), textEq('status', 'published')),
    },
    {
      title: 'should parse OR operator',
      query: "category = 'tech' OR category = 'science'",
      expected: or(textEq('category', 'tech'), textEq('category', 'science')),
    },
    {
      title: 'should parse multiple AND conditions',
      query: "type = 'article' AND status = 'published' AND views > 100",
      expected: and(textEq('type', 'article'), textEq('status', 'published'), viewsOver(100)),
    },
    {
      title: 'should parse multiple OR conditions',
      query: "type = 'a' OR type = 'b' OR type = 'c'",
      expected: or(textEq('type', 'a'), textEq('type', 'b'), textEq('type', 'c')),
    },
    {
      // A AND B OR C should be parsed as (A AND B) OR C
      title: 'should respect AND precedence over OR',
      query: "a = '1' AND b = '2' OR c = '3'",
      expected: or(and(textEq('a', '1'), textEq('b', '2')), textEq('c', '3')),
    },
    {
      title: 'should parse parenthesized expressions',
      query: "(type = 'post' OR type = 'page') AND views > 100",
      expected: and(or(textEq('type', 'post'), textEq('type', 'page')), viewsOver(100)),
    },
    {
      title: 'should parse nested parentheses',
      query: "((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news'",
      expected: and(
        or(and(textEq('status', 'active'), viewsOver(100)), and(textEq('status', 'featured'), viewsOver(50))),
        textEq('category', 'news'),
      ),
    },
  ];

  for (const { title, query, expected } of scenarios) {
    it(title, () => {
      expect(parser.parse(query)).toEqual(expected);
    });
  }
});
|
||||
|
||||
describe('case insensitivity', () => {
  // Lower-cased logical keywords must still produce an operator node of the matching kind.
  const keywordScenarios = [
    { title: 'should parse lowercase AND', query: "a = '1' and b = '2'", operator: 'and' },
    { title: 'should parse lowercase OR', query: "a = '1' or b = '2'", operator: 'or' },
  ];

  for (const { title, query, operator } of keywordScenarios) {
    it(title, () => {
      const node = parser.parse(query);
      expect(node.type).toBe('operator');
      expect((node as QueryOperator).operator).toBe(operator);
    });
  }

  // Mixed-case condition keywords must parse to the same text node as their upper-case form.
  const conditionScenarios = [
    { title: 'should parse mixed case LIKE', query: "title Like '%test%'", field: ['title'], conditions: { like: '%test%' } },
    { title: 'should parse mixed case IS NULL', query: 'field Is Null', field: ['field'], conditions: { equal: null } },
    { title: 'should parse mixed case IN', query: "status In ('a', 'b')", field: ['status'], conditions: { in: ['a', 'b'] } },
  ];

  for (const { title, query, field, conditions } of conditionScenarios) {
    it(title, () => {
      expect(parser.parse(query)).toEqual({ type: 'text', field, conditions });
    });
  }
});
|
||||
|
||||
describe('whitespace handling', () => {
  // Every variant below must parse to the same node.
  const johnByName = { type: 'text', field: ['name'], conditions: { equal: 'John' } };

  const scenarios = [
    { title: 'should handle extra whitespace', query: " name = 'John' " },
    { title: 'should handle no whitespace around operators', query: "name='John'" },
    { title: 'should handle tabs and newlines', query: "name\t=\n'John'" },
  ];

  for (const { title, query } of scenarios) {
    it(title, () => {
      expect(parser.parse(query)).toEqual(johnByName);
    });
  }
});
|
||||
|
||||
describe('error handling', () => {
  // `pattern` is set only where the error message itself is asserted.
  const scenarios: { title: string; query: string; pattern?: RegExp }[] = [
    { title: 'should throw on invalid syntax', query: 'invalid' },
    { title: 'should throw on mismatched parentheses', query: "(type = 'a'" },
    { title: 'should throw on unterminated string', query: "name = 'unterminated", pattern: /Unterminated string/ },
    { title: 'should throw on unexpected token', query: "name = 'a' INVALID" },
    { title: 'should throw on missing value after operator', query: 'name =' },
  ];

  for (const { title, query, pattern } of scenarios) {
    it(title, () => {
      const attempt = () => parser.parse(query);
      if (pattern) {
        expect(attempt).toThrow(pattern);
      } else {
        expect(attempt).toThrow();
      }
    });
  }
});
|
||||
});
|
||||
|
||||
describe('stringify', () => {
|
||||
describe('text conditions', () => {
  // One row per scenario: the text node to render and the exact source expected back.
  const scenarios: { title: string; filter: QueryConditionText; expected: string }[] = [
    {
      title: 'should stringify text equality',
      filter: { type: 'text', field: ['name'], conditions: { equal: 'John' } },
      expected: "name = 'John'",
    },
    {
      title: 'should stringify nested field',
      filter: { type: 'text', field: ['metadata', 'author'], conditions: { equal: 'John' } },
      expected: "metadata.author = 'John'",
    },
    {
      title: 'should stringify text not equal',
      filter: { type: 'text', field: ['type'], conditions: { notEqual: 'draft' } },
      expected: "type != 'draft'",
    },
    {
      title: 'should stringify LIKE',
      filter: { type: 'text', field: ['title'], conditions: { like: '%cat%' } },
      expected: "title LIKE '%cat%'",
    },
    {
      title: 'should stringify NOT LIKE',
      filter: { type: 'text', field: ['author'], conditions: { notLike: '%admin%' } },
      expected: "author NOT LIKE '%admin%'",
    },
    {
      title: 'should stringify text IN',
      filter: { type: 'text', field: ['status'], conditions: { in: ['published', 'archived'] } },
      expected: "status IN ('published', 'archived')",
    },
    {
      title: 'should stringify text NOT IN',
      filter: { type: 'text', field: ['category'], conditions: { notIn: ['deleted', 'hidden'] } },
      expected: "category NOT IN ('deleted', 'hidden')",
    },
    {
      title: 'should stringify IS NULL',
      filter: { type: 'text', field: ['source'], conditions: { equal: null } },
      expected: 'source IS NULL',
    },
    {
      title: 'should escape quotes in strings',
      filter: { type: 'text', field: ['name'], conditions: { equal: "O'Brien" } },
      expected: "name = 'O''Brien'",
    },
  ];

  for (const { title, filter, expected } of scenarios) {
    it(title, () => {
      expect(parser.stringify(filter)).toBe(expected);
    });
  }
});
|
||||
|
||||
describe('numeric conditions', () => {
  // One row per scenario: the number node to render and the exact source expected back.
  const scenarios: { title: string; filter: QueryConditionNumber; expected: string }[] = [
    {
      title: 'should stringify numeric equality',
      filter: { type: 'number', field: ['age'], conditions: { equals: 30 } },
      expected: 'age = 30',
    },
    {
      title: 'should stringify numeric not equal',
      filter: { type: 'number', field: ['count'], conditions: { notEquals: 0 } },
      expected: 'count != 0',
    },
    {
      title: 'should stringify greater than',
      filter: { type: 'number', field: ['views'], conditions: { greaterThan: 100 } },
      expected: 'views > 100',
    },
    {
      title: 'should stringify greater than or equal',
      filter: { type: 'number', field: ['views'], conditions: { greaterThanOrEqual: 100 } },
      expected: 'views >= 100',
    },
    {
      title: 'should stringify less than',
      filter: { type: 'number', field: ['priority'], conditions: { lessThan: 5 } },
      expected: 'priority < 5',
    },
    {
      title: 'should stringify less than or equal',
      filter: { type: 'number', field: ['age'], conditions: { lessThanOrEqual: 65 } },
      expected: 'age <= 65',
    },
    {
      title: 'should stringify decimal numbers',
      filter: { type: 'number', field: ['score'], conditions: { greaterThan: 0.5 } },
      expected: 'score > 0.5',
    },
    {
      title: 'should stringify numeric IN',
      filter: { type: 'number', field: ['priority'], conditions: { in: [1, 2, 3] } },
      expected: 'priority IN (1, 2, 3)',
    },
    {
      title: 'should stringify numeric NOT IN',
      filter: { type: 'number', field: ['count'], conditions: { notIn: [0, -1] } },
      expected: 'count NOT IN (0, -1)',
    },
    {
      title: 'should stringify numeric IS NULL',
      filter: { type: 'number', field: ['score'], conditions: { equals: null } },
      expected: 'score IS NULL',
    },
    {
      title: 'should stringify numeric IS NOT NULL',
      filter: { type: 'number', field: ['score'], conditions: { notEquals: null } },
      expected: 'score IS NOT NULL',
    },
  ];

  for (const { title, filter, expected } of scenarios) {
    it(title, () => {
      expect(parser.stringify(filter)).toBe(expected);
    });
  }
});
|
||||
|
||||
describe('logical operators', () => {
  // One row per scenario: the operator tree to render and the exact source expected back.
  const scenarios: { title: string; filter: QueryFilter; expected: string }[] = [
    {
      title: 'should stringify AND operator',
      filter: {
        type: 'operator',
        operator: 'and',
        conditions: [
          { type: 'text', field: ['type'], conditions: { equal: 'article' } },
          { type: 'text', field: ['status'], conditions: { equal: 'published' } },
        ],
      },
      expected: "type = 'article' AND status = 'published'",
    },
    {
      title: 'should stringify OR operator',
      filter: {
        type: 'operator',
        operator: 'or',
        conditions: [
          { type: 'text', field: ['category'], conditions: { equal: 'tech' } },
          { type: 'text', field: ['category'], conditions: { equal: 'science' } },
        ],
      },
      expected: "category = 'tech' OR category = 'science'",
    },
    {
      title: 'should stringify nested operators with parentheses',
      filter: {
        type: 'operator',
        operator: 'and',
        conditions: [
          {
            type: 'operator',
            operator: 'or',
            conditions: [
              { type: 'text', field: ['type'], conditions: { equal: 'post' } },
              { type: 'text', field: ['type'], conditions: { equal: 'page' } },
            ],
          },
          { type: 'number', field: ['views'], conditions: { greaterThan: 100 } },
        ],
      },
      expected: "(type = 'post' OR type = 'page') AND views > 100",
    },
    {
      title: 'should stringify empty operator',
      filter: { type: 'operator', operator: 'and', conditions: [] },
      expected: '',
    },
    {
      title: 'should stringify single-condition operator',
      filter: {
        type: 'operator',
        operator: 'and',
        conditions: [{ type: 'text', field: ['name'], conditions: { equal: 'test' } }],
      },
      expected: "name = 'test'",
    },
  ];

  for (const { title, filter, expected } of scenarios) {
    it(title, () => {
      expect(parser.stringify(filter)).toBe(expected);
    });
  }
});
|
||||
});
|
||||
|
||||
describe('roundtrip', () => {
  // Each query is parsed, rendered back to text, and parsed again; the two
  // parse results must be deeply equal.
  const queries = [
    "name = 'John'",
    "metadata.author = 'Jane'",
    'views > 100',
    'score >= 0.5',
    "title LIKE '%cat%'",
    "author NOT LIKE '%admin%'",
    "status IN ('published', 'archived')",
    'priority IN (1, 2, 3)',
    "type = 'article' AND status = 'published'",
    "category = 'tech' OR category = 'science'",
    "(type = 'post' OR type = 'page') AND views > 100",
  ];

  for (const query of queries) {
    it(`should roundtrip: ${query}`, () => {
      const firstPass = parser.parse(query);
      const secondPass = parser.parse(parser.stringify(firstPass));
      expect(secondPass).toEqual(firstPass);
    });
  }
});
|
||||
|
||||
describe('complex real-world queries', () => {
  it('should handle complex query with multiple field types', () => {
    const parsed = parser.parse(
      "type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100",
    );

    expect(parsed.type).toBe('operator');
    // The parenthesized OR group counts as one child of the top-level AND.
    const { operator, conditions } = parsed as QueryOperator;
    expect(operator).toBe('and');
    expect(conditions).toHaveLength(3);
  });

  it('should handle nested JSON paths with conditions', () => {
    const parsed = parser.parse("metadata.nested.deep.value = 'test' AND metadata.nested.count > 10");

    expect(parsed.type).toBe('operator');
    const children = (parsed as QueryOperator).conditions;
    const textNode = children[0] as QueryConditionText;
    const numberNode = children[1] as QueryConditionNumber;
    expect(textNode.field).toEqual(['metadata', 'nested', 'deep', 'value']);
    expect(numberNode.field).toEqual(['metadata', 'nested', 'count']);
  });

  it('should handle query from documentation example', () => {
    // From the JSON format in docs
    const documented: QueryFilter = {
      type: 'operator',
      operator: 'and',
      conditions: [
        { type: 'text', field: ['metadata', 'foo'], conditions: { equal: 'bar' } },
        { type: 'text', field: ['type'], conditions: { equal: 'demo' } },
      ],
    };

    expect(parser.parse("metadata.foo = 'bar' AND type = 'demo'")).toEqual(documented);
  });
});
|
||||
});
|
||||
@@ -1,19 +0,0 @@
|
||||
import { Stringifier } from './query-parser.stringifier.ts';
|
||||
import { Parser } from './query-parser.parser.ts';
|
||||
|
||||
import type { QueryFilter } from '#root/utils/utils.query.ts';
|
||||
|
||||
/**
 * Single entry point for the query language: holds one `Parser` and one
 * `Stringifier` and forwards each call to the matching instance.
 */
class QueryParser {
  private parser = new Parser();
  private stringifier = new Stringifier();

  /**
   * Converts query-language source into a filter tree.
   *
   * @param input - Query source text.
   * @returns Whatever `Parser.parse` produces for `input`.
   */
  public parse = (input: string): QueryFilter => this.parser.parse(input);

  /**
   * Converts a filter tree back into query-language source.
   *
   * @param filter - Root of the filter tree.
   * @returns Whatever `Stringifier.stringify` produces for `filter`.
   */
  public stringify = (filter: QueryFilter): string => this.stringifier.stringify(filter);
}
|
||||
|
||||
export { QueryParser };
|
||||
@@ -1,30 +0,0 @@
|
||||
/** Every kind of token a `Token` can carry. */
type TokenType =
  // Operands
  | 'IDENTIFIER' | 'STRING' | 'NUMBER'
  // Keywords
  | 'AND' | 'OR' | 'LIKE' | 'NOT' | 'IN' | 'IS' | 'NULL'
  // Comparison operators
  | 'EQUALS' | 'NOT_EQUALS'
  | 'GREATER_THAN' | 'GREATER_THAN_OR_EQUAL'
  | 'LESS_THAN' | 'LESS_THAN_OR_EQUAL'
  // Punctuation
  | 'LPAREN' | 'RPAREN' | 'COMMA' | 'DOT'
  // End-of-input marker
  | 'EOF';

/** A single token: its kind, a string value, and a numeric position. */
type Token = {
  /** Kind of token. */
  type: TokenType;
  /** String payload of the token. */
  value: string;
  /** Numeric position of the token (presumably an offset into the input; confirm against the lexer). */
  position: number;
};
|
||||
|
||||
export type { TokenType, Token };
|
||||
@@ -3,9 +3,9 @@ import ClientPgLite from 'knex-pglite';
|
||||
import { PGlite } from '@electric-sql/pglite';
|
||||
import { vector } from '@electric-sql/pglite/vector';
|
||||
|
||||
import { migrationSource } from './migrations/migrations.ts';
|
||||
import { migrationSource } from './migrations/migrations.js';
|
||||
|
||||
import { destroy, Services } from '#root/utils/utils.services.ts';
|
||||
import { destroy, Services } from '#root/utils/utils.services.js';
|
||||
|
||||
class DatabaseService {
|
||||
#services: Services;
|
||||
@@ -50,5 +50,5 @@ class DatabaseService {
|
||||
};
|
||||
}
|
||||
|
||||
export { type TableRows, tableNames } from './migrations/migrations.ts';
|
||||
export { type TableRows, tableNames } from './migrations/migrations.js';
|
||||
export { DatabaseService };
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { Migration } from './migrations.types.ts';
|
||||
import type { Migration } from './migrations.types.js';
|
||||
|
||||
import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
|
||||
import { EmbeddingsService } from '#root/services/embeddings/embeddings.js';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
|
||||
|
||||
const tableNames = {
|
||||
documents: 'documents',
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import type { Knex } from 'knex';
|
||||
|
||||
import type { Migration } from './migrations.types.ts';
|
||||
import { init } from './migrations.001-init.ts';
|
||||
import type { Migration } from './migrations.types.js';
|
||||
import { init } from './migrations.001-init.js';
|
||||
|
||||
import type { Services } from '#root/utils/utils.services.ts';
|
||||
import type { Services } from '#root/utils/utils.services.js';
|
||||
|
||||
const migrations = [init] satisfies Migration[];
|
||||
|
||||
@@ -21,5 +21,5 @@ const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<
|
||||
getMigrations: async () => migrations,
|
||||
});
|
||||
|
||||
export { type TableRows, tableNames } from './migrations.001-init.ts';
|
||||
export { type TableRows, tableNames } from './migrations.001-init.js';
|
||||
export { migrationSource };
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { Knex } from 'knex';
|
||||
|
||||
import type { Services } from '#root/utils/utils.services.ts';
|
||||
import type { Services } from '#root/utils/utils.services.js';
|
||||
|
||||
type MigrationOptions = {
|
||||
knex: Knex;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { TableRows } from '../database/database.ts';
|
||||
import type { TableRows } from '../database/database.js';
|
||||
|
||||
import type { DocumentChunk } from './document-chunks.schemas.ts';
|
||||
import type { DocumentChunk } from './document-chunks.schemas.js';
|
||||
|
||||
const mapFromDocumentChunkRow = (
|
||||
row: TableRows['documentChunks'] & {
|
||||
@@ -1,7 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
|
||||
|
||||
import { createListResultSchema } from '#root/utils/utils.schema.ts';
|
||||
import { queryFilterSchema } from '#root/utils/utils.query.ts';
|
||||
import { createListResultSchema } from '#root/utils/utils.schema.js';
|
||||
|
||||
const documentChunkSchema = z.object({
|
||||
id: z.string(),
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
|
||||
import { EmbeddingsService } from '../embeddings/embeddings.ts';
|
||||
import { QueryParser } from '@morten-olsen/stash-query-dsl';
|
||||
|
||||
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.ts';
|
||||
import { mapFromDocumentChunkRow } from './document.mappings.ts';
|
||||
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
|
||||
import { EmbeddingsService } from '../embeddings/embeddings.js';
|
||||
|
||||
import type { Services } from '#root/utils/utils.services.ts';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
|
||||
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js';
|
||||
import { mapFromDocumentChunkRow } from './document-chunks.mappings.js';
|
||||
|
||||
import type { Services } from '#root/utils/utils.services.js';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
|
||||
import type { ExplicitAny } from '#root/global.js';
|
||||
import { applyQueryFilter } from '#root/utils/utils.query.ts';
|
||||
import { QueryParser } from '#root/query-parser/query-parser.ts';
|
||||
import { applyQueryFilter } from '#root/utils/utils.query.js';
|
||||
|
||||
const baseFields = [
|
||||
`${tableNames.documentChunks}.*`,
|
||||
@@ -61,5 +62,5 @@ class DocumentChunksService {
|
||||
};
|
||||
}
|
||||
|
||||
export * from './document-chunks.schemas.ts';
|
||||
export * from './document-chunks.schemas.js';
|
||||
export { DocumentChunksService };
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { TableRows } from '../database/database.ts';
|
||||
import type { TableRows } from '../database/database.js';
|
||||
|
||||
import type { Document } from './documents.schemas.ts';
|
||||
import type { Document } from './documents.schemas.js';
|
||||
|
||||
const mapFromDocumentRow = (row: TableRows['documents']): Document => ({
|
||||
...row,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { z } from 'zod';
|
||||
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
|
||||
|
||||
import { createListResultSchema } from '#root/utils/utils.schema.ts';
|
||||
import { queryFilterSchema } from '#root/utils/utils.query.ts';
|
||||
import { createListResultSchema } from '#root/utils/utils.schema.js';
|
||||
|
||||
const documentSchema = z.object({
|
||||
id: z.string(),
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
|
||||
import { SplittingService } from '../splitter/splitter.ts';
|
||||
import { QueryParser } from '@morten-olsen/stash-query-dsl';
|
||||
|
||||
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
|
||||
import { SplittingService } from '../splitter/splitter.js';
|
||||
|
||||
import type {
|
||||
Document,
|
||||
@@ -8,13 +10,12 @@ import type {
|
||||
DocumentUpsert,
|
||||
DocumentUpsertResult,
|
||||
} from './documents.schemas.ts';
|
||||
import { mapFromDocumentRow } from './documents.mapping.ts';
|
||||
import { mapFromDocumentRow } from './documents.mapping.js';
|
||||
|
||||
import { EventEmitter } from '#root/utils/utils.event-emitter.ts';
|
||||
import type { Services } from '#root/utils/utils.services.ts';
|
||||
import { compareObjectKeys } from '#root/utils/utils.compare.ts';
|
||||
import { applyQueryFilter } from '#root/utils/utils.query.ts';
|
||||
import { QueryParser } from '#root/query-parser/query-parser.ts';
|
||||
import { EventEmitter } from '#root/utils/utils.event-emitter.js';
|
||||
import type { Services } from '#root/utils/utils.services.js';
|
||||
import { compareObjectKeys } from '#root/utils/utils.compare.js';
|
||||
import { applyQueryFilter } from '#root/utils/utils.query.js';
|
||||
|
||||
type DocumentsServiceEvents = {
|
||||
upserted: (document: Document) => void;
|
||||
@@ -174,5 +175,5 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
||||
};
|
||||
}
|
||||
|
||||
export * from './documents.schemas.ts';
|
||||
export * from './documents.schemas.js';
|
||||
export { DocumentsService };
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
|
||||
|
||||
import { Vector } from './embeddings.vector.ts';
|
||||
import { Vector } from './embeddings.vector.js';
|
||||
|
||||
import type { ExplicitAny } from '#root/global.js';
|
||||
|
||||
type ExtractOptions = {
|
||||
input: string[];
|
||||
@@ -57,4 +59,4 @@ class EmbeddingsService {
|
||||
};
|
||||
}
|
||||
|
||||
export { EmbeddingsService };
|
||||
export { EmbeddingsService, Vector };
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { EmbeddingsService } from '../embeddings/embeddings.ts';
|
||||
import type { Document } from '../documents/documents.schemas.ts';
|
||||
import { EmbeddingsService } from '../embeddings/embeddings.js';
|
||||
import type { Document } from '../documents/documents.schemas.js';
|
||||
|
||||
import type { Chunk, Splitter } from './splitter.types.ts';
|
||||
import { textSplitter } from './splitters/splitters.text.ts';
|
||||
import type { Chunk, Splitter } from './splitter.types.js';
|
||||
import { textSplitter } from './splitters/splitters.text.js';
|
||||
|
||||
import type { Services } from '#root/utils/utils.services.ts';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
|
||||
import type { Services } from '#root/utils/utils.services.js';
|
||||
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
|
||||
|
||||
class SplittingService {
|
||||
#services: Services;
|
||||
@@ -40,5 +40,5 @@ class SplittingService {
|
||||
};
|
||||
}
|
||||
|
||||
export * from './splitter.types.ts';
|
||||
export * from './splitter.types.js';
|
||||
export { SplittingService };
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Document } from '../documents/documents.schemas.ts';
|
||||
import type { Vector } from '../embeddings/embeddings.vector.ts';
|
||||
import type { Document } from '../documents/documents.schemas.js';
|
||||
import type { Vector } from '../embeddings/embeddings.vector.js';
|
||||
|
||||
type Chunk = {
|
||||
content: string;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
|
||||
import type { Splitter } from '../splitter.types.ts';
|
||||
import type { Splitter } from '../splitter.types.js';
|
||||
|
||||
const textSplitter: Splitter = {
|
||||
match: (document) => !!document.content,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { DatabaseService } from '../database/database.ts';
|
||||
import { DatabaseService } from '../database/database.js';
|
||||
|
||||
import { Services } from '#root/utils/utils.services.ts';
|
||||
import { Services } from '#root/utils/utils.services.js';
|
||||
|
||||
class WarmupService {
|
||||
#services: Services;
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { ExplicitAny } from '#root/global.js';
|
||||
|
||||
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;
|
||||
|
||||
type OnOptions = {
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import type {
|
||||
QueryCondition,
|
||||
QueryConditionNumber,
|
||||
QueryConditionText,
|
||||
QueryFilter,
|
||||
} from '@morten-olsen/stash-query-dsl';
|
||||
import { type Knex } from 'knex';
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
|
||||
* Escapes a JSON key for use in PostgreSQL JSON operators.
|
||||
* Escapes single quotes by doubling them, which is the PostgreSQL standard.
|
||||
@@ -30,74 +34,6 @@ const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?:
|
||||
return query.client.raw(sqlExpression);
|
||||
};
|
||||
|
||||
const queryConditionTextSchema = z
|
||||
.object({
|
||||
type: z.literal('text'),
|
||||
tableName: z.string().optional(),
|
||||
field: z.array(z.string()),
|
||||
conditions: z.object({
|
||||
equal: z.string().nullish(),
|
||||
notEqual: z.string().optional(),
|
||||
like: z.string().optional(),
|
||||
notLike: z.string().optional(),
|
||||
in: z.array(z.string()).optional(),
|
||||
notIn: z.array(z.string()).optional(),
|
||||
}),
|
||||
})
|
||||
.meta({
|
||||
example: {
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
examples: [
|
||||
{
|
||||
summary: 'Equal condition',
|
||||
value: {
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'Like condition',
|
||||
value: {
|
||||
type: 'text',
|
||||
field: ['content'],
|
||||
conditions: {
|
||||
like: '%cat%',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'In condition',
|
||||
value: {
|
||||
type: 'text',
|
||||
field: ['type'],
|
||||
conditions: {
|
||||
in: ['demo', 'article', 'post'],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'Null check',
|
||||
value: {
|
||||
type: 'text',
|
||||
field: ['source'],
|
||||
conditions: {
|
||||
equal: null,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
type QueryConditionText = z.infer<typeof queryConditionTextSchema>;
|
||||
|
||||
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
|
||||
const selector = getFieldSelector(query, field, tableName);
|
||||
if (conditions.equal) {
|
||||
@@ -127,77 +63,6 @@ const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, c
|
||||
return query;
|
||||
};
|
||||
|
||||
const queryConditionNumberSchema = z
|
||||
.object({
|
||||
type: z.literal('number'),
|
||||
tableName: z.string().optional(),
|
||||
field: z.array(z.string()),
|
||||
conditions: z.object({
|
||||
equals: z.number().nullish(),
|
||||
notEquals: z.number().nullish(),
|
||||
greaterThan: z.number().optional(),
|
||||
greaterThanOrEqual: z.number().optional(),
|
||||
lessThan: z.number().optional(),
|
||||
lessThanOrEqual: z.number().optional(),
|
||||
in: z.array(z.number()).optional(),
|
||||
notIn: z.array(z.number()).optional(),
|
||||
}),
|
||||
})
|
||||
.meta({
|
||||
example: {
|
||||
type: 'number',
|
||||
field: ['typeVersion'],
|
||||
conditions: {
|
||||
equals: 1,
|
||||
},
|
||||
},
|
||||
examples: [
|
||||
{
|
||||
summary: 'Equals condition',
|
||||
value: {
|
||||
type: 'number',
|
||||
field: ['typeVersion'],
|
||||
conditions: {
|
||||
equals: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'Greater than condition',
|
||||
value: {
|
||||
type: 'number',
|
||||
field: ['typeVersion'],
|
||||
conditions: {
|
||||
greaterThan: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'Range condition',
|
||||
value: {
|
||||
type: 'number',
|
||||
field: ['typeVersion'],
|
||||
conditions: {
|
||||
greaterThanOrEqual: 1,
|
||||
lessThanOrEqual: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'In condition',
|
||||
value: {
|
||||
type: 'number',
|
||||
field: ['typeVersion'],
|
||||
conditions: {
|
||||
in: [1, 2, 3],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
type QueryConditionNumber = z.infer<typeof queryConditionNumberSchema>;
|
||||
|
||||
const applyQueryConditionNumber = (
|
||||
query: Knex.QueryBuilder,
|
||||
{ field, tableName, conditions }: QueryConditionNumber,
|
||||
@@ -236,10 +101,6 @@ const applyQueryConditionNumber = (
|
||||
return query;
|
||||
};
|
||||
|
||||
const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]);
|
||||
|
||||
type QueryCondition = z.infer<typeof queryConditionSchema>;
|
||||
|
||||
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
|
||||
switch (options.type) {
|
||||
case 'text': {
|
||||
@@ -254,254 +115,6 @@ const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition)
|
||||
}
|
||||
};
|
||||
|
||||
type QueryFilter = QueryCondition | QueryOperator;
|
||||
|
||||
type QueryOperator = {
|
||||
type: 'operator';
|
||||
operator: 'and' | 'or';
|
||||
conditions: QueryFilter[];
|
||||
};
|
||||
|
||||
// Create a depth-limited recursive schema for OpenAPI compatibility
|
||||
// This supports up to 3 levels of nesting, which should be sufficient for most use cases
|
||||
// OpenAPI cannot handle z.lazy(), so we manually define the nesting
|
||||
// If you need deeper nesting, you can add more levels (Level3, Level4, etc.)
|
||||
const queryFilterSchemaLevel0: z.ZodType<QueryFilter> = z.union([
|
||||
queryConditionSchema,
|
||||
z
|
||||
.object({
|
||||
type: z.literal('operator'),
|
||||
operator: z.enum(['and', 'or']),
|
||||
conditions: z.array(queryConditionSchema),
|
||||
})
|
||||
.meta({
|
||||
example: {
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
examples: [
|
||||
{
|
||||
summary: 'AND operator',
|
||||
value: {
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['type'],
|
||||
conditions: {
|
||||
equal: 'demo',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
summary: 'OR operator',
|
||||
value: {
|
||||
type: 'operator',
|
||||
operator: 'or',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'baz',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
]);
|
||||
|
||||
const queryFilterSchemaLevel1: z.ZodType<QueryFilter> = z.union([
|
||||
queryConditionSchema,
|
||||
z
|
||||
.object({
|
||||
type: z.literal('operator'),
|
||||
operator: z.enum(['and', 'or']),
|
||||
conditions: z.array(queryFilterSchemaLevel0),
|
||||
})
|
||||
.meta({
|
||||
example: {
|
||||
type: 'operator',
|
||||
operator: 'or',
|
||||
conditions: [
|
||||
{
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'baz',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
examples: [
|
||||
{
|
||||
summary: 'Nested AND within OR',
|
||||
value: {
|
||||
type: 'operator',
|
||||
operator: 'or',
|
||||
conditions: [
|
||||
{
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['type'],
|
||||
conditions: {
|
||||
equal: 'demo',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'baz',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
]);
|
||||
|
||||
const queryFilterSchemaLevel2: z.ZodType<QueryFilter> = z.union([
|
||||
queryConditionSchema,
|
||||
z
|
||||
.object({
|
||||
type: z.literal('operator'),
|
||||
operator: z.enum(['and', 'or']),
|
||||
conditions: z.array(queryFilterSchemaLevel1),
|
||||
})
|
||||
.meta({
|
||||
example: {
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'operator',
|
||||
operator: 'or',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'baz',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['type'],
|
||||
conditions: {
|
||||
equal: 'demo',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
examples: [
|
||||
{
|
||||
summary: 'Complex nested query',
|
||||
value: {
|
||||
type: 'operator',
|
||||
operator: 'and',
|
||||
conditions: [
|
||||
{
|
||||
type: 'operator',
|
||||
operator: 'or',
|
||||
conditions: [
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'bar',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['metadata', 'foo'],
|
||||
conditions: {
|
||||
equal: 'baz',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
field: ['type'],
|
||||
conditions: {
|
||||
equal: 'demo',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
]);
|
||||
|
||||
// Export the depth-limited schema (supports 3 levels of nesting)
|
||||
// This works with OpenAPI schema generation
|
||||
const queryFilterSchema = queryFilterSchemaLevel2;
|
||||
|
||||
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
|
||||
if (filter.type === 'operator') {
|
||||
if (filter.conditions.length === 0) {
|
||||
@@ -545,5 +158,4 @@ const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
|
||||
}
|
||||
};
|
||||
|
||||
export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter };
|
||||
export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter };
|
||||
export { applyQueryCondition, applyQueryFilter };
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"paths": {
|
||||
"#root/*": [
|
||||
"./src/*"
|
||||
|
||||
Reference in New Issue
Block a user