This commit is contained in:
Morten Olsen
2025-12-10 09:11:03 +01:00
parent 9f9bc03d03
commit f9494c88e2
74 changed files with 2004 additions and 1035 deletions

View File

@@ -1,336 +0,0 @@
# Query Language Specification
This document describes the SQL-like query language syntax for building database queries. The language supports filtering on both text and numeric fields, including nested JSON fields, with logical operators for complex queries.
## Overview
The query language provides a human-readable, SQL-like syntax that can be parsed into the internal JSON query format used by the system. It supports:
- Text field conditions (equality, pattern matching, membership)
- Numeric field conditions (comparison operators, membership)
- Nested JSON field access using dot notation
- Logical operators (AND, OR) with grouping
- NULL value checks
## Syntax
### Field References
Fields are referenced using dot notation for nested JSON paths:
```
field_name
metadata.foo
metadata.nested.deep.field
```
**Examples:**
- `content` - top-level field
- `metadata.author` - nested field in metadata object
- `metadata.tags.0` - array element (if needed)
### Text Conditions
Text conditions operate on string values:
| Operator | Syntax | Description |
|----------|--------|-------------|
| Equality | `field = 'value'` | Exact match |
| Inequality | `field != 'value'` | Not equal |
| NULL check | `field IS NULL` | Field is null |
| NOT NULL | `field IS NOT NULL` | Field is not null |
| Pattern match | `field LIKE 'pattern'` | SQL LIKE pattern matching |
| Not like | `field NOT LIKE 'pattern'` | Negated pattern matching |
| In list | `field IN ('val1', 'val2', 'val3')` | Value in list |
| Not in list | `field NOT IN ('val1', 'val2')` | Value not in list |
**String Literals:**
- Single quotes: `'value'`
- Escaped quotes: `'O''Brien'` (double single quote)
- Empty string: `''`
**LIKE Patterns:**
- `%` matches any sequence of characters
- `_` matches any single character
- Examples: `'%cat%'`, `'test_%'`, `'exact'`
**Examples:**
```sql
content = 'hello world'
metadata.foo = 'bar'
type != 'draft'
source IS NULL
title LIKE '%cat%'
author NOT LIKE '%admin%'
status IN ('published', 'archived')
category NOT IN ('deleted', 'hidden')
```
### Numeric Conditions
Numeric conditions operate on number values:
| Operator | Syntax | Description |
|----------|--------|-------------|
| Equality | `field = 123` | Exact match |
| Inequality | `field != 123` | Not equal |
| NULL check | `field IS NULL` | Field is null |
| NOT NULL | `field IS NOT NULL` | Field is not null |
| Greater than | `field > 10` | Greater than |
| Greater or equal | `field >= 10` | Greater than or equal |
| Less than | `field < 10` | Less than |
| Less or equal | `field <= 10` | Less than or equal |
| In list | `field IN (1, 2, 3)` | Value in list |
| Not in list | `field NOT IN (1, 2, 3)` | Value not in list |
**Numeric Literals:**
- Integers: `123`, `-45`, `0`
- Decimals: `123.45`, `-0.5`, `3.14159`
- Scientific notation: `1e10`, `2.5e-3` (if supported)
**Examples:**
```sql
typeVersion = 1
score > 0.5
views >= 100
priority < 5
age <= 65
rating IN (1, 2, 3, 4, 5)
count NOT IN (0, -1)
```
### Logical Operators
Combine conditions using `AND` and `OR` operators:
| Operator | Syntax | Description |
|----------|--------|-------------|
| AND | `condition1 AND condition2` | Both conditions must be true |
| OR | `condition1 OR condition2` | At least one condition must be true |
**Grouping:**
Use parentheses `()` to group conditions and control operator precedence:
```sql
(condition1 AND condition2) OR condition3
condition1 AND (condition2 OR condition3)
```
**Examples:**
```sql
type = 'article' AND status = 'published'
metadata.foo = 'bar' OR metadata.foo = 'baz'
(type = 'post' OR type = 'page') AND views > 100
```
### Operator Precedence
1. Parentheses `()` - highest precedence
2. `AND` - evaluated before OR
3. `OR` - lowest precedence
**Examples:**
```sql
-- Equivalent to: (A AND B) OR C
A AND B OR C
-- Equivalent to: A AND (B OR C)
A AND (B OR C)
-- Explicit grouping
(A OR B) AND (C OR D)
```
## Complete Examples
### Simple Conditions
```sql
-- Text equality
metadata.author = 'John Doe'
-- Numeric comparison
views >= 1000
-- Pattern matching
title LIKE '%tutorial%'
-- NULL check
source IS NULL
```
### Multiple Conditions
```sql
-- AND operator
type = 'article' AND status = 'published' AND views > 100
-- OR operator
category = 'tech' OR category = 'science'
-- Mixed operators
(type = 'post' OR type = 'page') AND status = 'published'
```
### Complex Nested Queries
```sql
-- Nested AND within OR
(metadata.foo = 'bar' AND type = 'demo') OR metadata.foo = 'baz'
-- Multiple levels of nesting
((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news'
-- Complex query with multiple field types
type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100 AND rating IN (4, 5)
```
### Array/List Operations
```sql
-- Text IN
status IN ('published', 'archived', 'draft')
-- Numeric IN
priority IN (1, 2, 3)
-- NOT IN
category NOT IN ('deleted', 'hidden')
```
## Type Inference
The parser will infer the condition type (text vs number) based on:
1. **Operator context**: Operators like `>`, `<`, `>=`, `<=` imply numeric
2. **Value type**:
- Quoted strings (`'value'`) → text condition
- Unquoted numbers (`123`, `45.6`) → numeric condition
- `NULL` → can be either (context-dependent)
3. **Field name**: If a field is known to be numeric, numeric operators are used
**Examples:**
```sql
-- Text condition (quoted string)
author = 'John'
-- Numeric condition (unquoted number)
age = 30
-- Numeric comparison
score > 0.5
-- Text pattern
title LIKE '%test%'
```
## Escaping and Special Characters
### String Escaping
- Single quotes in strings: `'O''Brien'` → `O'Brien`
- Empty string: `''`
### Field Name Escaping
If field names contain special characters or reserved words, they can be quoted (implementation-dependent):
```sql
-- Reserved words or special characters (if supported)
"order" = 'asc'
"metadata.field-name" = 'value'
```
## Error Handling
The parser should provide clear error messages for:
- Invalid syntax
- Mismatched parentheses
- Invalid operators for field types
- Missing values
- Invalid escape sequences
## Grammar (BNF-like)
```
query ::= expression
expression ::= and_expression ( 'OR' and_expression )*
and_expression ::= primary ( 'AND' primary )*
primary ::= condition | group
group ::= '(' expression ')'
condition ::= text_condition | numeric_condition
text_condition ::= field ( '=' | '!=' | 'LIKE' | 'NOT LIKE' ) string_literal
| field 'IS' ( 'NULL' | 'NOT NULL' )
| field 'IN' '(' string_list ')'
| field 'NOT IN' '(' string_list ')'
numeric_condition ::= field ( '=' | '!=' | '>' | '>=' | '<' | '<=' ) number
| field 'IS' ( 'NULL' | 'NOT NULL' )
| field 'IN' '(' number_list ')'
| field 'NOT IN' '(' number_list ')'
field ::= identifier ( '.' identifier )*
identifier ::= [a-zA-Z_][a-zA-Z0-9_]*
string_literal ::= "'" ( escaped_char | [^'] )* "'"
escaped_char ::= "''"
string_list ::= string_literal ( ',' string_literal )*
number ::= '-'? [0-9]+ ( '.' [0-9]+ )? ( [eE] [+-]? [0-9]+ )?
number_list ::= number ( ',' number )*
```
## Migration from JSON Format
The SQL-like syntax maps to the JSON format as follows:
**JSON:**
```json
{
"type": "text",
"field": ["metadata", "foo"],
"conditions": {
"equal": "bar"
}
}
```
**SQL:**
```sql
metadata.foo = 'bar'
```
**JSON (with operator):**
```json
{
"type": "operator",
"operator": "and",
"conditions": [
{
"type": "text",
"field": ["metadata", "foo"],
"conditions": {
"equal": "bar"
}
},
{
"type": "text",
"field": ["type"],
"conditions": {
"equal": "demo"
}
}
]
}
```
**SQL:**
```sql
metadata.foo = 'bar' AND type = 'demo'
```
## Implementation Notes
1. **Whitespace**: Whitespace is generally ignored except within string literals
2. **Case sensitivity**:
- Operators (`AND`, `OR`, `LIKE`, etc.) are case-insensitive
- Field names and string values are case-sensitive
3. **Comments**: Not supported in initial version (can be added later)
4. **Table prefixes**: The parser may support optional table name prefixes (e.g., `documents.metadata.foo`) if needed

View File

@@ -37,6 +37,7 @@
"@fastify/websocket": "11.2.0",
"@huggingface/transformers": "^3.8.1",
"@langchain/textsplitters": "^1.0.1",
"@morten-olsen/stash-query-dsl": "workspace:*",
"@scalar/fastify-api-reference": "1.40.2",
"better-sqlite3": "^12.5.0",
"deep-equal": "^2.2.3",

View File

@@ -10,12 +10,12 @@ import {
type ZodTypeProvider,
} from 'fastify-type-provider-zod';
import { Services } from './utils/utils.services.ts';
import { systemEndpoints } from './endpoints/system/system.ts';
import { WarmupService } from './services/warmup/warmup.ts';
import { documentEndpoints } from './endpoints/documents/documents.ts';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.ts';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.ts';
import { Services } from './utils/utils.services.js';
import { systemEndpoints } from './endpoints/system/system.js';
import { WarmupService } from './services/warmup/warmup.js';
import { documentEndpoints } from './endpoints/documents/documents.js';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.js';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.js';
class BaseError extends Error {
public statusCode: number;

View File

@@ -1,6 +1,6 @@
import { createApi } from './api.js';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.ts';
import { Services } from './utils/utils.services.ts';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.js';
import { Services } from './utils/utils.services.js';
const services = new Services();
const server = await createApi(services);

View File

@@ -4,7 +4,7 @@ import {
documentChunkFilterSchema,
documentChunksFindResultSchema,
DocumentChunksService,
} from '#root/services/document-chunks/document-chunks.ts';
} from '#root/services/document-chunks/document-chunks.js';
const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -4,7 +4,7 @@ import {
documentFilterSchema,
documentFindResultSchema,
DocumentsService,
} from '#root/services/documents/documents.ts';
} from '#root/services/documents/documents.js';
const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -4,7 +4,7 @@ import {
DocumentsService,
documentUpsertResultSchema,
documentUpsertSchema,
} from '#root/services/documents/documents.ts';
} from '#root/services/documents/documents.js';
const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -1,7 +1,7 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import { z } from 'zod';
import { DatabaseService } from '#root/services/database/database.ts';
import { DatabaseService } from '#root/services/database/database.js';
const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -1,202 +0,0 @@
import type { Token } from './query-parser.types.ts';
/**
 * Lexer for the SQL-like query language.
 *
 * Splits a query string into a flat token list: punctuation (`(`, `)`, `,`,
 * `.`), comparison operators, single-quoted strings, numbers, keywords and
 * identifiers. Keywords (AND, OR, LIKE, NOT, IN, IS, NULL) are recognised
 * case-insensitively; the token value keeps the text exactly as written.
 */
class Lexer {
  #input: string;
  #position = 0;
  #tokens: Token[] = [];

  constructor(input: string) {
    this.#input = input;
  }

  /** Character `offset` places ahead of the cursor, or `''` past the end of the input. */
  #peek = (offset = 0): string => this.#input.charAt(this.#position + offset);

  /** True for a single ASCII digit; false for `''` (end of input). */
  #isDigit = (char: string): boolean => /[0-9]/.test(char);

  /** Advances the cursor past any run of whitespace. */
  #skipWhitespace = (): void => {
    while (/\s/.test(this.#peek())) {
      this.#position++;
    }
  };

  /** Consumes a (possibly empty) run of digits and returns it. */
  #readDigits = (): string => {
    const start = this.#position;
    while (this.#isDigit(this.#peek())) {
      this.#position++;
    }
    return this.#input.slice(start, this.#position);
  };

  /**
   * Reads exactly one token starting at the cursor.
   *
   * The caller must have skipped whitespace and checked that input remains.
   *
   * @throws Error when the character at the cursor cannot start any token.
   */
  #nextToken = (): Token => {
    const char = this.#peek();
    const next = this.#peek(1);
    const startPosition = this.#position;

    // Single character tokens
    if (char === '(') {
      this.#position++;
      return { type: 'LPAREN', value: '(', position: startPosition };
    }
    if (char === ')') {
      this.#position++;
      return { type: 'RPAREN', value: ')', position: startPosition };
    }
    if (char === ',') {
      this.#position++;
      return { type: 'COMMA', value: ',', position: startPosition };
    }
    if (char === '.') {
      this.#position++;
      return { type: 'DOT', value: '.', position: startPosition };
    }

    // Two-character operators are tried before their one-character prefixes.
    if (char === '!' && next === '=') {
      this.#position += 2;
      return { type: 'NOT_EQUALS', value: '!=', position: startPosition };
    }
    if (char === '>' && next === '=') {
      this.#position += 2;
      return { type: 'GREATER_THAN_OR_EQUAL', value: '>=', position: startPosition };
    }
    if (char === '<' && next === '=') {
      this.#position += 2;
      return { type: 'LESS_THAN_OR_EQUAL', value: '<=', position: startPosition };
    }

    // Single character operators
    if (char === '=') {
      this.#position++;
      return { type: 'EQUALS', value: '=', position: startPosition };
    }
    if (char === '>') {
      this.#position++;
      return { type: 'GREATER_THAN', value: '>', position: startPosition };
    }
    if (char === '<') {
      this.#position++;
      return { type: 'LESS_THAN', value: '<', position: startPosition };
    }

    // String literal
    if (char === "'") {
      return this.#readString();
    }

    // Number; a minus sign only starts a number when a digit follows it.
    if (this.#isDigit(char) || (char === '-' && this.#isDigit(next))) {
      return this.#readNumber();
    }

    // Identifier or keyword
    if (/[a-zA-Z_]/.test(char)) {
      return this.#readIdentifierOrKeyword();
    }

    throw new Error(`Unexpected character '${char}' at position ${this.#position}`);
  };

  /**
   * Reads a single-quoted string literal. A doubled quote (`''`) inside the
   * literal stands for one quote character.
   *
   * @throws Error when the input ends before the closing quote.
   */
  #readString = (): Token => {
    const startPosition = this.#position;
    this.#position++; // Skip opening quote
    let value = '';

    while (this.#position < this.#input.length) {
      const char = this.#peek();
      if (char !== "'") {
        value += char;
        this.#position++;
      } else if (this.#peek(1) === "'") {
        // Escaped quote
        value += "'";
        this.#position += 2;
      } else {
        this.#position++; // Skip closing quote
        return { type: 'STRING', value, position: startPosition };
      }
    }

    throw new Error(`Unterminated string starting at position ${startPosition}`);
  };

  /**
   * Reads a number: optional minus sign, integer digits, optional fraction
   * and optional exponent.
   *
   * The fraction and the exponent are only consumed when at least one digit
   * follows them, so malformed input such as `1e` lexes as the number `1`
   * followed by the identifier `e` instead of being absorbed into the number.
   */
  #readNumber = (): Token => {
    const startPosition = this.#position;
    let value = '';

    // Optional minus sign
    if (this.#peek() === '-') {
      value += '-';
      this.#position++;
    }

    // Integer part
    value += this.#readDigits();

    // Decimal part
    if (this.#peek() === '.' && this.#isDigit(this.#peek(1))) {
      value += '.';
      this.#position++;
      value += this.#readDigits();
    }

    // Scientific notation: marker, optional sign, then at least one digit.
    const marker = this.#peek();
    if (marker === 'e' || marker === 'E') {
      const sign = this.#peek(1);
      const hasSign = sign === '+' || sign === '-';
      if (this.#isDigit(this.#peek(hasSign ? 2 : 1))) {
        value += marker;
        this.#position++;
        if (hasSign) {
          value += sign;
          this.#position++;
        }
        value += this.#readDigits();
      }
    }

    return { type: 'NUMBER', value, position: startPosition };
  };

  /** Reads a run of `[a-zA-Z0-9_]` and classifies it as a keyword or an identifier. */
  #readIdentifierOrKeyword = (): Token => {
    const startPosition = this.#position;
    let value = '';

    while (/[a-zA-Z0-9_]/.test(this.#peek())) {
      value += this.#peek();
      this.#position++;
    }

    // Keywords are compared in upper case; `value` keeps the original casing.
    switch (value.toUpperCase()) {
      case 'AND':
        return { type: 'AND', value, position: startPosition };
      case 'OR':
        return { type: 'OR', value, position: startPosition };
      case 'LIKE':
        return { type: 'LIKE', value, position: startPosition };
      case 'NOT':
        return { type: 'NOT', value, position: startPosition };
      case 'IN':
        return { type: 'IN', value, position: startPosition };
      case 'IS':
        return { type: 'IS', value, position: startPosition };
      case 'NULL':
        return { type: 'NULL', value, position: startPosition };
      default:
        return { type: 'IDENTIFIER', value, position: startPosition };
    }
  };

  /**
   * Tokenizes the whole input.
   *
   * @returns The tokens in source order, followed by a single `EOF` token
   *   positioned at the end of the input.
   * @throws Error on an unexpected character or an unterminated string.
   */
  public tokenize = (): Token[] => {
    // Restart from scratch so repeated calls give the same result instead of
    // appending another EOF to the previous token list.
    this.#position = 0;
    this.#tokens = [];

    for (;;) {
      this.#skipWhitespace();
      if (this.#position >= this.#input.length) break;
      this.#tokens.push(this.#nextToken());
    }

    this.#tokens.push({ type: 'EOF', value: '', position: this.#position });
    return this.#tokens;
  };
}
export { Lexer };

View File

@@ -1,317 +0,0 @@
import { Lexer } from './query-parser.lexer.ts';
import type { Token, TokenType } from './query-parser.types.ts';
import type { QueryConditionText, QueryConditionNumber, QueryFilter, QueryCondition } from '#root/utils/utils.query.ts';
/**
 * Recursive-descent parser that turns a query string into a `QueryFilter`.
 *
 * Precedence, from tightest to loosest: parentheses, AND, OR. Consecutive
 * uses of the same logical operator are flattened into a single operator
 * node rather than nested pairs.
 */
class Parser {
  #tokens: Token[] = [];
  #position = 0;

  /** Token under the cursor (not consumed). */
  #current(): Token {
    return this.#tokens[this.#position];
  }

  /** Consumes and returns the token under the cursor. */
  #advance(): Token {
    const consumed = this.#tokens[this.#position];
    this.#position += 1;
    return consumed;
  }

  /**
   * Consumes the current token if it has the given type.
   *
   * @throws Error naming the expected and actual token types otherwise.
   */
  #expect(type: TokenType): Token {
    const found = this.#current();
    if (found.type === type) {
      return this.#advance();
    }
    throw new Error(`Expected ${type} but got ${found.type} at position ${found.position}`);
  }

  /** Entry point of the grammar: an expression is an OR chain. */
  #parseExpression(): QueryFilter {
    return this.#parseOr();
  }

  /** Parses `and_expr ( OR and_expr )*`. */
  #parseOr(): QueryFilter {
    let result = this.#parseAnd();
    while (this.#current().type === 'OR') {
      this.#advance();
      result = this.#combineWithOperator(result, this.#parseAnd(), 'or');
    }
    return result;
  }

  /** Parses `primary ( AND primary )*`. */
  #parseAnd(): QueryFilter {
    let result = this.#parsePrimary();
    while (this.#current().type === 'AND') {
      this.#advance();
      result = this.#combineWithOperator(result, this.#parsePrimary(), 'and');
    }
    return result;
  }

  /**
   * Joins two filters under a logical operator. When `left` is already a
   * node of the same operator, `right` is appended to a copy of its
   * conditions so chains stay flat.
   */
  #combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
    const conditions =
      left.type === 'operator' && left.operator === operator ? [...left.conditions, right] : [left, right];
    return {
      type: 'operator',
      operator,
      conditions,
    };
  }

  /** Parses a parenthesized expression or a single condition. */
  #parsePrimary(): QueryFilter {
    if (this.#current().type !== 'LPAREN') {
      return this.#parseCondition();
    }
    this.#advance();
    const inner = this.#parseExpression();
    this.#expect('RPAREN');
    return inner;
  }

  /**
   * Parses `field <operator> <operand>` into a text or number condition.
   *
   * @throws Error when the token after the field is not a known operator.
   */
  #parseCondition(): QueryCondition {
    const field = this.#parseField();
    const token = this.#current();

    switch (token.type) {
      case 'IS': {
        this.#advance();
        const isNot = this.#current().type === 'NOT';
        if (isNot) {
          this.#advance();
        }
        this.#expect('NULL');
        // NULL checks carry no value to infer a type from, so they are emitted as text conditions.
        // NOTE(review): for IS NOT NULL both `equal` and `notEqual` are undefined, so the
        // negation is not represented in the result — confirm against the filter schema.
        return {
          type: 'text',
          field,
          conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null },
        } satisfies QueryConditionText;
      }

      case 'NOT': {
        this.#advance();
        const following = this.#current();
        if (following.type === 'IN') {
          this.#advance();
          return this.#parseInCondition(field, true);
        }
        if (following.type === 'LIKE') {
          this.#advance();
          return {
            type: 'text',
            field,
            conditions: { notLike: this.#expect('STRING').value },
          };
        }
        throw new Error(`Expected IN or LIKE after NOT at position ${following.position}`);
      }

      case 'IN':
        this.#advance();
        return this.#parseInCondition(field, false);

      case 'LIKE':
        this.#advance();
        return {
          type: 'text',
          field,
          conditions: { like: this.#expect('STRING').value },
        };

      case 'EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'equals');

      case 'NOT_EQUALS':
        this.#advance();
        return this.#parseValueCondition(field, 'notEquals');

      // Ordering comparisons only accept a numeric operand.
      case 'GREATER_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThan: this.#parseNumber() } };

      case 'GREATER_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { greaterThanOrEqual: this.#parseNumber() } };

      case 'LESS_THAN':
        this.#advance();
        return { type: 'number', field, conditions: { lessThan: this.#parseNumber() } };

      case 'LESS_THAN_OR_EQUAL':
        this.#advance();
        return { type: 'number', field, conditions: { lessThanOrEqual: this.#parseNumber() } };

      default:
        throw new Error(`Unexpected token '${token.value}' at position ${token.position}`);
    }
  }

  /** Parses a dotted field path such as `metadata.author` into its segments. */
  #parseField(): string[] {
    const path = [this.#expect('IDENTIFIER').value];
    while (this.#current().type === 'DOT') {
      this.#advance();
      path.push(this.#expect('IDENTIFIER').value);
    }
    return path;
  }

  /**
   * Parses the operand of `=` / `!=`; the operand's token type decides
   * whether a text or a number condition is produced.
   *
   * @throws Error when the operand is not a string, number or NULL.
   */
  #parseValueCondition(field: string[], operator: 'equals' | 'notEquals'): QueryCondition {
    const token = this.#current();
    const isEquality = operator === 'equals';

    switch (token.type) {
      case 'STRING': {
        this.#advance();
        const textCondition: QueryConditionText = {
          type: 'text',
          field,
          conditions: isEquality ? { equal: token.value } : { notEqual: token.value },
        };
        return textCondition;
      }

      case 'NUMBER': {
        this.#advance();
        const parsed = parseFloat(token.value);
        const numCondition: QueryConditionNumber = {
          type: 'number',
          field,
          conditions: isEquality ? { equals: parsed } : { notEquals: parsed },
        };
        return numCondition;
      }

      case 'NULL':
        this.#advance();
        // NULL comparisons are emitted as text conditions.
        // NOTE(review): `!= NULL` yields an empty conditions object — confirm this is intended.
        return {
          type: 'text',
          field,
          conditions: isEquality ? { equal: null } : {},
        } as QueryConditionText;

      default:
        throw new Error(`Expected value but got ${token.type} at position ${token.position}`);
    }
  }

  /** Consumes a NUMBER token and returns its numeric value. */
  #parseNumber(): number {
    return parseFloat(this.#expect('NUMBER').value);
  }

  /**
   * Parses the parenthesized list of an IN / NOT IN condition. The first
   * element fixes the list type; every later element must be of that type.
   *
   * @throws Error when the list does not start with a string or a number.
   */
  #parseInCondition(field: string[], isNot: boolean): QueryCondition {
    this.#expect('LPAREN');

    const first = this.#current();
    if (first.type !== 'STRING' && first.type !== 'NUMBER') {
      throw new Error(`Expected STRING or NUMBER in IN list at position ${first.position}`);
    }

    const rawValues: string[] = [this.#advance().value];
    while (this.#current().type === 'COMMA') {
      this.#advance();
      rawValues.push(this.#expect(first.type).value);
    }
    this.#expect('RPAREN');

    if (first.type === 'STRING') {
      return {
        type: 'text',
        field,
        conditions: isNot ? { notIn: rawValues } : { in: rawValues },
      };
    }

    const numbers = rawValues.map((text) => parseFloat(text));
    return {
      type: 'number',
      field,
      conditions: isNot ? { notIn: numbers } : { in: numbers },
    };
  }

  /**
   * Parses a complete query string.
   *
   * @param input Query text in the SQL-like syntax.
   * @returns The filter tree for the query.
   * @throws Error on lexing errors, syntax errors, or tokens left over after a complete expression.
   */
  public parse(input: string): QueryFilter {
    this.#tokens = new Lexer(input).tokenize();
    this.#position = 0;

    const filter = this.#parseExpression();

    const trailing = this.#current();
    if (trailing.type !== 'EOF') {
      throw new Error(`Unexpected token '${trailing.value}' at position ${trailing.position}`);
    }
    return filter;
  }
}
export { Parser };

View File

@@ -1,135 +0,0 @@
import type {
QueryFilter,
QueryOperator,
QueryCondition,
QueryConditionText,
QueryConditionNumber,
} from '#root/utils/utils.query.ts';
/**
 * Serializes a `QueryFilter` tree back into the SQL-like query syntax.
 *
 * Operator nodes are joined with their upper-cased operator; a nested
 * operator node is wrapped in parentheses when its operator differs from its
 * parent's. String values are single-quoted with embedded quotes doubled.
 */
class Stringifier {
  /** Dispatches to the operator or condition serializer based on `filter.type`. */
  #stringifyFilter = (filter: QueryFilter, needsParens: boolean): string => {
    if (filter.type === 'operator') {
      return this.#stringifyOperator(filter, needsParens);
    }
    return this.#stringifyCondition(filter);
  };

  /**
   * Serializes an AND/OR node.
   *
   * An empty node yields `''`. Children that serialize to `''` (empty nested
   * groups) are left out so the output never contains a dangling operator
   * such as `a AND `.
   */
  #stringifyOperator = (op: QueryOperator, needsParens: boolean): string => {
    if (op.conditions.length === 0) {
      return '';
    }

    // A single child takes the place of the group and inherits its paren requirement.
    if (op.conditions.length === 1) {
      return this.#stringifyFilter(op.conditions[0], needsParens);
    }

    const operator = op.operator.toUpperCase();
    const parts = op.conditions
      .map((condition) => {
        // Child operators need parens if they have a different operator
        const childNeedsParens = condition.type === 'operator' && condition.operator !== op.operator;
        return this.#stringifyFilter(condition, childNeedsParens);
      })
      .filter((part) => part !== '');

    if (parts.length === 0) {
      return '';
    }
    if (parts.length === 1) {
      return parts[0];
    }

    const result = parts.join(` ${operator} `);
    return needsParens ? `(${result})` : result;
  };

  /** Serializes a leaf condition; the field path is joined with dots. */
  #stringifyCondition = (condition: QueryCondition): string => {
    const fieldStr = condition.field.join('.');

    if (condition.type === 'text') {
      return this.#stringifyTextCondition(fieldStr, condition.conditions);
    }
    return this.#stringifyNumberCondition(fieldStr, condition.conditions);
  };

  /**
   * Serializes the first defined text constraint, checked in the order:
   * equal, notEqual, like, notLike, in, notIn.
   *
   * @throws Error when none of those keys is defined.
   */
  #stringifyTextCondition = (field: string, conditions: QueryConditionText['conditions']): string => {
    if (conditions.equal !== undefined) {
      if (conditions.equal === null) {
        return `${field} IS NULL`;
      }
      return `${field} = ${this.#escapeString(conditions.equal)}`;
    }

    if (conditions.notEqual !== undefined) {
      // Mirror the number branch: a null operand means IS NOT NULL rather
      // than a crash while trying to escape a non-string.
      if (conditions.notEqual === null) {
        return `${field} IS NOT NULL`;
      }
      return `${field} != ${this.#escapeString(conditions.notEqual)}`;
    }

    if (conditions.like !== undefined) {
      return `${field} LIKE ${this.#escapeString(conditions.like)}`;
    }

    if (conditions.notLike !== undefined) {
      return `${field} NOT LIKE ${this.#escapeString(conditions.notLike)}`;
    }

    if (conditions.in !== undefined) {
      const values = conditions.in.map((v) => this.#escapeString(v)).join(', ');
      return `${field} IN (${values})`;
    }

    if (conditions.notIn !== undefined) {
      const values = conditions.notIn.map((v) => this.#escapeString(v)).join(', ');
      return `${field} NOT IN (${values})`;
    }

    throw new Error('Invalid text condition: no condition specified');
  };

  /**
   * Serializes the first defined numeric constraint, checked in the order:
   * equals, notEquals, greaterThan, greaterThanOrEqual, lessThan,
   * lessThanOrEqual, in, notIn.
   *
   * @throws Error when none of those keys is defined.
   */
  #stringifyNumberCondition = (field: string, conditions: QueryConditionNumber['conditions']): string => {
    if (conditions.equals !== undefined) {
      if (conditions.equals === null) {
        return `${field} IS NULL`;
      }
      return `${field} = ${conditions.equals}`;
    }

    if (conditions.notEquals !== undefined) {
      if (conditions.notEquals === null) {
        return `${field} IS NOT NULL`;
      }
      return `${field} != ${conditions.notEquals}`;
    }

    if (conditions.greaterThan !== undefined) {
      return `${field} > ${conditions.greaterThan}`;
    }

    if (conditions.greaterThanOrEqual !== undefined) {
      return `${field} >= ${conditions.greaterThanOrEqual}`;
    }

    if (conditions.lessThan !== undefined) {
      return `${field} < ${conditions.lessThan}`;
    }

    if (conditions.lessThanOrEqual !== undefined) {
      return `${field} <= ${conditions.lessThanOrEqual}`;
    }

    if (conditions.in !== undefined) {
      const values = conditions.in.join(', ');
      return `${field} IN (${values})`;
    }

    if (conditions.notIn !== undefined) {
      const values = conditions.notIn.join(', ');
      return `${field} NOT IN (${values})`;
    }

    throw new Error('Invalid number condition: no condition specified');
  };

  /** Wraps a value in single quotes, doubling any embedded single quote. */
  #escapeString = (value: string): string => {
    const escaped = value.replace(/'/g, "''");
    return `'${escaped}'`;
  };

  /**
   * Converts a filter tree into its query-string form.
   *
   * @param filter The filter to serialize.
   * @returns The query string; `''` for an operator node without conditions.
   * @throws Error when a leaf condition defines no constraint.
   */
  public stringify = (filter: QueryFilter): string => {
    return this.#stringifyFilter(filter, false);
  };
}
export { Stringifier };

View File

@@ -1,754 +0,0 @@
import { describe, it, expect } from 'vitest';
import { QueryParser } from './query-parser.ts';
import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from '#root/utils/utils.query.ts';
describe('QueryParser', () => {
const parser = new QueryParser();
describe('parse', () => {
describe('text conditions', () => {
it('should parse simple text equality', () => {
const result = parser.parse("name = 'John'");
expect(result).toEqual({
type: 'text',
field: ['name'],
conditions: { equal: 'John' },
});
});
it('should parse nested field text equality', () => {
const result = parser.parse("metadata.author = 'John'");
expect(result).toEqual({
type: 'text',
field: ['metadata', 'author'],
conditions: { equal: 'John' },
});
});
it('should parse deeply nested field', () => {
const result = parser.parse("metadata.nested.deep.field = 'value'");
expect(result).toEqual({
type: 'text',
field: ['metadata', 'nested', 'deep', 'field'],
conditions: { equal: 'value' },
});
});
it('should parse text not equal', () => {
const result = parser.parse("type != 'draft'");
expect(result).toEqual({
type: 'text',
field: ['type'],
conditions: { notEqual: 'draft' },
});
});
it('should parse LIKE pattern', () => {
const result = parser.parse("title LIKE '%cat%'");
expect(result).toEqual({
type: 'text',
field: ['title'],
conditions: { like: '%cat%' },
});
});
it('should parse NOT LIKE pattern', () => {
const result = parser.parse("author NOT LIKE '%admin%'");
expect(result).toEqual({
type: 'text',
field: ['author'],
conditions: { notLike: '%admin%' },
});
});
it('should parse text IN list', () => {
const result = parser.parse("status IN ('published', 'archived', 'draft')");
expect(result).toEqual({
type: 'text',
field: ['status'],
conditions: { in: ['published', 'archived', 'draft'] },
});
});
it('should parse text NOT IN list', () => {
const result = parser.parse("category NOT IN ('deleted', 'hidden')");
expect(result).toEqual({
type: 'text',
field: ['category'],
conditions: { notIn: ['deleted', 'hidden'] },
});
});
it('should parse IS NULL', () => {
const result = parser.parse('source IS NULL');
expect(result).toEqual({
type: 'text',
field: ['source'],
conditions: { equal: null },
});
});
it('should handle escaped quotes in strings', () => {
const result = parser.parse("name = 'O''Brien'");
expect(result).toEqual({
type: 'text',
field: ['name'],
conditions: { equal: "O'Brien" },
});
});
it('should handle empty string', () => {
const result = parser.parse("name = ''");
expect(result).toEqual({
type: 'text',
field: ['name'],
conditions: { equal: '' },
});
});
});
describe('numeric conditions', () => {
it('should parse numeric equality', () => {
const result = parser.parse('age = 30');
expect(result).toEqual({
type: 'number',
field: ['age'],
conditions: { equals: 30 },
});
});
it('should parse numeric not equal', () => {
const result = parser.parse('count != 0');
expect(result).toEqual({
type: 'number',
field: ['count'],
conditions: { notEquals: 0 },
});
});
it('should parse greater than', () => {
const result = parser.parse('views > 100');
expect(result).toEqual({
type: 'number',
field: ['views'],
conditions: { greaterThan: 100 },
});
});
it('should parse greater than or equal', () => {
const result = parser.parse('views >= 100');
expect(result).toEqual({
type: 'number',
field: ['views'],
conditions: { greaterThanOrEqual: 100 },
});
});
it('should parse less than', () => {
const result = parser.parse('priority < 5');
expect(result).toEqual({
type: 'number',
field: ['priority'],
conditions: { lessThan: 5 },
});
});
it('should parse less than or equal', () => {
const result = parser.parse('age <= 65');
expect(result).toEqual({
type: 'number',
field: ['age'],
conditions: { lessThanOrEqual: 65 },
});
});
it('should parse decimal numbers', () => {
const result = parser.parse('score > 0.5');
expect(result).toEqual({
type: 'number',
field: ['score'],
conditions: { greaterThan: 0.5 },
});
});
it('should parse negative numbers', () => {
const result = parser.parse('temperature > -10');
expect(result).toEqual({
type: 'number',
field: ['temperature'],
conditions: { greaterThan: -10 },
});
});
it('should parse numeric IN list', () => {
const result = parser.parse('priority IN (1, 2, 3)');
expect(result).toEqual({
type: 'number',
field: ['priority'],
conditions: { in: [1, 2, 3] },
});
});
it('should parse numeric NOT IN list', () => {
const result = parser.parse('count NOT IN (0, -1)');
expect(result).toEqual({
type: 'number',
field: ['count'],
conditions: { notIn: [0, -1] },
});
});
it('should parse nested field numeric condition', () => {
const result = parser.parse('metadata.score >= 0.8');
expect(result).toEqual({
type: 'number',
field: ['metadata', 'score'],
conditions: { greaterThanOrEqual: 0.8 },
});
});
});
describe('logical operators', () => {
  // Builders for the expected filter tree; they produce the same plain objects
  // the assertions previously spelled out inline.
  const textEq = (name: string, value: string) => ({
    type: 'text',
    field: [name],
    conditions: { equal: value },
  });
  const moreThan = (name: string, value: number) => ({
    type: 'number',
    field: [name],
    conditions: { greaterThan: value },
  });
  const allOf = (...conditions: unknown[]) => ({ type: 'operator', operator: 'and', conditions });
  const anyOf = (...conditions: unknown[]) => ({ type: 'operator', operator: 'or', conditions });

  it('should parse AND operator', () => {
    const parsed = parser.parse("type = 'article' AND status = 'published'");
    expect(parsed).toEqual(allOf(textEq('type', 'article'), textEq('status', 'published')));
  });

  it('should parse OR operator', () => {
    const parsed = parser.parse("category = 'tech' OR category = 'science'");
    expect(parsed).toEqual(anyOf(textEq('category', 'tech'), textEq('category', 'science')));
  });

  // Chains of the same operator are expected as one flat node, not a nested one.
  it('should parse multiple AND conditions', () => {
    const parsed = parser.parse("type = 'article' AND status = 'published' AND views > 100");
    expect(parsed).toEqual(
      allOf(textEq('type', 'article'), textEq('status', 'published'), moreThan('views', 100)),
    );
  });

  it('should parse multiple OR conditions', () => {
    const parsed = parser.parse("type = 'a' OR type = 'b' OR type = 'c'");
    expect(parsed).toEqual(anyOf(textEq('type', 'a'), textEq('type', 'b'), textEq('type', 'c')));
  });

  it('should respect AND precedence over OR', () => {
    // AND binds tighter: `A AND B OR C` must come back as `(A AND B) OR C`.
    const parsed = parser.parse("a = '1' AND b = '2' OR c = '3'");
    expect(parsed).toEqual(anyOf(allOf(textEq('a', '1'), textEq('b', '2')), textEq('c', '3')));
  });

  it('should parse parenthesized expressions', () => {
    const parsed = parser.parse("(type = 'post' OR type = 'page') AND views > 100");
    expect(parsed).toEqual(
      allOf(anyOf(textEq('type', 'post'), textEq('type', 'page')), moreThan('views', 100)),
    );
  });

  it('should parse nested parentheses', () => {
    const parsed = parser.parse(
      "((status = 'active' AND views > 100) OR (status = 'featured' AND views > 50)) AND category = 'news'",
    );
    const activeBranch = allOf(textEq('status', 'active'), moreThan('views', 100));
    const featuredBranch = allOf(textEq('status', 'featured'), moreThan('views', 50));
    expect(parsed).toEqual(allOf(anyOf(activeBranch, featuredBranch), textEq('category', 'news')));
  });
});
describe('case insensitivity', () => {
  // Lowercase logical keywords must still produce an operator node of the matching kind.
  const lowercaseOperators = [
    { title: 'should parse lowercase AND', query: "a = '1' and b = '2'", operator: 'and' },
    { title: 'should parse lowercase OR', query: "a = '1' or b = '2'", operator: 'or' },
  ];
  for (const { title, query, operator } of lowercaseOperators) {
    it(title, () => {
      const node = parser.parse(query);
      expect(node.type).toBe('operator');
      expect((node as QueryOperator).operator).toBe(operator);
    });
  }

  // Mixed-case comparison keywords must yield the same text condition as their uppercase form.
  const mixedCaseKeywords = [
    {
      title: 'should parse mixed case LIKE',
      query: "title Like '%test%'",
      field: ['title'],
      conditions: { like: '%test%' },
    },
    {
      title: 'should parse mixed case IS NULL',
      query: 'field Is Null',
      field: ['field'],
      conditions: { equal: null },
    },
    {
      title: 'should parse mixed case IN',
      query: "status In ('a', 'b')",
      field: ['status'],
      conditions: { in: ['a', 'b'] },
    },
  ];
  for (const { title, query, field, conditions } of mixedCaseKeywords) {
    it(title, () => {
      expect(parser.parse(query)).toEqual({ type: 'text', field, conditions });
    });
  }
});
describe('whitespace handling', () => {
  // Every spacing variant below must parse to this one condition.
  const nameIsJohn = { type: 'text', field: ['name'], conditions: { equal: 'John' } };

  const variants = [
    { title: 'should handle extra whitespace', query: " name = 'John' " },
    { title: 'should handle no whitespace around operators', query: "name='John'" },
    { title: 'should handle tabs and newlines', query: "name\t=\n'John'" },
  ];

  for (const { title, query } of variants) {
    it(title, () => {
      expect(parser.parse(query)).toEqual(nameIsJohn);
    });
  }
});
describe('error handling', () => {
  // Wraps a parse call in a thunk so `expect(...).toThrow()` can invoke it.
  const parsing = (query: string) => () => parser.parse(query);

  it('should throw on invalid syntax', () => {
    expect(parsing('invalid')).toThrow();
  });

  it('should throw on mismatched parentheses', () => {
    expect(parsing("(type = 'a'")).toThrow();
  });

  it('should throw on unterminated string', () => {
    // The only case that also pins the error message.
    expect(parsing("name = 'unterminated")).toThrow(/Unterminated string/);
  });

  it('should throw on unexpected token', () => {
    expect(parsing("name = 'a' INVALID")).toThrow();
  });

  it('should throw on missing value after operator', () => {
    expect(parsing('name =')).toThrow();
  });
});
});
describe('stringify', () => {
  describe('text conditions', () => {
    // One row per test: field path, condition payload and the exact query text expected back.
    const cases: Array<{
      title: string;
      field: string[];
      conditions: QueryConditionText['conditions'];
      expected: string;
    }> = [
      {
        title: 'should stringify text equality',
        field: ['name'],
        conditions: { equal: 'John' },
        expected: "name = 'John'",
      },
      {
        title: 'should stringify nested field',
        field: ['metadata', 'author'],
        conditions: { equal: 'John' },
        expected: "metadata.author = 'John'",
      },
      {
        title: 'should stringify text not equal',
        field: ['type'],
        conditions: { notEqual: 'draft' },
        expected: "type != 'draft'",
      },
      {
        title: 'should stringify LIKE',
        field: ['title'],
        conditions: { like: '%cat%' },
        expected: "title LIKE '%cat%'",
      },
      {
        title: 'should stringify NOT LIKE',
        field: ['author'],
        conditions: { notLike: '%admin%' },
        expected: "author NOT LIKE '%admin%'",
      },
      {
        title: 'should stringify text IN',
        field: ['status'],
        conditions: { in: ['published', 'archived'] },
        expected: "status IN ('published', 'archived')",
      },
      {
        title: 'should stringify text NOT IN',
        field: ['category'],
        conditions: { notIn: ['deleted', 'hidden'] },
        expected: "category NOT IN ('deleted', 'hidden')",
      },
      {
        title: 'should stringify IS NULL',
        field: ['source'],
        conditions: { equal: null },
        expected: 'source IS NULL',
      },
      {
        // A single quote inside a value is expected to be doubled in the output.
        title: 'should escape quotes in strings',
        field: ['name'],
        conditions: { equal: "O'Brien" },
        expected: "name = 'O''Brien'",
      },
    ];

    for (const { title, field, conditions, expected } of cases) {
      it(title, () => {
        const filter: QueryConditionText = { type: 'text', field, conditions };
        expect(parser.stringify(filter)).toBe(expected);
      });
    }
  });

  describe('numeric conditions', () => {
    // Same layout as the text table, for number conditions.
    const cases: Array<{
      title: string;
      field: string[];
      conditions: QueryConditionNumber['conditions'];
      expected: string;
    }> = [
      {
        title: 'should stringify numeric equality',
        field: ['age'],
        conditions: { equals: 30 },
        expected: 'age = 30',
      },
      {
        title: 'should stringify numeric not equal',
        field: ['count'],
        conditions: { notEquals: 0 },
        expected: 'count != 0',
      },
      {
        title: 'should stringify greater than',
        field: ['views'],
        conditions: { greaterThan: 100 },
        expected: 'views > 100',
      },
      {
        title: 'should stringify greater than or equal',
        field: ['views'],
        conditions: { greaterThanOrEqual: 100 },
        expected: 'views >= 100',
      },
      {
        title: 'should stringify less than',
        field: ['priority'],
        conditions: { lessThan: 5 },
        expected: 'priority < 5',
      },
      {
        title: 'should stringify less than or equal',
        field: ['age'],
        conditions: { lessThanOrEqual: 65 },
        expected: 'age <= 65',
      },
      {
        title: 'should stringify decimal numbers',
        field: ['score'],
        conditions: { greaterThan: 0.5 },
        expected: 'score > 0.5',
      },
      {
        title: 'should stringify numeric IN',
        field: ['priority'],
        conditions: { in: [1, 2, 3] },
        expected: 'priority IN (1, 2, 3)',
      },
      {
        title: 'should stringify numeric NOT IN',
        field: ['count'],
        conditions: { notIn: [0, -1] },
        expected: 'count NOT IN (0, -1)',
      },
      {
        title: 'should stringify numeric IS NULL',
        field: ['score'],
        conditions: { equals: null },
        expected: 'score IS NULL',
      },
      {
        title: 'should stringify numeric IS NOT NULL',
        field: ['score'],
        conditions: { notEquals: null },
        expected: 'score IS NOT NULL',
      },
    ];

    for (const { title, field, conditions, expected } of cases) {
      it(title, () => {
        const filter: QueryConditionNumber = { type: 'number', field, conditions };
        expect(parser.stringify(filter)).toBe(expected);
      });
    }
  });

  describe('logical operators', () => {
    // Each row holds a complete filter tree and the query text it must serialize to.
    const cases: Array<{ title: string; filter: QueryFilter; expected: string }> = [
      {
        title: 'should stringify AND operator',
        filter: {
          type: 'operator',
          operator: 'and',
          conditions: [
            { type: 'text', field: ['type'], conditions: { equal: 'article' } },
            { type: 'text', field: ['status'], conditions: { equal: 'published' } },
          ],
        },
        expected: "type = 'article' AND status = 'published'",
      },
      {
        title: 'should stringify OR operator',
        filter: {
          type: 'operator',
          operator: 'or',
          conditions: [
            { type: 'text', field: ['category'], conditions: { equal: 'tech' } },
            { type: 'text', field: ['category'], conditions: { equal: 'science' } },
          ],
        },
        expected: "category = 'tech' OR category = 'science'",
      },
      {
        // A nested operator is expected to be wrapped in parentheses.
        title: 'should stringify nested operators with parentheses',
        filter: {
          type: 'operator',
          operator: 'and',
          conditions: [
            {
              type: 'operator',
              operator: 'or',
              conditions: [
                { type: 'text', field: ['type'], conditions: { equal: 'post' } },
                { type: 'text', field: ['type'], conditions: { equal: 'page' } },
              ],
            },
            { type: 'number', field: ['views'], conditions: { greaterThan: 100 } },
          ],
        },
        expected: "(type = 'post' OR type = 'page') AND views > 100",
      },
      {
        title: 'should stringify empty operator',
        filter: { type: 'operator', operator: 'and', conditions: [] },
        expected: '',
      },
      {
        // A one-element operator is expected to collapse to its only child.
        title: 'should stringify single-condition operator',
        filter: {
          type: 'operator',
          operator: 'and',
          conditions: [{ type: 'text', field: ['name'], conditions: { equal: 'test' } }],
        },
        expected: "name = 'test'",
      },
    ];

    for (const { title, filter, expected } of cases) {
      it(title, () => {
        expect(parser.stringify(filter)).toBe(expected);
      });
    }
  });
});
describe('roundtrip', () => {
  // Queries that must survive parse -> stringify -> parse with an identical filter tree.
  const queries = [
    "name = 'John'",
    "metadata.author = 'Jane'",
    'views > 100',
    'score >= 0.5',
    "title LIKE '%cat%'",
    "author NOT LIKE '%admin%'",
    "status IN ('published', 'archived')",
    'priority IN (1, 2, 3)',
    "type = 'article' AND status = 'published'",
    "category = 'tech' OR category = 'science'",
    "(type = 'post' OR type = 'page') AND views > 100",
  ];

  for (const query of queries) {
    it(`should roundtrip: ${query}`, () => {
      const firstPass = parser.parse(query);
      // Trees are compared rather than strings, so formatting differences in the
      // stringified text do not fail the test.
      const secondPass = parser.parse(parser.stringify(firstPass));
      expect(secondPass).toEqual(firstPass);
    });
  }
});
describe('complex real-world queries', () => {
  it('should handle complex query with multiple field types', () => {
    const root = parser.parse(
      "type = 'article' AND (metadata.author = 'John' OR metadata.author = 'Jane') AND views >= 100",
    ) as QueryOperator;

    expect(root.type).toBe('operator');
    expect(root.operator).toBe('and');
    // The parenthesized OR group counts as one child beside the two plain conditions.
    expect(root.conditions).toHaveLength(3);
  });

  it('should handle nested JSON paths with conditions', () => {
    const root = parser.parse(
      "metadata.nested.deep.value = 'test' AND metadata.nested.count > 10",
    ) as QueryOperator;

    expect(root.type).toBe('operator');
    const [textCondition, numberCondition] = root.conditions as [QueryConditionText, QueryConditionNumber];
    expect(textCondition.field).toEqual(['metadata', 'nested', 'deep', 'value']);
    expect(numberCondition.field).toEqual(['metadata', 'nested', 'count']);
  });

  it('should handle query from documentation example', () => {
    // Filter tree taken from the JSON format shown in the docs.
    const documented: QueryFilter = {
      type: 'operator',
      operator: 'and',
      conditions: [
        { type: 'text', field: ['metadata', 'foo'], conditions: { equal: 'bar' } },
        { type: 'text', field: ['type'], conditions: { equal: 'demo' } },
      ],
    };

    expect(parser.parse("metadata.foo = 'bar' AND type = 'demo'")).toEqual(documented);
  });
});
});

View File

@@ -1,19 +0,0 @@
import { Stringifier } from './query-parser.stringifier.ts';
import { Parser } from './query-parser.parser.ts';
import type { QueryFilter } from '#root/utils/utils.query.ts';
/**
 * Facade over the query-language `Parser` and `Stringifier`.
 *
 * Both public members are arrow-function properties, so they stay bound to the
 * instance when passed around as standalone callbacks.
 */
class QueryParser {
  private parser = new Parser();
  private stringifier = new Stringifier();

  /**
   * Converts query text into a filter tree by delegating to the internal parser.
   *
   * @param input - Query text to parse.
   * @returns The filter produced by `Parser.parse`.
   */
  public parse = (input: string): QueryFilter => this.parser.parse(input);

  /**
   * Converts a filter tree back into query text by delegating to the internal stringifier.
   *
   * @param filter - Filter tree to serialize.
   * @returns The text produced by `Stringifier.stringify`.
   */
  public stringify = (filter: QueryFilter): string => this.stringifier.stringify(filter);
}
export { QueryParser };

View File

@@ -1,30 +0,0 @@
/** Identifier and literal token kinds. */
type ValueTokenType = 'IDENTIFIER' | 'STRING' | 'NUMBER';

/** Word-like token kinds of the query language. */
type KeywordTokenType = 'AND' | 'OR' | 'LIKE' | 'NOT' | 'IN' | 'IS' | 'NULL';

/** Comparison operator token kinds. */
type ComparisonTokenType =
  | 'EQUALS'
  | 'NOT_EQUALS'
  | 'GREATER_THAN'
  | 'GREATER_THAN_OR_EQUAL'
  | 'LESS_THAN'
  | 'LESS_THAN_OR_EQUAL';

/** Grouping and separator token kinds. */
type PunctuationTokenType = 'LPAREN' | 'RPAREN' | 'COMMA' | 'DOT';

/** Every token kind, including the `EOF` marker. */
type TokenType = ValueTokenType | KeywordTokenType | ComparisonTokenType | PunctuationTokenType | 'EOF';

/** A single token: its kind, a string value and a numeric position. */
type Token = {
  type: TokenType;
  // NOTE(review): presumably the token's text as read from the input; confirm against the lexer.
  value: string;
  // NOTE(review): presumably an offset into the input string; confirm against the lexer.
  position: number;
};
export type { TokenType, Token };

View File

@@ -3,9 +3,9 @@ import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { migrationSource } from './migrations/migrations.ts';
import { migrationSource } from './migrations/migrations.js';
import { destroy, Services } from '#root/utils/utils.services.ts';
import { destroy, Services } from '#root/utils/utils.services.js';
class DatabaseService {
#services: Services;
@@ -50,5 +50,5 @@ class DatabaseService {
};
}
export { type TableRows, tableNames } from './migrations/migrations.ts';
export { type TableRows, tableNames } from './migrations/migrations.js';
export { DatabaseService };

View File

@@ -1,7 +1,7 @@
import type { Migration } from './migrations.types.ts';
import type { Migration } from './migrations.types.js';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
const tableNames = {
documents: 'documents',

View File

@@ -1,9 +1,9 @@
import type { Knex } from 'knex';
import type { Migration } from './migrations.types.ts';
import { init } from './migrations.001-init.ts';
import type { Migration } from './migrations.types.js';
import { init } from './migrations.001-init.js';
import type { Services } from '#root/utils/utils.services.ts';
import type { Services } from '#root/utils/utils.services.js';
const migrations = [init] satisfies Migration[];
@@ -21,5 +21,5 @@ const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<
getMigrations: async () => migrations,
});
export { type TableRows, tableNames } from './migrations.001-init.ts';
export { type TableRows, tableNames } from './migrations.001-init.js';
export { migrationSource };

View File

@@ -1,6 +1,6 @@
import type { Knex } from 'knex';
import type { Services } from '#root/utils/utils.services.ts';
import type { Services } from '#root/utils/utils.services.js';
type MigrationOptions = {
knex: Knex;

View File

@@ -1,6 +1,6 @@
import type { TableRows } from '../database/database.ts';
import type { TableRows } from '../database/database.js';
import type { DocumentChunk } from './document-chunks.schemas.ts';
import type { DocumentChunk } from './document-chunks.schemas.js';
const mapFromDocumentChunkRow = (
row: TableRows['documentChunks'] & {

View File

@@ -1,7 +1,7 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentChunkSchema = z.object({
id: z.string(),

View File

@@ -1,14 +1,15 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.ts';
import { mapFromDocumentChunkRow } from './document.mappings.ts';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js';
import { mapFromDocumentChunkRow } from './document-chunks.mappings.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
import type { ExplicitAny } from '#root/global.js';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
import { QueryParser } from '#root/query-parser/query-parser.ts';
import { applyQueryFilter } from '#root/utils/utils.query.js';
const baseFields = [
`${tableNames.documentChunks}.*`,
@@ -61,5 +62,5 @@ class DocumentChunksService {
};
}
export * from './document-chunks.schemas.ts';
export * from './document-chunks.schemas.js';
export { DocumentChunksService };

View File

@@ -1,6 +1,6 @@
import type { TableRows } from '../database/database.ts';
import type { TableRows } from '../database/database.js';
import type { Document } from './documents.schemas.ts';
import type { Document } from './documents.schemas.js';
const mapFromDocumentRow = (row: TableRows['documents']): Document => ({
...row,

View File

@@ -1,7 +1,7 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentSchema = z.object({
id: z.string(),

View File

@@ -1,5 +1,7 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { SplittingService } from '../splitter/splitter.ts';
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { SplittingService } from '../splitter/splitter.js';
import type {
Document,
@@ -8,13 +10,12 @@ import type {
DocumentUpsert,
DocumentUpsertResult,
} from './documents.schemas.ts';
import { mapFromDocumentRow } from './documents.mapping.ts';
import { mapFromDocumentRow } from './documents.mapping.js';
import { EventEmitter } from '#root/utils/utils.event-emitter.ts';
import type { Services } from '#root/utils/utils.services.ts';
import { compareObjectKeys } from '#root/utils/utils.compare.ts';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
import { QueryParser } from '#root/query-parser/query-parser.ts';
import { EventEmitter } from '#root/utils/utils.event-emitter.js';
import type { Services } from '#root/utils/utils.services.js';
import { compareObjectKeys } from '#root/utils/utils.compare.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
type DocumentsServiceEvents = {
upserted: (document: Document) => void;
@@ -174,5 +175,5 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
};
}
export * from './documents.schemas.ts';
export * from './documents.schemas.js';
export { DocumentsService };

View File

@@ -1,6 +1,8 @@
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { Vector } from './embeddings.vector.ts';
import { Vector } from './embeddings.vector.js';
import type { ExplicitAny } from '#root/global.js';
type ExtractOptions = {
input: string[];
@@ -57,4 +59,4 @@ class EmbeddingsService {
};
}
export { EmbeddingsService };
export { EmbeddingsService, Vector };

View File

@@ -1,11 +1,11 @@
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import type { Document } from '../documents/documents.schemas.ts';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Document } from '../documents/documents.schemas.js';
import type { Chunk, Splitter } from './splitter.types.ts';
import { textSplitter } from './splitters/splitters.text.ts';
import type { Chunk, Splitter } from './splitter.types.js';
import { textSplitter } from './splitters/splitters.text.js';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
class SplittingService {
#services: Services;
@@ -40,5 +40,5 @@ class SplittingService {
};
}
export * from './splitter.types.ts';
export * from './splitter.types.js';
export { SplittingService };

View File

@@ -1,5 +1,5 @@
import type { Document } from '../documents/documents.schemas.ts';
import type { Vector } from '../embeddings/embeddings.vector.ts';
import type { Document } from '../documents/documents.schemas.js';
import type { Vector } from '../embeddings/embeddings.vector.js';
type Chunk = {
content: string;

View File

@@ -1,6 +1,6 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.ts';
import type { Splitter } from '../splitter.types.js';
const textSplitter: Splitter = {
match: (document) => !!document.content,

View File

@@ -1,6 +1,6 @@
import { DatabaseService } from '../database/database.ts';
import { DatabaseService } from '../database/database.js';
import { Services } from '#root/utils/utils.services.ts';
import { Services } from '#root/utils/utils.services.js';
class WarmupService {
#services: Services;

View File

@@ -1,3 +1,5 @@
import type { ExplicitAny } from '#root/global.js';
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;
type OnOptions = {

View File

@@ -1,6 +1,10 @@
import type {
QueryCondition,
QueryConditionNumber,
QueryConditionText,
QueryFilter,
} from '@morten-olsen/stash-query-dsl';
import { type Knex } from 'knex';
import { z } from 'zod';
/**
* Escapes a JSON key for use in PostgreSQL JSON operators.
* Escapes single quotes by doubling them, which is the PostgreSQL standard.
@@ -30,74 +34,6 @@ const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?:
return query.client.raw(sqlExpression);
};
/**
 * Zod schema for a text condition node (discriminated by `type: 'text'`).
 *
 * `field` is a list of path segments (e.g. `['metadata', 'foo']`), `tableName` is
 * optional, and `conditions` holds the optional comparison operators. The
 * `.meta()` call attaches example payloads to the schema.
 *
 * NOTE(review): `equal` is nullish (see the 'Null check' example) while `notEqual`
 * only accepts a string, so a text "IS NOT NULL" cannot be expressed here, unlike
 * `notEquals` on the number schema. Confirm whether that is intentional.
 */
const queryConditionTextSchema = z
.object({
type: z.literal('text'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
// `null` is accepted here; the 'Null check' example below uses it.
equal: z.string().nullish(),
notEqual: z.string().optional(),
like: z.string().optional(),
notLike: z.string().optional(),
in: z.array(z.string()).optional(),
notIn: z.array(z.string()).optional(),
}),
})
.meta({
example: {
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
examples: [
{
summary: 'Equal condition',
value: {
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
},
{
summary: 'Like condition',
value: {
type: 'text',
field: ['content'],
conditions: {
like: '%cat%',
},
},
},
{
summary: 'In condition',
value: {
type: 'text',
field: ['type'],
conditions: {
in: ['demo', 'article', 'post'],
},
},
},
{
summary: 'Null check',
value: {
type: 'text',
field: ['source'],
conditions: {
equal: null,
},
},
},
],
});
/** Static type of a text condition, inferred from `queryConditionTextSchema`. */
type QueryConditionText = z.infer<typeof queryConditionTextSchema>;
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equal) {
@@ -127,77 +63,6 @@ const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, c
return query;
};
/**
 * Zod schema for a numeric condition node (discriminated by `type: 'number'`).
 *
 * `field` is a list of path segments, `tableName` is optional, and `conditions`
 * holds the optional numeric comparison operators. Several operators can be set
 * on one node, as the 'Range condition' example shows. The `.meta()` call
 * attaches example payloads to the schema.
 */
const queryConditionNumberSchema = z
.object({
type: z.literal('number'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
// Both equality operators also accept `null`.
equals: z.number().nullish(),
notEquals: z.number().nullish(),
greaterThan: z.number().optional(),
greaterThanOrEqual: z.number().optional(),
lessThan: z.number().optional(),
lessThanOrEqual: z.number().optional(),
in: z.array(z.number()).optional(),
notIn: z.array(z.number()).optional(),
}),
})
.meta({
example: {
type: 'number',
field: ['typeVersion'],
conditions: {
equals: 1,
},
},
examples: [
{
summary: 'Equals condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
equals: 1,
},
},
},
{
summary: 'Greater than condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
greaterThan: 0,
},
},
},
{
summary: 'Range condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
greaterThanOrEqual: 1,
lessThanOrEqual: 10,
},
},
},
{
summary: 'In condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
in: [1, 2, 3],
},
},
},
],
});
/** Static type of a numeric condition, inferred from `queryConditionNumberSchema`. */
type QueryConditionNumber = z.infer<typeof queryConditionNumberSchema>;
const applyQueryConditionNumber = (
query: Knex.QueryBuilder,
{ field, tableName, conditions }: QueryConditionNumber,
@@ -236,10 +101,6 @@ const applyQueryConditionNumber = (
return query;
};
/** Schema for a single condition: the text or number schema, selected by the `type` literal. */
const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]);
/** Static type of a single condition, inferred from `queryConditionSchema`. */
type QueryCondition = z.infer<typeof queryConditionSchema>;
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
switch (options.type) {
case 'text': {
@@ -254,254 +115,6 @@ const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition)
}
};
/** A filter is either a single condition or an operator node combining further filters. */
type QueryFilter = QueryCondition | QueryOperator;
/**
 * Logical AND/OR node. `conditions` holds `QueryFilter` items, so operator
 * nodes can nest and the type is recursive.
 */
type QueryOperator = {
type: 'operator';
operator: 'and' | 'or';
conditions: QueryFilter[];
};
// Create a depth-limited recursive schema for OpenAPI compatibility
// This supports up to 3 levels of nesting, which should be sufficient for most use cases
// OpenAPI cannot handle z.lazy(), so we manually define the nesting
// If you need deeper nesting, you can add more levels (Level3, Level4, etc.)
//
// Level 0 (innermost): either a plain condition, or an operator whose children
// are plain conditions only (no operator may appear inside it).
const queryFilterSchemaLevel0: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
// Children are restricted to leaf conditions at this level.
conditions: z.array(queryConditionSchema),
})
.meta({
example: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
],
},
examples: [
{
summary: 'AND operator',
value: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
},
{
summary: 'OR operator',
value: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
},
],
}),
]);
// Level 1: either a plain condition, or an operator whose children are
// Level 0 filters, which allows one operator nested inside another.
const queryFilterSchemaLevel1: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
// Children may be conditions or Level 0 operators.
conditions: z.array(queryFilterSchemaLevel0),
})
.meta({
example: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
],
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
examples: [
{
summary: 'Nested AND within OR',
value: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
},
],
}),
]);
// Level 2 (outermost): either a plain condition, or an operator whose children
// are Level 1 filters, giving three operator levels in total.
const queryFilterSchemaLevel2: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
// Children may be conditions or Level 1 operators.
conditions: z.array(queryFilterSchemaLevel1),
})
.meta({
example: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
examples: [
{
summary: 'Complex nested query',
value: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
},
],
}),
]);
// Export the depth-limited schema (supports 3 levels of nesting)
// This works with OpenAPI schema generation
// It is an alias for the outermost level, `queryFilterSchemaLevel2`; the
// `QueryFilter` type itself is recursive and not depth-limited.
const queryFilterSchema = queryFilterSchemaLevel2;
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
if (filter.type === 'operator') {
if (filter.conditions.length === 0) {
@@ -545,5 +158,4 @@ const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
}
};
export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter };
export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter };
export { applyQueryCondition, applyQueryFilter };

View File

@@ -1,6 +1,7 @@
{
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"paths": {
"#root/*": [
"./src/*"