From f9494c88e2e6dd0b668edcf32d818d8bfb108a81 Mon Sep 17 00:00:00 2001
From: Morten Olsen
Date: Wed, 10 Dec 2025 09:11:03 +0100
Subject: [PATCH] update

---
 .gitignore | 5 +-
 .u8.json | 20 +
 packages/configs/tsconfig.json | 4 +-
 packages/query-dsl/.gitignore | 4 +
 .../docs/query-language.md | 0
 packages/query-dsl/package.json | 33 ++
 packages/query-dsl/src/exports.ts | 2 +
 packages/query-dsl/src/query-parser.parser.ts | 457 ++++++++++++++++++
 .../query-dsl/src/query-parser.schemas.ts | 85 ++++
 .../src}/query-parser.stringifier.ts | 2 +-
 .../src}/query-parser.test.ts | 5 +-
 packages/query-dsl/src/query-parser.ts | 18 +
 packages/query-dsl/tsconfig.json | 10 +
 packages/query-dsl/vitest.config.ts | 12 +
 packages/runtime/.gitignore | 4 +
 packages/runtime/package.json | 29 ++
 packages/runtime/src/exports.ts | 1 +
 packages/runtime/src/global.d.ts | 12 +
 packages/runtime/src/runtime.ts | 14 +
 .../runtime/src/services/database/database.ts | 54 +++
 .../migrations/migrations.001-init.ts | 112 +++++
 .../database/migrations/migrations.ts | 25 +
 .../database/migrations/migrations.types.ts | 16 +
 .../document-chunks.mappings.ts} | 4 +-
 .../document-chunks.schemas.ts | 33 ++
 .../document-chunks/document-chunks.ts | 66 +++
 .../services/documents/documents.mapping.ts | 12 +
 .../services/documents/documents.schemas.ts | 80 +++
 .../src/services/documents/documents.ts | 179 +++++++
 .../src/services/embeddings/embeddings.ts | 62 +++
 .../services/embeddings/embeddings.vector.ts | 37 ++
 .../runtime/src/services/splitter/splitter.ts | 44 ++
 .../src/services/splitter/splitter.types.ts | 15 +
 .../splitter/splitters/splitters.text.ts | 17 +
 .../runtime/src/services/warmup/warmup.ts | 17 +
 packages/runtime/src/utils/utils.compare.ts | 14 +
 packages/runtime/src/utils/utils.consts.ts | 3 +
 .../runtime/src/utils/utils.event-emitter.ts | 66 +++
 packages/runtime/src/utils/utils.query.ts | 161 ++++++
 packages/runtime/src/utils/utils.schema.ts | 8 +
 packages/runtime/src/utils/utils.services.ts | 51 ++
 packages/runtime/tsconfig.json | 9 +
 packages/runtime/vitest.config.ts | 12 +
 packages/server/package.json | 1 +
 packages/server/src/api.ts | 12 +-
 packages/server/src/dev.ts | 4 +-
 .../document-chunk-filters.ts | 2 +-
 .../document-filters/document-filters.ts | 2 +-
 .../src/endpoints/documents/documents.ts | 2 +-
 .../server/src/endpoints/system/system.ts | 2 +-
 .../src/query-parser/query-parser.lexer.ts | 202 --------
 .../src/query-parser/query-parser.parser.ts | 317 ------------
 .../server/src/query-parser/query-parser.ts | 19 -
 .../src/query-parser/query-parser.types.ts | 30 --
 .../server/src/services/database/database.ts | 6 +-
 .../migrations/migrations.001-init.ts | 6 +-
 .../database/migrations/migrations.ts | 8 +-
 .../database/migrations/migrations.types.ts | 2 +-
 .../document-chunks.mappings.ts | 13 +
 .../document-chunks.schemas.ts | 4 +-
 .../document-chunks/document-chunks.ts | 19 +-
 .../services/documents/documents.mapping.ts | 4 +-
 .../services/documents/documents.schemas.ts | 4 +-
 .../src/services/documents/documents.ts | 19 +-
 .../src/services/embeddings/embeddings.ts | 6 +-
 .../server/src/services/splitter/splitter.ts | 14 +-
 .../src/services/splitter/splitter.types.ts | 4 +-
 .../splitter/splitters/splitters.text.ts | 2 +-
 packages/server/src/services/warmup/warmup.ts | 4 +-
 .../server/src/utils/utils.event-emitter.ts | 2 +
 packages/server/src/utils/utils.query.ts | 402 +--------------
 packages/server/tsconfig.json | 1 +
 pnpm-lock.yaml | 101 ++++
 tsconfig.json | 11 +
 74 files changed, 2004 insertions(+), 1035
deletions(-) create mode 100644 packages/query-dsl/.gitignore rename packages/{server => query-dsl}/docs/query-language.md (100%) create mode 100644 packages/query-dsl/package.json create mode 100644 packages/query-dsl/src/exports.ts create mode 100644 packages/query-dsl/src/query-parser.parser.ts create mode 100644 packages/query-dsl/src/query-parser.schemas.ts rename packages/{server/src/query-parser => query-dsl/src}/query-parser.stringifier.ts (99%) rename packages/{server/src/query-parser => query-dsl/src}/query-parser.test.ts (99%) create mode 100644 packages/query-dsl/src/query-parser.ts create mode 100644 packages/query-dsl/tsconfig.json create mode 100644 packages/query-dsl/vitest.config.ts create mode 100644 packages/runtime/.gitignore create mode 100644 packages/runtime/package.json create mode 100644 packages/runtime/src/exports.ts create mode 100644 packages/runtime/src/global.d.ts create mode 100644 packages/runtime/src/runtime.ts create mode 100644 packages/runtime/src/services/database/database.ts create mode 100644 packages/runtime/src/services/database/migrations/migrations.001-init.ts create mode 100644 packages/runtime/src/services/database/migrations/migrations.ts create mode 100644 packages/runtime/src/services/database/migrations/migrations.types.ts rename packages/{server/src/services/document-chunks/document.mappings.ts => runtime/src/services/document-chunks/document-chunks.mappings.ts} (59%) create mode 100644 packages/runtime/src/services/document-chunks/document-chunks.schemas.ts create mode 100644 packages/runtime/src/services/document-chunks/document-chunks.ts create mode 100644 packages/runtime/src/services/documents/documents.mapping.ts create mode 100644 packages/runtime/src/services/documents/documents.schemas.ts create mode 100644 packages/runtime/src/services/documents/documents.ts create mode 100644 packages/runtime/src/services/embeddings/embeddings.ts create mode 100644 packages/runtime/src/services/embeddings/embeddings.vector.ts create mode 100644 packages/runtime/src/services/splitter/splitter.ts create mode 100644 packages/runtime/src/services/splitter/splitter.types.ts create mode 100644 packages/runtime/src/services/splitter/splitters/splitters.text.ts create mode 100644 packages/runtime/src/services/warmup/warmup.ts create mode 100644 packages/runtime/src/utils/utils.compare.ts create mode 100644 packages/runtime/src/utils/utils.consts.ts create mode 100644 packages/runtime/src/utils/utils.event-emitter.ts create mode 100644 packages/runtime/src/utils/utils.query.ts create mode 100644 packages/runtime/src/utils/utils.schema.ts create mode 100644 packages/runtime/src/utils/utils.services.ts create mode 100644 packages/runtime/tsconfig.json create mode 100644 packages/runtime/vitest.config.ts delete mode 100644 packages/server/src/query-parser/query-parser.lexer.ts delete mode 100644 packages/server/src/query-parser/query-parser.parser.ts delete mode 100644 packages/server/src/query-parser/query-parser.ts delete mode 100644 packages/server/src/query-parser/query-parser.types.ts create mode 100644 packages/server/src/services/document-chunks/document-chunks.mappings.ts create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore index 0f23061..321222a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -/node_modules +/node_modules/ +/packages/*/dist/ .turbo/ /.env -/coverage/ \ No newline at end of file +/coverage/ diff --git a/.u8.json b/.u8.json index 9b7490c..3c32329 100644 --- a/.u8.json +++ b/.u8.json @@ -36,6 +36,26 @@ 
"packageVersion": "1.0.0", "packageName": "server" } + }, + { + "timestamp": "2025-12-10T07:50:20.652Z", + "template": "pkg", + "values": { + "monoRepo": true, + "packagePrefix": "@morten-olsen/stash-", + "packageVersion": "1.0.0", + "packageName": "query-dsl" + } + }, + { + "timestamp": "2025-12-10T08:07:44.756Z", + "template": "pkg", + "values": { + "monoRepo": true, + "packagePrefix": "@morten-olsen/stash-", + "packageVersion": "1.0.0", + "packageName": "runtime" + } } ] } \ No newline at end of file diff --git a/packages/configs/tsconfig.json b/packages/configs/tsconfig.json index 29b29ee..edff93f 100644 --- a/packages/configs/tsconfig.json +++ b/packages/configs/tsconfig.json @@ -10,11 +10,9 @@ "resolveJsonModule": true, "allowSyntheticDefaultImports": true, "skipLibCheck": true, - "noEmit": true, "jsx": "react-jsx", "isolatedModules": true, "verbatimModuleSyntax": true, - "erasableSyntaxOnly": true, - "allowImportingTsExtensions": true + "erasableSyntaxOnly": true } } diff --git a/packages/query-dsl/.gitignore b/packages/query-dsl/.gitignore new file mode 100644 index 0000000..8511d52 --- /dev/null +++ b/packages/query-dsl/.gitignore @@ -0,0 +1,4 @@ +/node_modules/ +/dist/ +/coverage/ +/.env diff --git a/packages/server/docs/query-language.md b/packages/query-dsl/docs/query-language.md similarity index 100% rename from packages/server/docs/query-language.md rename to packages/query-dsl/docs/query-language.md diff --git a/packages/query-dsl/package.json b/packages/query-dsl/package.json new file mode 100644 index 0000000..f16e715 --- /dev/null +++ b/packages/query-dsl/package.json @@ -0,0 +1,33 @@ +{ + "type": "module", + "main": "dist/exports.js", + "scripts": { + "build": "tsc --build", + "test:unit": "vitest --run --passWithNoTests", + "test": "pnpm run \"/^test:/\"" + }, + "packageManager": "pnpm@10.6.0", + "files": [ + "dist" + ], + "exports": { + ".": "./dist/exports.js" + }, + "devDependencies": { + "@morten-olsen/stash-configs": "workspace:*", + "@morten-olsen/stash-tests": "workspace:*", + "@types/node": "24.10.2", + "@vitest/coverage-v8": "4.0.15", + "typescript": "5.9.3", + "vitest": "4.0.15" + }, + "name": "@morten-olsen/stash-query-dsl", + "version": "1.0.0", + "imports": { + "#root/*": "./src/*" + }, + "dependencies": { + "chevrotain": "^11.0.3", + "zod": "4.1.13" + } +} diff --git a/packages/query-dsl/src/exports.ts b/packages/query-dsl/src/exports.ts new file mode 100644 index 0000000..1d0620a --- /dev/null +++ b/packages/query-dsl/src/exports.ts @@ -0,0 +1,2 @@ +export * from './query-parser.schemas.js'; +export { QueryParser } from './query-parser.js'; diff --git a/packages/query-dsl/src/query-parser.parser.ts b/packages/query-dsl/src/query-parser.parser.ts new file mode 100644 index 0000000..3ae2536 --- /dev/null +++ b/packages/query-dsl/src/query-parser.parser.ts @@ -0,0 +1,457 @@ +import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain'; + +import type { QueryFilter, QueryCondition } from './query-parser.schemas.js'; + +// ----------------- Lexer ----------------- + +// Whitespace (skipped) +const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }); + +// Keywords (must be defined before Identifier to take precedence) +const And = createToken({ name: 'And', pattern: /AND/i, longer_alt: undefined }); +const Or = createToken({ name: 'Or', pattern: /OR/i, longer_alt: undefined }); +const Like = createToken({ name: 'Like', pattern: /LIKE/i, longer_alt: undefined }); +const Not = createToken({ name: 'Not', pattern: 
/NOT/i, longer_alt: undefined }); +const In = createToken({ name: 'In', pattern: /IN/i, longer_alt: undefined }); +const Is = createToken({ name: 'Is', pattern: /IS/i, longer_alt: undefined }); +const Null = createToken({ name: 'Null', pattern: /NULL/i, longer_alt: undefined }); + +// Identifier (must come after keywords) +const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ }); + +// Set longer_alt for keywords to handle cases like "ANDROID" not matching "AND" +And.LONGER_ALT = Identifier; +Or.LONGER_ALT = Identifier; +Like.LONGER_ALT = Identifier; +Not.LONGER_ALT = Identifier; +In.LONGER_ALT = Identifier; +Is.LONGER_ALT = Identifier; +Null.LONGER_ALT = Identifier; + +// Literals +const StringLiteral = createToken({ + name: 'StringLiteral', + pattern: /'(?:''|[^'])*'/, +}); + +const NumberLiteral = createToken({ + name: 'NumberLiteral', + pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/, +}); + +// Operators +const NotEquals = createToken({ name: 'NotEquals', pattern: /!=/ }); +const GreaterThanOrEqual = createToken({ name: 'GreaterThanOrEqual', pattern: />=/ }); +const LessThanOrEqual = createToken({ name: 'LessThanOrEqual', pattern: /<=/ }); +const Equals = createToken({ name: 'Equals', pattern: /=/ }); +const GreaterThan = createToken({ name: 'GreaterThan', pattern: />/ }); +const LessThan = createToken({ name: 'LessThan', pattern: / { + let left = this.SUBRULE(this.#andExpression); + + this.MANY(() => { + this.CONSUME(Or); + const right = this.SUBRULE2(this.#andExpression); + left = this.ACTION(() => this.#combineWithOperator(left, right, 'or')); + }); + + return left; + }); + + // AND has higher precedence than OR + #andExpression = this.RULE('andExpression', (): QueryFilter => { + let left = this.SUBRULE(this.#primaryExpression); + + this.MANY(() => { + this.CONSUME(And); + const right = this.SUBRULE2(this.#primaryExpression); + left = this.ACTION(() => this.#combineWithOperator(left, right, 'and')); + }); + + return left; + }); + + // Primary: parenthesized expression or condition + #primaryExpression = this.RULE('primaryExpression', (): QueryFilter => { + return this.OR([ + { + ALT: () => { + this.CONSUME(LParen); + const expr = this.SUBRULE(this.#orExpression); + this.CONSUME(RParen); + return expr; + }, + }, + { ALT: () => this.SUBRULE(this.#condition) }, + ]); + }); + + // Condition: field followed by operator and value(s) + #condition = this.RULE('condition', (): QueryCondition => { + const field = this.SUBRULE(this.#fieldReference); + + return this.OR([ + // IS NULL / IS NOT NULL + { + ALT: () => { + this.CONSUME(Is); + const isNot = this.OPTION(() => this.CONSUME(Not)) !== undefined; + this.CONSUME(Null); + + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: isNot ? 
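The keyword tokens above lean on chevrotain's longer_alt mechanism: keywords are listed before Identifier so they win ties, while LONGER_ALT = Identifier keeps a word such as "ANDROID" from being split into AND plus "ROID". A minimal, self-contained sketch of that behaviour, using a reduced token set rather than the package's full lexer:

import { createToken, Lexer } from 'chevrotain';

const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });
// Keyword defined before Identifier, but deferring to it when Identifier matches a longer run.
const And = createToken({ name: 'And', pattern: /AND/i, longer_alt: Identifier });
const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

// Token order matters: skipped whitespace first, keywords before Identifier.
const demoLexer = new Lexer([WhiteSpace, And, Identifier]);

const { tokens } = demoLexer.tokenize('android AND androidVersion');
// -> Identifier(android), And(AND), Identifier(androidVersion)
console.log(tokens.map((t) => `${t.tokenType.name}(${t.image})`).join(', '));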
{ notEqual: undefined, equal: undefined } : { equal: null }, + })); + }, + }, + // NOT IN (strings) - LA(1)=NOT, LA(2)=IN, LA(3)=(, LA(4)=value + { + GATE: () => this.LA(4).tokenType === StringLiteral, + ALT: () => { + this.CONSUME2(Not); + this.CONSUME(In); + const values = this.SUBRULE(this.#stringInList); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { notIn: values }, + })); + }, + }, + // NOT IN (numbers) + { + GATE: () => this.LA(4).tokenType === NumberLiteral, + ALT: () => { + this.CONSUME3(Not); + this.CONSUME2(In); + const values = this.SUBRULE(this.#numberInList); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { notIn: values }, + })); + }, + }, + // NOT LIKE + { + ALT: () => { + this.CONSUME4(Not); + this.CONSUME(Like); + const pattern = this.CONSUME(StringLiteral); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { notLike: this.#extractStringValue(pattern.image) }, + })); + }, + }, + // IN (strings) - LA(1)=IN, LA(2)=(, LA(3)=value + { + GATE: () => this.LA(3).tokenType === StringLiteral, + ALT: () => { + this.CONSUME3(In); + const values = this.SUBRULE2(this.#stringInList); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { in: values }, + })); + }, + }, + // IN (numbers) + { + GATE: () => this.LA(3).tokenType === NumberLiteral, + ALT: () => { + this.CONSUME4(In); + const values = this.SUBRULE2(this.#numberInList); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { in: values }, + })); + }, + }, + // LIKE + { + ALT: () => { + this.CONSUME2(Like); + const pattern = this.CONSUME2(StringLiteral); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { like: this.#extractStringValue(pattern.image) }, + })); + }, + }, + // = string + { + GATE: () => this.LA(2).tokenType === StringLiteral, + ALT: () => { + this.CONSUME(Equals); + const token = this.CONSUME3(StringLiteral); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { equal: this.#extractStringValue(token.image) }, + })); + }, + }, + // = number + { + GATE: () => this.LA(2).tokenType === NumberLiteral, + ALT: () => { + this.CONSUME2(Equals); + const token = this.CONSUME(NumberLiteral); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { equals: parseFloat(token.image) }, + })); + }, + }, + // = NULL + { + ALT: () => { + this.CONSUME3(Equals); + this.CONSUME2(Null); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { equal: null }, + })); + }, + }, + // != string + { + GATE: () => this.LA(2).tokenType === StringLiteral, + ALT: () => { + this.CONSUME(NotEquals); + const token = this.CONSUME4(StringLiteral); + return this.ACTION(() => ({ + type: 'text' as const, + field, + conditions: { notEqual: this.#extractStringValue(token.image) }, + })); + }, + }, + // != number + { + ALT: () => { + this.CONSUME2(NotEquals); + const token = this.CONSUME2(NumberLiteral); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { notEquals: parseFloat(token.image) }, + })); + }, + }, + // > number + { + ALT: () => { + this.CONSUME(GreaterThan); + const token = this.CONSUME3(NumberLiteral); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { greaterThan: parseFloat(token.image) }, + })); + }, + }, + // >= number + { + ALT: () => { + this.CONSUME(GreaterThanOrEqual); + const token = this.CONSUME4(NumberLiteral); + return 
this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { greaterThanOrEqual: parseFloat(token.image) }, + })); + }, + }, + // < number + { + ALT: () => { + this.CONSUME(LessThan); + const token = this.CONSUME5(NumberLiteral); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { lessThan: parseFloat(token.image) }, + })); + }, + }, + // <= number + { + ALT: () => { + this.CONSUME(LessThanOrEqual); + const token = this.CONSUME6(NumberLiteral); + return this.ACTION(() => ({ + type: 'number' as const, + field, + conditions: { lessThanOrEqual: parseFloat(token.image) }, + })); + }, + }, + ]); + }); + + // Field reference: identifier.identifier.identifier... + #fieldReference = this.RULE('fieldReference', (): string[] => { + const parts: string[] = []; + const first = this.CONSUME(Identifier); + this.ACTION(() => parts.push(first.image)); + + this.MANY(() => { + this.CONSUME(Dot); + const next = this.CONSUME2(Identifier); + this.ACTION(() => parts.push(next.image)); + }); + + return parts; + }); + + // String IN list: ('val1', 'val2', ...) + #stringInList = this.RULE('stringInList', (): string[] => { + const values: string[] = []; + + this.CONSUME(LParen); + const first = this.CONSUME(StringLiteral); + this.ACTION(() => values.push(this.#extractStringValue(first.image))); + + this.MANY(() => { + this.CONSUME(Comma); + const next = this.CONSUME2(StringLiteral); + this.ACTION(() => values.push(this.#extractStringValue(next.image))); + }); + + this.CONSUME(RParen); + return values; + }); + + // Number IN list: (1, 2, 3, ...) + #numberInList = this.RULE('numberInList', (): number[] => { + const values: number[] = []; + + this.CONSUME2(LParen); + const first = this.CONSUME(NumberLiteral); + this.ACTION(() => values.push(parseFloat(first.image))); + + this.MANY(() => { + this.CONSUME2(Comma); + const next = this.CONSUME2(NumberLiteral); + this.ACTION(() => values.push(parseFloat(next.image))); + }); + + this.CONSUME2(RParen); + return values; + }); + + // Extract string value from quoted literal, handling escaped quotes + #extractStringValue(image: string): string { + // Remove surrounding quotes and unescape doubled quotes + return image.slice(1, -1).replace(/''/g, "'"); + } + + // Combine two filters with an operator, flattening if possible + #combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter { + if (left.type === 'operator' && left.operator === operator) { + return { + type: 'operator', + operator, + conditions: [...left.conditions, right], + }; + } + + return { + type: 'operator', + operator, + conditions: [left, right], + }; + } + + // Entry point + #query = this.RULE('query', (): QueryFilter => { + return this.SUBRULE(this.#orExpression); + }); + + public parse = (input: string): QueryFilter => { + const lexResult = QueryLexer.tokenize(input); + + if (lexResult.errors.length > 0) { + const error = lexResult.errors[0]; + // Check if this looks like an unterminated string (starts with ' but lexer failed) + if (error.message.includes("'") || input.slice(error.offset).startsWith("'")) { + // Count unescaped single quotes + const unescapedQuotes = input.replace(/''/g, '').match(/'/g); + if (unescapedQuotes && unescapedQuotes.length % 2 !== 0) { + throw new Error(`Unterminated string starting at position ${error.offset}`); + } + } + throw new Error(`Lexer error at position ${error.offset}: ${error.message}`); + } + + this.input = lexResult.tokens; + const result = this.#query(); + + if (this.errors.length > 0) { 
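Because #andExpression is nested inside #orExpression, AND binds tighter than OR, and #combineWithOperator flattens runs of the same operator into a single node. A sketch of the tree the parser should produce for the sample query `type = 'note' AND score >= 3 OR source = 'web'` (field names and values are invented; the QueryFilter type comes from query-parser.schemas.ts below):

import type { QueryFilter } from './query-parser.schemas.js';

// Expected parse result for: type = 'note' AND score >= 3 OR source = 'web'
const expected: QueryFilter = {
  type: 'operator',
  operator: 'or',
  conditions: [
    {
      type: 'operator',
      operator: 'and',
      conditions: [
        { type: 'text', field: ['type'], conditions: { equal: 'note' } },
        { type: 'number', field: ['score'], conditions: { greaterThanOrEqual: 3 } },
      ],
    },
    { type: 'text', field: ['source'], conditions: { equal: 'web' } },
  ],
};

export { expected };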
+ const error = this.errors[0]; + throw new Error(`Parse error: ${error.message}`); + } + + return result; + }; +} + +export { QueryParserParser, QueryLexer }; diff --git a/packages/query-dsl/src/query-parser.schemas.ts b/packages/query-dsl/src/query-parser.schemas.ts new file mode 100644 index 0000000..ce52f48 --- /dev/null +++ b/packages/query-dsl/src/query-parser.schemas.ts @@ -0,0 +1,85 @@ +import { z } from 'zod'; + +const queryConditionTextSchema = z.object({ + type: z.literal('text'), + tableName: z.string().optional(), + field: z.array(z.string()), + conditions: z.object({ + equal: z.string().nullish(), + notEqual: z.string().optional(), + like: z.string().optional(), + notLike: z.string().optional(), + in: z.array(z.string()).optional(), + notIn: z.array(z.string()).optional(), + }), +}); + +type QueryConditionText = z.infer; + +const queryConditionNumberSchema = z.object({ + type: z.literal('number'), + tableName: z.string().optional(), + field: z.array(z.string()), + conditions: z.object({ + equals: z.number().nullish(), + notEquals: z.number().nullish(), + greaterThan: z.number().optional(), + greaterThanOrEqual: z.number().optional(), + lessThan: z.number().optional(), + lessThanOrEqual: z.number().optional(), + in: z.array(z.number()).optional(), + notIn: z.array(z.number()).optional(), + }), +}); + +type QueryConditionNumber = z.infer; + +const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]); + +type QueryCondition = z.infer; + +type QueryFilter = QueryCondition | QueryOperator; + +type QueryOperator = { + type: 'operator'; + operator: 'and' | 'or'; + conditions: QueryFilter[]; +}; + +// Create a depth-limited recursive schema for OpenAPI compatibility +// This supports up to 3 levels of nesting, which should be sufficient for most use cases +// OpenAPI cannot handle z.lazy(), so we manually define the nesting +// If you need deeper nesting, you can add more levels (Level3, Level4, etc.) 
+const queryFilterSchemaLevel0: z.ZodType = z.union([ + queryConditionSchema, + z.object({ + type: z.literal('operator'), + operator: z.enum(['and', 'or']), + conditions: z.array(queryConditionSchema), + }), +]); + +const queryFilterSchemaLevel1: z.ZodType = z.union([ + queryConditionSchema, + z.object({ + type: z.literal('operator'), + operator: z.enum(['and', 'or']), + conditions: z.array(queryFilterSchemaLevel0), + }), +]); + +const queryFilterSchemaLevel2: z.ZodType = z.union([ + queryConditionSchema, + z.object({ + type: z.literal('operator'), + operator: z.enum(['and', 'or']), + conditions: z.array(queryFilterSchemaLevel1), + }), +]); + +// Export the depth-limited schema (supports 3 levels of nesting) +// This works with OpenAPI schema generation +const queryFilterSchema = queryFilterSchemaLevel2; + +export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter }; +export { queryConditionSchema, queryFilterSchema }; diff --git a/packages/server/src/query-parser/query-parser.stringifier.ts b/packages/query-dsl/src/query-parser.stringifier.ts similarity index 99% rename from packages/server/src/query-parser/query-parser.stringifier.ts rename to packages/query-dsl/src/query-parser.stringifier.ts index f6142e3..850758a 100644 --- a/packages/server/src/query-parser/query-parser.stringifier.ts +++ b/packages/query-dsl/src/query-parser.stringifier.ts @@ -4,7 +4,7 @@ import type { QueryCondition, QueryConditionText, QueryConditionNumber, -} from '#root/utils/utils.query.ts'; +} from './query-parser.schemas.js'; class Stringifier { #stringifyFilter = (filter: QueryFilter, needsParens: boolean): string => { diff --git a/packages/server/src/query-parser/query-parser.test.ts b/packages/query-dsl/src/query-parser.test.ts similarity index 99% rename from packages/server/src/query-parser/query-parser.test.ts rename to packages/query-dsl/src/query-parser.test.ts index 91a0851..8b13744 100644 --- a/packages/server/src/query-parser/query-parser.test.ts +++ b/packages/query-dsl/src/query-parser.test.ts @@ -1,8 +1,7 @@ import { describe, it, expect } from 'vitest'; -import { QueryParser } from './query-parser.ts'; - -import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from '#root/utils/utils.query.ts'; +import { QueryParser } from './query-parser.js'; +import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from './query-parser.schemas.js'; describe('QueryParser', () => { const parser = new QueryParser(); diff --git a/packages/query-dsl/src/query-parser.ts b/packages/query-dsl/src/query-parser.ts new file mode 100644 index 0000000..9ae218f --- /dev/null +++ b/packages/query-dsl/src/query-parser.ts @@ -0,0 +1,18 @@ +import { Stringifier } from './query-parser.stringifier.js'; +import { QueryParserParser } from './query-parser.parser.js'; +import type { QueryFilter } from './query-parser.schemas.js'; + +class QueryParser { + #stringifier = new Stringifier(); + #parser = new QueryParserParser(); + + public parse = (input: string): QueryFilter => { + return this.#parser.parse(input); + }; + + public stringify = (filter: QueryFilter): string => { + return this.#stringifier.stringify(filter); + }; +} + +export { QueryParser }; diff --git a/packages/query-dsl/tsconfig.json b/packages/query-dsl/tsconfig.json new file mode 100644 index 0000000..462d659 --- /dev/null +++ b/packages/query-dsl/tsconfig.json @@ -0,0 +1,10 @@ +{ + "compilerOptions": { + "outDir": "./dist", + "rootDir": "./src" + }, + "include": [ + 
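With the wrapper class in place, a DSL string can be parsed into a structured filter, checked against the depth-limited zod schema, and turned back into text. A usage sketch, assuming the package is built and consumed through its exports entry (the query itself is just an example):

import { QueryParser, queryFilterSchema } from '@morten-olsen/stash-query-dsl';

const parser = new QueryParser();

const filter = parser.parse("metadata.category = 'recipe' AND metadata.rating >= 4");

// The depth-limited schema accepts the parsed tree (up to three levels of nesting).
console.log(queryFilterSchema.safeParse(filter).success); // true

// Round-trip back to DSL text.
console.log(parser.stringify(filter));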
"src/**/*.ts" + ], + "extends": "@morten-olsen/stash-configs/tsconfig.json" +} diff --git a/packages/query-dsl/vitest.config.ts b/packages/query-dsl/vitest.config.ts new file mode 100644 index 0000000..8998b5b --- /dev/null +++ b/packages/query-dsl/vitest.config.ts @@ -0,0 +1,12 @@ +import { defineConfig } from 'vitest/config'; +import { getAliases } from '@morten-olsen/stash-tests/vitest'; + +// eslint-disable-next-line import/no-default-export +export default defineConfig(async () => { + const aliases = await getAliases(); + return { + resolve: { + alias: aliases, + }, + }; +}); diff --git a/packages/runtime/.gitignore b/packages/runtime/.gitignore new file mode 100644 index 0000000..8511d52 --- /dev/null +++ b/packages/runtime/.gitignore @@ -0,0 +1,4 @@ +/node_modules/ +/dist/ +/coverage/ +/.env diff --git a/packages/runtime/package.json b/packages/runtime/package.json new file mode 100644 index 0000000..53a531b --- /dev/null +++ b/packages/runtime/package.json @@ -0,0 +1,29 @@ +{ + "type": "module", + "main": "dist/exports.js", + "scripts": { + "build": "tsc --build", + "test:unit": "vitest --run --passWithNoTests", + "test": "pnpm run \"/^test:/\"" + }, + "packageManager": "pnpm@10.6.0", + "files": [ + "dist" + ], + "exports": { + ".": "./dist/exports.js" + }, + "devDependencies": { + "@types/node": "24.10.2", + "@vitest/coverage-v8": "4.0.15", + "typescript": "5.9.3", + "vitest": "4.0.15", + "@morten-olsen/stash-configs": "workspace:*", + "@morten-olsen/stash-tests": "workspace:*" + }, + "name": "@morten-olsen/stash-runtime", + "version": "1.0.0", + "imports": { + "#root/*": "./src/*" + } +} diff --git a/packages/runtime/src/exports.ts b/packages/runtime/src/exports.ts new file mode 100644 index 0000000..ad926d7 --- /dev/null +++ b/packages/runtime/src/exports.ts @@ -0,0 +1 @@ +export * from './api.js'; diff --git a/packages/runtime/src/global.d.ts b/packages/runtime/src/global.d.ts new file mode 100644 index 0000000..63b36e7 --- /dev/null +++ b/packages/runtime/src/global.d.ts @@ -0,0 +1,12 @@ +import 'fastify'; +import type { Services } from './utils/utils.services.ts'; + +// eslint-disable-next-line +declare type ExplicitAny = any; + +declare module 'fastify' { + // eslint-disable-next-line + export interface FastifyInstance { + services: Services; + } +} diff --git a/packages/runtime/src/runtime.ts b/packages/runtime/src/runtime.ts new file mode 100644 index 0000000..e8f996e --- /dev/null +++ b/packages/runtime/src/runtime.ts @@ -0,0 +1,14 @@ +import { DocumentsService } from './services/documents/documents.js'; +import { Services } from './utils/utils.services.js'; + +class StashRuntime { + #services: Services; + + constructor(services: Services) { } + + public get documents() { + return this.#services.get(DocumentsService); + } +} + +export { StashRuntime }; diff --git a/packages/runtime/src/services/database/database.ts b/packages/runtime/src/services/database/database.ts new file mode 100644 index 0000000..5865f3a --- /dev/null +++ b/packages/runtime/src/services/database/database.ts @@ -0,0 +1,54 @@ +import knex, { type Knex } from 'knex'; +import ClientPgLite from 'knex-pglite'; +import { PGlite } from '@electric-sql/pglite'; +import { vector } from '@electric-sql/pglite/vector'; + +import { migrationSource } from './migrations/migrations.js'; + +import { destroy, Services } from '#root/utils/utils.services.js'; + +class DatabaseService { + #services: Services; + #instance?: Promise; + + constructor(services: Services) { + this.#services = services; + } + + #setup = async 
() => { + const pglite = new PGlite({ + extensions: { vector }, + }); + + const instance = knex({ + client: ClientPgLite, + dialect: 'postgres', + connection: () => ({ pglite }) as object, + }); + await instance.raw(`CREATE EXTENSION IF NOT EXISTS vector`); + + await instance.migrate.latest({ + migrationSource: migrationSource({ services: this.#services }), + }); + + return instance; + }; + + public getInstance = () => { + if (!this.#instance) { + this.#instance = this.#setup(); + } + return this.#instance; + }; + + [destroy] = async () => { + if (!this.#instance) { + return; + } + const instance = await this.#instance; + await instance.destroy(); + }; +} + +export { type TableRows, tableNames } from './migrations/migrations.js'; +export { DatabaseService }; diff --git a/packages/runtime/src/services/database/migrations/migrations.001-init.ts b/packages/runtime/src/services/database/migrations/migrations.001-init.ts new file mode 100644 index 0000000..08da637 --- /dev/null +++ b/packages/runtime/src/services/database/migrations/migrations.001-init.ts @@ -0,0 +1,112 @@ +import type { Migration } from './migrations.types.js'; + +import { EmbeddingsService } from '#root/services/embeddings/embeddings.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; + +const tableNames = { + documents: 'documents', + documentChunks: 'documentChunks', + relations: 'relations', +}; + +const init: Migration = { + name: 'init', + up: async ({ knex, services }) => { + const embedding = services.get(EmbeddingsService); + const embeddingField = await embedding.getFieldType(EMBEDDING_MODEL); + + await knex.schema.createTable(tableNames.documents, (table) => { + table.uuid('id').primary(); + table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE'); + table.datetime('updatedAt').notNullable(); + table.datetime('createdAt').notNullable(); + table.datetime('deletedAt').nullable(); + table.string('contentType').nullable(); + table.text('content').nullable(); + table.string('source').nullable(); + table.string('sourceId').nullable(); + table.string('type').notNullable(); + table.integer('typeVersion').nullable(); + table.text('searchText').nullable(); + table.jsonb('metadata').nullable(); + + table.index(['source', 'sourceId']); + table.index(['owner']); + }); + + await knex.schema.createTable(tableNames.documentChunks, (table) => { + table.uuid('id').primary(); + table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE'); + table.text('content').notNullable(); + table.specificType('embedding', embeddingField).notNullable(); + table.string('embeddingModel').notNullable(); + + table.index(['owner']); + }); + knex.raw(`create index on ${tableNames.documentChunks} using GIN ("embeddingg")`); + + await knex.schema.createTable(tableNames.relations, (table) => { + table.uuid('from').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE'); + table.uuid('to').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE'); + table.string('type').nullable(); + table.string('typeVersion').nullable(); + table.datetime('updatedAt').notNullable(); + table.datetime('createdAt').notNullable(); + table.datetime('deletedAt').nullable(); + table.jsonb('data'); + + table.primary(['from', 'to', 'type']); + table.index(['from']); + table.index(['to']); + }); + }, + down: async ({ knex }) => { + await knex.schema.dropTableIfExists(tableNames.relations); + await 
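The runtime keeps the whole store in-process: PGlite provides Postgres (with the pgvector extension) inside Node, and knex-pglite adapts it to knex. A stripped-down sketch of that wiring outside the service container; it uses the same libraries as above, the table name is illustrative, and the 384-dimension column assumes the default MiniLM embedding model:

import knex from 'knex';
import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';

const pglite = new PGlite({ extensions: { vector } });

const db = knex({
  client: ClientPgLite,
  dialect: 'postgres',
  connection: () => ({ pglite }) as object,
});

// pgvector must be enabled before any vector(...) columns are created.
await db.raw('CREATE EXTENSION IF NOT EXISTS vector');

// Illustrative table, mirroring the shape used by the migrations above.
await db.schema.createTable('demoChunks', (table) => {
  table.uuid('id').primary();
  table.text('content').notNullable();
  table.specificType('embedding', 'vector(384)').notNullable();
});

await db.destroy();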
knex.schema.dropTableIfExists(tableNames.documentChunks); + await knex.schema.dropTableIfExists(tableNames.documents); + }, +}; + +type DocumentRow = { + id: string; + owner: string | null; + updatedAt: Date; + createdAt: Date; + deletedAt: Date | null; + contentType: string | null; + content: string | null; + source: string | null; + sourceId: string | null; + type: string; + typeVersion: number | null; + searchText: string | null; + metadata: unknown; +}; + +type DocumentChunkRow = { + id: string; + owner: string; + content: string; + embedding: unknown; + embeddingModel: string; +}; + +type RelationRow = { + from: string; + to: string; + type: string; + typeVersion: string | null; + updatedAt: Date; + createdAt: Date; + deletedAt: Date | null; + data: unknown; +}; + +type TableRows = { + documents: DocumentRow; + documentChunks: DocumentChunkRow; + replations: RelationRow; +}; + +export type { TableRows }; +export { tableNames, init }; diff --git a/packages/runtime/src/services/database/migrations/migrations.ts b/packages/runtime/src/services/database/migrations/migrations.ts new file mode 100644 index 0000000..e83094f --- /dev/null +++ b/packages/runtime/src/services/database/migrations/migrations.ts @@ -0,0 +1,25 @@ +import type { Knex } from 'knex'; + +import type { Migration } from './migrations.types.js'; +import { init } from './migrations.001-init.js'; + +import type { Services } from '#root/utils/utils.services.js'; + +const migrations = [init] satisfies Migration[]; + +type MigrationSourceOptions = { + services: Services; +}; + +const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource => ({ + getMigrationName: (migration) => migration.name, + getMigration: async (migration) => ({ + name: migration.name, + up: (knex) => migration.up({ ...options, knex }), + down: (knex) => migration.down({ ...options, knex }), + }), + getMigrations: async () => migrations, +}); + +export { type TableRows, tableNames } from './migrations.001-init.js'; +export { migrationSource }; diff --git a/packages/runtime/src/services/database/migrations/migrations.types.ts b/packages/runtime/src/services/database/migrations/migrations.types.ts new file mode 100644 index 0000000..6897776 --- /dev/null +++ b/packages/runtime/src/services/database/migrations/migrations.types.ts @@ -0,0 +1,16 @@ +import type { Knex } from 'knex'; + +import type { Services } from '#root/utils/utils.services.js'; + +type MigrationOptions = { + knex: Knex; + services: Services; +}; + +type Migration = { + name: string; + up: (options: MigrationOptions) => Promise; + down: (options: MigrationOptions) => Promise; +}; + +export type { Migration }; diff --git a/packages/server/src/services/document-chunks/document.mappings.ts b/packages/runtime/src/services/document-chunks/document-chunks.mappings.ts similarity index 59% rename from packages/server/src/services/document-chunks/document.mappings.ts rename to packages/runtime/src/services/document-chunks/document-chunks.mappings.ts index 27a2a74..6a38e46 100644 --- a/packages/server/src/services/document-chunks/document.mappings.ts +++ b/packages/runtime/src/services/document-chunks/document-chunks.mappings.ts @@ -1,6 +1,6 @@ -import type { TableRows } from '../database/database.ts'; +import type { TableRows } from '../database/database.js'; -import type { DocumentChunk } from './document-chunks.schemas.ts'; +import type { DocumentChunk } from './document-chunks.schemas.js'; const mapFromDocumentChunkRow = ( row: TableRows['documentChunks'] & { diff --git 
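Because each migration receives the Services container alongside the knex handle, later migrations can consult other services, just as the init migration does for the embedding column type. A hypothetical follow-up migration showing the contract; the column is invented, and it would only run once appended to the `migrations` array in migrations.ts:

import type { Migration } from './migrations.types.js';
import { tableNames } from './migrations.001-init.js';

// Hypothetical second migration, illustrating the Migration contract.
const addTags: Migration = {
  name: '002-add-tags',
  up: async ({ knex }) => {
    await knex.schema.alterTable(tableNames.documents, (table) => {
      table.specificType('tags', 'text[]').nullable();
    });
  },
  down: async ({ knex }) => {
    await knex.schema.alterTable(tableNames.documents, (table) => {
      table.dropColumn('tags');
    });
  },
};

export { addTags };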
a/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts b/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts new file mode 100644 index 0000000..1a4da7d --- /dev/null +++ b/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts @@ -0,0 +1,33 @@ +import { z } from 'zod'; +import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; + +import { createListResultSchema } from '#root/utils/utils.schema.js'; + +const documentChunkSchema = z.object({ + id: z.string(), + owner: z.string(), + content: z.string(), + metadata: z.unknown(), +}); + +type DocumentChunk = z.infer; + +const documentChunkFilterSchema = z.object({ + limit: z.number().default(20), + offset: z.number().default(0), + semanticText: z.string().optional(), + conditions: z.union([queryFilterSchema, z.string()]).optional(), +}); + +type DocumentChunkFilter = z.infer; + +const documentChunksFindResultSchema = createListResultSchema( + documentChunkSchema.extend({ + distance: z.number().optional(), + }), +); + +type DocumentChunksFindResult = z.infer; + +export type { DocumentChunk, DocumentChunkFilter, DocumentChunksFindResult }; +export { documentChunkSchema, documentChunkFilterSchema, documentChunksFindResultSchema }; diff --git a/packages/runtime/src/services/document-chunks/document-chunks.ts b/packages/runtime/src/services/document-chunks/document-chunks.ts new file mode 100644 index 0000000..fac2845 --- /dev/null +++ b/packages/runtime/src/services/document-chunks/document-chunks.ts @@ -0,0 +1,66 @@ +import { QueryParser } from '@morten-olsen/stash-query-dsl'; + +import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; +import { EmbeddingsService } from '../embeddings/embeddings.js'; + +import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js'; +import { mapFromDocumentChunkRow } from './document-chunks.mappings.js'; + +import type { Services } from '#root/utils/utils.services.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; +import type { ExplicitAny } from '#root/global.js'; +import { applyQueryFilter } from '#root/utils/utils.query.js'; + +const baseFields = [ + `${tableNames.documentChunks}.*`, + `${tableNames.documents}.metadata`, + `${tableNames.documents}.createdAt`, +]; + +class DocumentChunksService { + #services: Services; + + constructor(services: Services) { + this.#services = services; + } + + public find = async (filter: DocumentChunkFilter): Promise => { + const databaseService = this.#services.get(DatabaseService); + const db = await databaseService.getInstance(); + + let query = db(tableNames.documentChunks); + query.join(tableNames.documents, `${tableNames.documents}.id`, `${tableNames.documentChunks}.owner`); + + if (filter.semanticText) { + const embedding = this.#services.get(EmbeddingsService); + const [vector] = await embedding.extract({ + input: [filter.semanticText], + model: EMBEDDING_MODEL, + }); + query = query.select(...baseFields, db.raw(`embedding <=> '${vector.toSql()}' as distance`)); + query = query.where(`${tableNames.documentChunks}.embeddingModel`, EMBEDDING_MODEL); + query = query.orderBy('distance', 'asc'); + } else { + query = query.select(baseFields); + query = query.orderBy('createdAt', 'desc'); + } + if (filter.conditions) { + const parser = this.#services.get(QueryParser); + query = applyQueryFilter( + query, + typeof filter.conditions === 'string' ? 
parser.parse(filter.conditions) : filter.conditions, + ); + } + + query = query.limit(filter.limit).offset(filter.offset); + + const items = await query; + + return { + items: items.map(mapFromDocumentChunkRow as ExplicitAny), + }; + }; +} + +export * from './document-chunks.schemas.js'; +export { DocumentChunksService }; diff --git a/packages/runtime/src/services/documents/documents.mapping.ts b/packages/runtime/src/services/documents/documents.mapping.ts new file mode 100644 index 0000000..e85f0b2 --- /dev/null +++ b/packages/runtime/src/services/documents/documents.mapping.ts @@ -0,0 +1,12 @@ +import type { TableRows } from '../database/database.js'; + +import type { Document } from './documents.schemas.js'; + +const mapFromDocumentRow = (row: TableRows['documents']): Document => ({ + ...row, + createdAt: row.createdAt.toISOString(), + updatedAt: row.updatedAt.toISOString(), + deletedAt: row.deletedAt?.toISOString() || null, +}); + +export { mapFromDocumentRow }; diff --git a/packages/runtime/src/services/documents/documents.schemas.ts b/packages/runtime/src/services/documents/documents.schemas.ts new file mode 100644 index 0000000..75222e5 --- /dev/null +++ b/packages/runtime/src/services/documents/documents.schemas.ts @@ -0,0 +1,80 @@ +import { z } from 'zod'; +import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; + +import { createListResultSchema } from '#root/utils/utils.schema.js'; + +const documentSchema = z.object({ + id: z.string(), + owner: z.string().nullable(), + createdAt: z.iso.datetime(), + updatedAt: z.iso.datetime(), + deletedAt: z.iso.datetime().nullable(), + contentType: z.string().nullable(), + content: z.string().nullable(), + source: z.string().nullable(), + sourceId: z.string().nullable(), + type: z.string(), + typeVersion: z.int().nullable(), + searchText: z.string().nullable(), + metadata: z.unknown(), +}); + +type Document = z.infer; + +const documentUpsertSchema = z + .object({ + id: z.string().nullish(), + owner: z.string().nullish(), + contentType: z.string().nullish(), + content: z.string().nullish(), + source: z.string().nullish(), + sourceId: z.string().nullish(), + type: z.string().optional(), + typeVersion: z.int().nullish(), + searchText: z.string().nullish(), + metadata: z.unknown().nullish(), + }) + .meta({ + example: { + content: 'the cat is yellow', + contentType: 'text/plain', + source: 'test', + sourceId: 'test', + type: 'raw', + metadata: { + foo: 'bar', + bar: 'baz', + }, + }, + }); + +type DocumentUpsert = z.infer; + +const documentUpsertResultSchema = z.object({ + action: z.enum(['inserted', 'updated', 'skipped']), + id: z.string(), + document: documentSchema, +}); + +type DocumentUpsertResult = z.infer; + +const documentFilterSchema = z.object({ + offset: z.number().default(0), + limit: z.number().default(20), + condition: z.union([queryFilterSchema, z.string()]), +}); + +type DocumentFilter = z.infer; + +const documentFindResultSchema = createListResultSchema(documentSchema); + +type DocumentFindResult = z.infer; + +export type { Document, DocumentUpsert, DocumentUpsertResult, DocumentFilter, DocumentFindResult }; +export { + documentSchema, + documentUpsertSchema, + documentUpsertResultSchema, + documentFilterSchema, + documentFindResultSchema, +}; diff --git a/packages/runtime/src/services/documents/documents.ts b/packages/runtime/src/services/documents/documents.ts new file mode 100644 index 0000000..ea5b6f8 --- /dev/null +++ b/packages/runtime/src/services/documents/documents.ts @@ -0,0 +1,179 @@ +import { QueryParser } 
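Putting the chunk service together: a caller inside the runtime package resolves it from the Services container, then combines vector similarity (semanticText, ordered by the pgvector <=> distance) with a metadata condition, which may be either a pre-parsed QueryFilter or a DSL string. A usage sketch; the query text and metadata field are examples, and the first call downloads the embedding model:

import { Services } from '#root/utils/utils.services.js';
import { DocumentChunksService } from '#root/services/document-chunks/document-chunks.js';

const services = new Services();
const chunks = services.get(DocumentChunksService);

const result = await chunks.find({
  limit: 5,
  offset: 0,
  semanticText: 'how do I store yellow cats?',
  // Either a parsed QueryFilter or a DSL string is accepted here.
  conditions: "metadata.category = 'recipe'",
});

for (const item of result.items) {
  console.log(item.distance, item.content);
}

await services.destroy();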
from '@morten-olsen/stash-query-dsl'; + +import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; +import { SplittingService } from '../splitter/splitter.js'; + +import type { + Document, + DocumentFilter, + DocumentFindResult, + DocumentUpsert, + DocumentUpsertResult, +} from './documents.schemas.ts'; +import { mapFromDocumentRow } from './documents.mapping.js'; + +import { EventEmitter } from '#root/utils/utils.event-emitter.js'; +import type { Services } from '#root/utils/utils.services.js'; +import { compareObjectKeys } from '#root/utils/utils.compare.js'; +import { applyQueryFilter } from '#root/utils/utils.query.js'; + +type DocumentsServiceEvents = { + upserted: (document: Document) => void; + inserted: (document: Document) => void; + updated: (document: Document) => void; +}; + +class DocumentsService extends EventEmitter { + #services: Services; + + constructor(services: Services) { + super(); + this.#services = services; + } + + public find = async (filter: DocumentFilter): Promise => { + const databaseService = this.#services.get(DatabaseService); + const db = await databaseService.getInstance(); + let query = db(tableNames.documents); + if (filter) { + const parser = this.#services.get(QueryParser); + query = applyQueryFilter( + query, + typeof filter.condition === 'string' ? parser.parse(filter.condition) : filter.condition, + ); + } + query = query.limit(filter.limit).offset(filter.offset); + const items = await query; + return { + items: items.map(mapFromDocumentRow), + }; + }; + + public get = async (id: string): Promise => { + const databaseService = this.#services.get(DatabaseService); + const db = await databaseService.getInstance(); + const [item] = await db(tableNames.documents).where('id', id).limit(1); + return mapFromDocumentRow(item); + }; + + public remove = async (id: string): Promise => { + const databaseService = this.#services.get(DatabaseService); + const db = await databaseService.getInstance(); + await db(tableNames.documents).where('id', id).delete(); + }; + + public upsert = async (document: DocumentUpsert): Promise => { + const databaseService = this.#services.get(DatabaseService); + const db = await databaseService.getInstance(); + + const result = await db.transaction(async (trx) => { + let id = document.id || crypto.randomUUID(); + if (document.source && document.sourceId) { + const [currentSourceDocument] = await trx(tableNames.documents) + .where('source', document.source) + .andWhere('sourceId', document.sourceId) + .limit(1); + if (currentSourceDocument) { + id = currentSourceDocument.id; + } + } + const now = new Date(); + const [current] = await trx(tableNames.documents).where('id', id).limit(1); + if (current) { + if ( + compareObjectKeys(current, document, [ + 'sourceId', + 'source', + 'content', + 'contentType', + 'searchText', + 'type', + 'typeVersion', + 'metadata', + ]) + ) { + return { + id, + action: 'skipped', + document: mapFromDocumentRow(current), + } as const; + } + await trx(tableNames.documents) + .update({ + ...document, + id, + updatedAt: now, + }) + .where('id', id); + const resultDocument: Document = mapFromDocumentRow({ + ...current, + ...document, + id, + }); + this.emit('updated', resultDocument); + this.emit('upserted', resultDocument); + return { + id, + action: 'updated', + document: resultDocument, + } as const; + } else { + await trx(tableNames.documents).insert({ + metadata: {}, + type: 'raw', + ...document, + id, + createdAt: now, + updatedAt: now, + }); + const resultDocument: Document 
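The upsert above is intended to be idempotent per (source, sourceId): a second call with identical content resolves to the existing row via the source lookup, fails the compareObjectKeys change check, and is reported as skipped, so re-ingesting a source should not rewrite its chunks. A sketch of that expected behaviour, again from inside the runtime package (payload values are examples):

import { Services } from '#root/utils/utils.services.js';
import { DocumentsService } from '#root/services/documents/documents.js';

const services = new Services();
const documents = services.get(DocumentsService);

const payload = {
  content: 'the cat is yellow',
  contentType: 'text/plain',
  source: 'test',
  sourceId: 'cat-1',
  type: 'raw',
  metadata: { foo: 'bar' },
};

const first = await documents.upsert(payload);
console.log(first.action); // 'inserted'

const second = await documents.upsert(payload);
console.log(second.action, second.id === first.id); // expected: 'skipped' true

await services.destroy();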
= mapFromDocumentRow({ + type: 'raw', + owner: null, + contentType: null, + content: null, + source: null, + sourceId: null, + typeVersion: null, + searchText: null, + metadata: {}, + ...document, + deletedAt: null, + id, + createdAt: now, + updatedAt: now, + }); + this.emit('inserted', resultDocument); + this.emit('upserted', resultDocument); + return { + id, + action: 'inserted', + document: resultDocument, + } as const; + } + }); + + if (result.action !== 'skipped') { + await db.transaction(async (trx) => { + await trx(tableNames.documentChunks).delete().where('owner', result.id); + const splittingService = this.#services.get(SplittingService); + const chunks = await splittingService.chunk(result.document); + if (chunks.length > 0) { + await trx(tableNames.documentChunks).insert( + chunks.map((chunk) => ({ + id: crypto.randomUUID(), + owner: result.id, + content: chunk.content, + embedding: chunk.vector.toSql(), + embeddingModel: chunk.model, + })), + ); + } + }); + } + + return result; + }; +} + +export * from './documents.schemas.js'; +export { DocumentsService }; diff --git a/packages/runtime/src/services/embeddings/embeddings.ts b/packages/runtime/src/services/embeddings/embeddings.ts new file mode 100644 index 0000000..debdc41 --- /dev/null +++ b/packages/runtime/src/services/embeddings/embeddings.ts @@ -0,0 +1,62 @@ +import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers'; + +import { Vector } from './embeddings.vector.js'; + +import type { ExplicitAny } from '#root/global.js'; + +type ExtractOptions = { + input: string[]; + model: string; +}; + +type Extractor = { + extractor: FeatureExtractionPipeline; + dimensions: number; +}; + +class EmbeddingsService { + #extractors = new Map>(); + + #setupExctractor = async (model: string) => { + const extractor = await pipeline('feature-extraction', model, {}); + const { config } = extractor.model; + if (!('hidden_size' in config) || typeof config.hidden_size !== 'number') { + throw new Error('Invalid model configuration'); + } + return { + extractor, + dimensions: config.hidden_size, + }; + }; + + #getExtractor = async (name: string) => { + if (!this.#extractors.has(name)) { + this.#extractors.set(name, this.#setupExctractor(name)); + } + const extractor = await this.#extractors.get(name); + if (!extractor) { + throw new Error('Extractor not found'); + } + + return extractor; + }; + + public extract = async (options: ExtractOptions) => { + const { input, model } = options; + const { extractor, dimensions } = await this.#getExtractor(model); + const output = await extractor(input, { pooling: 'cls' }); + return output.tolist().map((v: ExplicitAny) => new Vector(v, dimensions)); + }; + + public getDimensions = async (model: string) => { + const { dimensions } = await this.#getExtractor(model); + return dimensions; + }; + + public getFieldType = async (model: string) => { + const dimensions = await this.getDimensions(model); + return `vector(${dimensions})`; + }; +} + +export { EmbeddingsService, Vector }; diff --git a/packages/runtime/src/services/embeddings/embeddings.vector.ts b/packages/runtime/src/services/embeddings/embeddings.vector.ts new file mode 100644 index 0000000..01f9015 --- /dev/null +++ b/packages/runtime/src/services/embeddings/embeddings.vector.ts @@ -0,0 +1,37 @@ +import { cos_sim } from '@huggingface/transformers'; +import { toSql } from 'pgvector'; + +class Vector { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + #value: any; + #dimentions: number; + + // 
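The embeddings service lazily builds one feature-extraction pipeline per model and reads the vector width from the model's hidden_size. A standalone sketch of the same transformers.js calls; the first run downloads the model, and MiniLM yields 384-dimensional vectors:

import { pipeline, cos_sim } from '@huggingface/transformers';

const model = 'Xenova/all-MiniLM-L6-v2';
const extractor = await pipeline('feature-extraction', model);

// CLS pooling, matching the service above.
const output = await extractor(['the cat is yellow', 'a yellow cat'], { pooling: 'cls' });
const [a, b] = output.tolist() as number[][];

console.log(a.length);      // vector width (384 for this model)
console.log(cos_sim(a, b)); // cosine similarity between the two texts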
eslint-disable-next-line @typescript-eslint/no-explicit-any + constructor(value: any, dimentions: number) { + this.#value = value; + this.#dimentions = dimentions; + } + + public get value() { + return this.#value; + } + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + public set value(value: any) { + this.#value = value; + } + + public get dimentions() { + return this.#dimentions; + } + + public toSql = () => { + return toSql(this.#value); + }; + + public distanceTo = (other: Vector) => { + return cos_sim(this.#value, other.value); + }; +} + +export { Vector }; diff --git a/packages/runtime/src/services/splitter/splitter.ts b/packages/runtime/src/services/splitter/splitter.ts new file mode 100644 index 0000000..9a42242 --- /dev/null +++ b/packages/runtime/src/services/splitter/splitter.ts @@ -0,0 +1,44 @@ +import { EmbeddingsService } from '../embeddings/embeddings.js'; +import type { Document } from '../documents/documents.schemas.js'; + +import type { Chunk, Splitter } from './splitter.types.js'; +import { textSplitter } from './splitters/splitters.text.js'; + +import type { Services } from '#root/utils/utils.services.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; + +class SplittingService { + #services: Services; + #chunkers: Set; + + constructor(services: Services) { + this.#services = services; + this.#chunkers = new Set(); + this.addChunkers([textSplitter]); + } + + public addChunkers = (splitter: Splitter[]) => { + this.#chunkers = this.#chunkers.union(new Set(splitter)); + }; + + public chunk = async (input: Document): Promise => { + const splitter = this.#chunkers.values().find((splitter) => splitter.match(input)); + if (!splitter) { + return []; + } + const chunks = await splitter.chunk(input); + const embeddingsService = this.#services.get(EmbeddingsService); + const vectors = await embeddingsService.extract({ + input: chunks, + model: EMBEDDING_MODEL, + }); + return chunks.map((content, index) => ({ + content, + vector: vectors[index], + model: EMBEDDING_MODEL, + })); + }; +} + +export * from './splitter.types.js'; +export { SplittingService }; diff --git a/packages/runtime/src/services/splitter/splitter.types.ts b/packages/runtime/src/services/splitter/splitter.types.ts new file mode 100644 index 0000000..0eaa334 --- /dev/null +++ b/packages/runtime/src/services/splitter/splitter.types.ts @@ -0,0 +1,15 @@ +import type { Document } from '../documents/documents.schemas.js'; +import type { Vector } from '../embeddings/embeddings.vector.js'; + +type Chunk = { + content: string; + vector: Vector; + model: string; +}; + +type Splitter = { + match: (document: Document) => boolean; + chunk: (document: Document) => Promise; +}; + +export type { Chunk, Splitter }; diff --git a/packages/runtime/src/services/splitter/splitters/splitters.text.ts b/packages/runtime/src/services/splitter/splitters/splitters.text.ts new file mode 100644 index 0000000..b1841f9 --- /dev/null +++ b/packages/runtime/src/services/splitter/splitters/splitters.text.ts @@ -0,0 +1,17 @@ +import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; + +import type { Splitter } from '../splitter.types.js'; + +const textSplitter: Splitter = { + match: (document) => !!document.content, + chunk: async (document) => { + if (!document.content) { + return []; + } + const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 }); + const texts = await splitter.splitText(document.content); + return texts; + }, +}; + +export { textSplitter }; diff 
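The SplittingService picks the first registered splitter whose match() accepts the document, so other content types can be supported by registering more splitters through addChunkers. A hypothetical JSON splitter written against the Splitter contract; the matching rule and chunking strategy are purely illustrative, and the relative import assumes the file sits next to splitters.text.ts:

import type { Splitter } from '../splitter.types.js';

// Hypothetical splitter for JSON documents: one chunk per top-level entry.
const jsonSplitter: Splitter = {
  match: (document) => document.contentType === 'application/json' && !!document.content,
  chunk: async (document) => {
    if (!document.content) {
      return [];
    }
    const parsed = JSON.parse(document.content) as Record<string, unknown>;
    return Object.entries(parsed).map(([key, value]) => `${key}: ${JSON.stringify(value)}`);
  },
};

// Registered alongside the built-in text splitter:
//   services.get(SplittingService).addChunkers([jsonSplitter]);

export { jsonSplitter };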
--git a/packages/runtime/src/services/warmup/warmup.ts b/packages/runtime/src/services/warmup/warmup.ts new file mode 100644 index 0000000..4c5af93 --- /dev/null +++ b/packages/runtime/src/services/warmup/warmup.ts @@ -0,0 +1,17 @@ +import { DatabaseService } from '../database/database.js'; + +import { Services } from '#root/utils/utils.services.js'; + +class WarmupService { + #services: Services; + + constructor(services: Services) { + this.#services = services; + } + public ensure = async () => { + const databaseService = this.#services.get(DatabaseService); + await databaseService.getInstance(); + }; +} + +export { WarmupService }; diff --git a/packages/runtime/src/utils/utils.compare.ts b/packages/runtime/src/utils/utils.compare.ts new file mode 100644 index 0000000..a2e095f --- /dev/null +++ b/packages/runtime/src/utils/utils.compare.ts @@ -0,0 +1,14 @@ +import deepEqual from 'deep-equal'; + +const compareObjectKeys = >(a: T, b: T, keys: (keyof T)[]) => { + for (const key of keys) { + const avalue = a[key]; + const bvalue = b[key]; + if (!deepEqual(avalue, bvalue)) { + return false; + } + } + return true; +}; + +export { compareObjectKeys }; diff --git a/packages/runtime/src/utils/utils.consts.ts b/packages/runtime/src/utils/utils.consts.ts new file mode 100644 index 0000000..2560b4b --- /dev/null +++ b/packages/runtime/src/utils/utils.consts.ts @@ -0,0 +1,3 @@ +const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2'; + +export { EMBEDDING_MODEL }; diff --git a/packages/runtime/src/utils/utils.event-emitter.ts b/packages/runtime/src/utils/utils.event-emitter.ts new file mode 100644 index 0000000..65af8dc --- /dev/null +++ b/packages/runtime/src/utils/utils.event-emitter.ts @@ -0,0 +1,66 @@ +import type { ExplicitAny } from '#root/global.js'; + +type EventListener = (...args: T) => void | Promise; + +type OnOptions = { + abortSignal?: AbortSignal; +}; + +class EventEmitter void | Promise>> { + #listeners = new Map>>(); + + on = (event: K, callback: EventListener>, options: OnOptions = {}) => { + const { abortSignal } = options; + if (!this.#listeners.has(event)) { + this.#listeners.set(event, new Set()); + } + const callbackClone = (...args: Parameters) => callback(...args); + const abortController = new AbortController(); + const listeners = this.#listeners.get(event); + if (!listeners) { + throw new Error('Event registration failed'); + } + abortSignal?.addEventListener('abort', abortController.abort); + listeners.add(callbackClone); + abortController.signal.addEventListener('abort', () => { + this.#listeners.set(event, listeners?.difference(new Set([callbackClone]))); + }); + return abortController.abort; + }; + + once = (event: K, callback: EventListener>, options: OnOptions = {}) => { + const abortController = new AbortController(); + options.abortSignal?.addEventListener('abort', abortController.abort); + return this.on( + event, + async (...args) => { + abortController.abort(); + await callback(...args); + }, + { + ...options, + abortSignal: abortController.signal, + }, + ); + }; + + emit = (event: K, ...args: Parameters) => { + const listeners = this.#listeners.get(event); + if (!listeners) { + return; + } + for (const listener of listeners) { + listener(...args); + } + }; + + emitAsync = async (event: K, ...args: Parameters) => { + const listeners = this.#listeners.get(event); + if (!listeners) { + return; + } + await Promise.all(listeners.values().map((listener) => listener(...args))); + }; +} + +export { EventEmitter }; diff --git a/packages/runtime/src/utils/utils.query.ts 
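DocumentsService extends this emitter, so ingestion can be observed without touching the service itself: 'inserted' and 'updated' fire for the specific action and 'upserted' fires for both. A usage sketch from inside the runtime package (the handler bodies are illustrative):

import { Services } from '#root/utils/utils.services.js';
import { DocumentsService } from '#root/services/documents/documents.js';

const services = new Services();
const documents = services.get(DocumentsService);

documents.on('inserted', (document) => {
  console.log('new document', document.id);
});
documents.on('upserted', (document) => {
  console.log('document written', document.id, document.type);
});

await documents.upsert({ content: 'hello world', type: 'raw' });

await services.destroy();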
b/packages/runtime/src/utils/utils.query.ts new file mode 100644 index 0000000..b31b4af --- /dev/null +++ b/packages/runtime/src/utils/utils.query.ts @@ -0,0 +1,161 @@ +import type { + QueryCondition, + QueryConditionNumber, + QueryConditionText, + QueryFilter, +} from '@morten-olsen/stash-query-dsl'; +import { type Knex } from 'knex'; +/** + * Escapes a JSON key for use in PostgreSQL JSON operators. + * Escapes single quotes by doubling them, which is the PostgreSQL standard. + */ +const escapeJsonKey = (key: string): string => { + return key.replace(/'/g, "''"); +}; + +const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?: string) => { + const baseColumn = field[0]; + if (field.length === 1) { + return tableName ? `${tableName}.${baseColumn}` : baseColumn; + } + + const baseFieldRef = tableName ? query.client.ref(baseColumn).withSchema(tableName) : query.client.ref(baseColumn); + const jsonPath = field.slice(1); + let sqlExpression = baseFieldRef.toString(); + + for (let i = 0; i < jsonPath.length - 1; i++) { + const escapedKey = escapeJsonKey(jsonPath[i]); + sqlExpression += ` -> '${escapedKey}'`; + } + + const finalElement = jsonPath[jsonPath.length - 1]; + const escapedFinalKey = escapeJsonKey(finalElement); + sqlExpression += ` ->> '${escapedFinalKey}'`; + return query.client.raw(sqlExpression); +}; + +const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => { + const selector = getFieldSelector(query, field, tableName); + if (conditions.equal) { + query = query.where(selector, '=', conditions.equal); + } + if (conditions.notEqual) { + query = query.where(selector, '<>', conditions.notEqual); + } + if (conditions.like) { + query = query.whereLike(selector, conditions.like); + } + if (conditions.notLike) { + query = query.not.whereLike(selector, conditions.notLike); + } + if (conditions.equal === null) { + query = query.whereNull(selector); + } + if (conditions.notEqual === null) { + query = query.whereNotNull(selector); + } + if (conditions.in) { + query = query.whereIn(selector, conditions.in); + } + if (conditions.notIn) { + query = query.whereNotIn(selector, conditions.notIn); + } + return query; +}; + +const applyQueryConditionNumber = ( + query: Knex.QueryBuilder, + { field, tableName, conditions }: QueryConditionNumber, +) => { + const selector = getFieldSelector(query, field, tableName); + if (conditions.equals !== undefined && conditions.equals !== null) { + query = query.where(selector, '=', conditions.equals); + } + if (conditions.notEquals !== undefined && conditions.notEquals !== null) { + query = query.where(selector, '<>', conditions.notEquals); + } + if (conditions.equals === null) { + query = query.whereNull(selector); + } + if (conditions.notEquals === null) { + query = query.whereNotNull(selector); + } + if (conditions.greaterThan) { + query = query.where(selector, '>', conditions.greaterThan); + } + if (conditions.greaterThanOrEqual) { + query = query.where(selector, '>=', conditions.greaterThanOrEqual); + } + if (conditions.lessThan) { + query = query.where(selector, '<', conditions.lessThan); + } + if (conditions.lessThanOrEqual) { + query = query.where(selector, '<=', conditions.lessThanOrEqual); + } + if (conditions.in) { + query = query.whereIn(selector, conditions.in); + } + if (conditions.notIn) { + query = query.whereNotIn(selector, conditions.notIn); + } + return query; +}; + +const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => { + switch 
(options.type) { + case 'text': { + return applyQueryConditionText(query, options); + } + case 'number': { + return applyQueryConditionNumber(query, options); + } + default: { + throw new Error(`Unknown filter type`); + } + } +}; + +const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => { + if (filter.type === 'operator') { + if (filter.conditions.length === 0) { + return query; + } + + switch (filter.operator) { + case 'or': { + return query.where((subquery) => { + let isFirst = true; + for (const condition of filter.conditions) { + if (isFirst) { + applyQueryFilter(subquery, condition); + isFirst = false; + } else { + subquery.orWhere((subSubquery) => { + applyQueryFilter(subSubquery, condition); + }); + } + } + }); + } + case 'and': { + return query.where((subquery) => { + let isFirst = true; + for (const condition of filter.conditions) { + if (isFirst) { + applyQueryFilter(subquery, condition); + isFirst = false; + } else { + subquery.andWhere((subSubquery) => { + applyQueryFilter(subSubquery, condition); + }); + } + } + }); + } + } + } else { + return applyQueryCondition(query, filter); + } +}; + +export { applyQueryCondition, applyQueryFilter }; diff --git a/packages/runtime/src/utils/utils.schema.ts b/packages/runtime/src/utils/utils.schema.ts new file mode 100644 index 0000000..83dbaeb --- /dev/null +++ b/packages/runtime/src/utils/utils.schema.ts @@ -0,0 +1,8 @@ +import { z, type ZodType } from 'zod'; + +const createListResultSchema = <T extends ZodType>(schema: T) => + z.object({ + items: z.array(schema), + }); + +export { createListResultSchema }; diff --git a/packages/runtime/src/utils/utils.services.ts b/packages/runtime/src/utils/utils.services.ts new file mode 100644 index 0000000..a14fb5e --- /dev/null +++ b/packages/runtime/src/utils/utils.services.ts @@ -0,0 +1,51 @@ +const destroy = Symbol('destroy'); +const instanceKey = Symbol('instances'); + +type ServiceDependency<T> = new (services: Services) => T & { + [destroy]?: () => Promise<void> | void; +}; + +class Services { + [instanceKey]: Map<ServiceDependency<unknown>, unknown>; + + constructor() { + this[instanceKey] = new Map(); + } + + public get = <T>(service: ServiceDependency<T>) => { + if (!this[instanceKey].has(service)) { + this[instanceKey].set(service, new service(this)); + } + const instance = this[instanceKey].get(service); + if (!instance) { + throw new Error('Could not generate instance'); + } + return instance as T; + }; + + public set = <T>(service: ServiceDependency<T>, instance: Partial<T>) => { + this[instanceKey].set(service, instance); + }; + + public clone = () => { + const services = new Services(); + services[instanceKey] = new Map(this[instanceKey]); + return services; + }; + + public destroy = async () => { + await Promise.all( + this[instanceKey].values().map(async (instance) => { + if ( + typeof instance === 'object' && + instance && + destroy in instance && + typeof instance[destroy] === 'function' + ) { + await instance[destroy](); + } + }), + ); + }; +} + +export { Services, destroy }; diff --git a/packages/runtime/tsconfig.json b/packages/runtime/tsconfig.json new file mode 100644 index 0000000..09b3b05 --- /dev/null +++ b/packages/runtime/tsconfig.json @@ -0,0 +1,9 @@ +{ + "compilerOptions": { + "outDir": "./dist" + }, + "include": [ + "src/**/*.ts" + ], + "extends": "@morten-olsen/stash-configs/tsconfig.json" +} diff --git a/packages/runtime/vitest.config.ts b/packages/runtime/vitest.config.ts new file mode 100644 index 0000000..8998b5b --- /dev/null +++ b/packages/runtime/vitest.config.ts @@ -0,0 +1,12 @@ +import { defineConfig }
from 'vitest/config'; +import { getAliases } from '@morten-olsen/stash-tests/vitest'; + +// eslint-disable-next-line import/no-default-export +export default defineConfig(async () => { + const aliases = await getAliases(); + return { + resolve: { + alias: aliases, + }, + }; +}); diff --git a/packages/server/package.json b/packages/server/package.json index 80ecf55..9c3188a 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -37,6 +37,7 @@ "@fastify/websocket": "11.2.0", "@huggingface/transformers": "^3.8.1", "@langchain/textsplitters": "^1.0.1", + "@morten-olsen/stash-query-dsl": "workspace:*", "@scalar/fastify-api-reference": "1.40.2", "better-sqlite3": "^12.5.0", "deep-equal": "^2.2.3", diff --git a/packages/server/src/api.ts b/packages/server/src/api.ts index 317b371..f4fda22 100644 --- a/packages/server/src/api.ts +++ b/packages/server/src/api.ts @@ -10,12 +10,12 @@ import { type ZodTypeProvider, } from 'fastify-type-provider-zod'; -import { Services } from './utils/utils.services.ts'; -import { systemEndpoints } from './endpoints/system/system.ts'; -import { WarmupService } from './services/warmup/warmup.ts'; -import { documentEndpoints } from './endpoints/documents/documents.ts'; -import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.ts'; -import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.ts'; +import { Services } from './utils/utils.services.js'; +import { systemEndpoints } from './endpoints/system/system.js'; +import { WarmupService } from './services/warmup/warmup.js'; +import { documentEndpoints } from './endpoints/documents/documents.js'; +import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.js'; +import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.js'; class BaseError extends Error { public statusCode: number; diff --git a/packages/server/src/dev.ts b/packages/server/src/dev.ts index d2fcc15..38c2059 100644 --- a/packages/server/src/dev.ts +++ b/packages/server/src/dev.ts @@ -1,6 +1,6 @@ import { createApi } from './api.js'; -import { DocumentsService, type DocumentUpsert } from './services/documents/documents.ts'; -import { Services } from './utils/utils.services.ts'; +import { DocumentsService, type DocumentUpsert } from './services/documents/documents.js'; +import { Services } from './utils/utils.services.js'; const services = new Services(); const server = await createApi(services); diff --git a/packages/server/src/endpoints/document-chunk-filters/document-chunk-filters.ts b/packages/server/src/endpoints/document-chunk-filters/document-chunk-filters.ts index 6a47a38..f800ba0 100644 --- a/packages/server/src/endpoints/document-chunk-filters/document-chunk-filters.ts +++ b/packages/server/src/endpoints/document-chunk-filters/document-chunk-filters.ts @@ -4,7 +4,7 @@ import { documentChunkFilterSchema, documentChunksFindResultSchema, DocumentChunksService, -} from '#root/services/document-chunks/document-chunks.ts'; +} from '#root/services/document-chunks/document-chunks.js'; const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => { instance.route({ diff --git a/packages/server/src/endpoints/document-filters/document-filters.ts b/packages/server/src/endpoints/document-filters/document-filters.ts index 8537ea4..7858f45 100644 --- a/packages/server/src/endpoints/document-filters/document-filters.ts +++ 
b/packages/server/src/endpoints/document-filters/document-filters.ts @@ -4,7 +4,7 @@ import { documentFilterSchema, documentFindResultSchema, DocumentsService, -} from '#root/services/documents/documents.ts'; +} from '#root/services/documents/documents.js'; const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => { instance.route({ diff --git a/packages/server/src/endpoints/documents/documents.ts b/packages/server/src/endpoints/documents/documents.ts index 67b9041..3c7957e 100644 --- a/packages/server/src/endpoints/documents/documents.ts +++ b/packages/server/src/endpoints/documents/documents.ts @@ -4,7 +4,7 @@ import { DocumentsService, documentUpsertResultSchema, documentUpsertSchema, -} from '#root/services/documents/documents.ts'; +} from '#root/services/documents/documents.js'; const documentEndpoints: FastifyPluginAsyncZod = async (instance) => { instance.route({ diff --git a/packages/server/src/endpoints/system/system.ts b/packages/server/src/endpoints/system/system.ts index 24a183f..4c8bbfe 100644 --- a/packages/server/src/endpoints/system/system.ts +++ b/packages/server/src/endpoints/system/system.ts @@ -1,7 +1,7 @@ import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod'; import { z } from 'zod'; -import { DatabaseService } from '#root/services/database/database.ts'; +import { DatabaseService } from '#root/services/database/database.js'; const systemEndpoints: FastifyPluginAsyncZod = async (instance) => { instance.route({ diff --git a/packages/server/src/query-parser/query-parser.lexer.ts b/packages/server/src/query-parser/query-parser.lexer.ts deleted file mode 100644 index aa92156..0000000 --- a/packages/server/src/query-parser/query-parser.lexer.ts +++ /dev/null @@ -1,202 +0,0 @@ -import type { Token } from './query-parser.types.ts'; - -class Lexer { - #input: string; - #position = 0; - #tokens: Token[] = []; - - constructor(input: string) { - this.#input = input; - } - - #skipWhitespace = (): void => { - while (this.#position < this.#input.length && /\s/.test(this.#input[this.#position])) { - this.#position++; - } - }; - - #nextToken = (): Token | null => { - const char = this.#input[this.#position]; - const startPosition = this.#position; - - // Single character tokens - if (char === '(') { - this.#position++; - return { type: 'LPAREN', value: '(', position: startPosition }; - } - if (char === ')') { - this.#position++; - return { type: 'RPAREN', value: ')', position: startPosition }; - } - if (char === ',') { - this.#position++; - return { type: 'COMMA', value: ',', position: startPosition }; - } - if (char === '.') { - this.#position++; - return { type: 'DOT', value: '.', position: startPosition }; - } - - // Two-character operators - if (char === '!' 
&& this.#input[this.#position + 1] === '=') { - this.#position += 2; - return { type: 'NOT_EQUALS', value: '!=', position: startPosition }; - } - if (char === '>' && this.#input[this.#position + 1] === '=') { - this.#position += 2; - return { type: 'GREATER_THAN_OR_EQUAL', value: '>=', position: startPosition }; - } - if (char === '<' && this.#input[this.#position + 1] === '=') { - this.#position += 2; - return { type: 'LESS_THAN_OR_EQUAL', value: '<=', position: startPosition }; - } - - // Single character operators - if (char === '=') { - this.#position++; - return { type: 'EQUALS', value: '=', position: startPosition }; - } - if (char === '>') { - this.#position++; - return { type: 'GREATER_THAN', value: '>', position: startPosition }; - } - if (char === '<') { - this.#position++; - return { type: 'LESS_THAN', value: '<', position: startPosition }; - } - - // String literal - if (char === "'") { - return this.#readString(); - } - - // Number - if (/[0-9]/.test(char) || (char === '-' && /[0-9]/.test(this.#input[this.#position + 1]))) { - return this.#readNumber(); - } - - // Identifier or keyword - if (/[a-zA-Z_]/.test(char)) { - return this.#readIdentifierOrKeyword(); - } - - throw new Error(`Unexpected character '${char}' at position ${this.#position}`); - }; - - #readString = (): Token => { - const startPosition = this.#position; - this.#position++; // Skip opening quote - let value = ''; - - while (this.#position < this.#input.length) { - const char = this.#input[this.#position]; - if (char === "'") { - // Check for escaped quote - if (this.#input[this.#position + 1] === "'") { - value += "'"; - this.#position += 2; - } else { - this.#position++; // Skip closing quote - return { type: 'STRING', value, position: startPosition }; - } - } else { - value += char; - this.#position++; - } - } - - throw new Error(`Unterminated string starting at position ${startPosition}`); - }; - - #readNumber = (): Token => { - const startPosition = this.#position; - let value = ''; - - // Optional minus sign - if (this.#input[this.#position] === '-') { - value += '-'; - this.#position++; - } - - // Integer part - while (this.#position < this.#input.length && /[0-9]/.test(this.#input[this.#position])) { - value += this.#input[this.#position]; - this.#position++; - } - - // Decimal part - if (this.#input[this.#position] === '.' 
&& /[0-9]/.test(this.#input[this.#position + 1])) { - value += '.'; - this.#position++; - while (this.#position < this.#input.length && /[0-9]/.test(this.#input[this.#position])) { - value += this.#input[this.#position]; - this.#position++; - } - } - - // Scientific notation - if (this.#input[this.#position] === 'e' || this.#input[this.#position] === 'E') { - value += this.#input[this.#position]; - this.#position++; - if (this.#input[this.#position] === '+' || this.#input[this.#position] === '-') { - value += this.#input[this.#position]; - this.#position++; - } - while (this.#position < this.#input.length && /[0-9]/.test(this.#input[this.#position])) { - value += this.#input[this.#position]; - this.#position++; - } - } - - return { type: 'NUMBER', value, position: startPosition }; - }; - - #readIdentifierOrKeyword = (): Token => { - const startPosition = this.#position; - let value = ''; - - while (this.#position < this.#input.length && /[a-zA-Z0-9_]/.test(this.#input[this.#position])) { - value += this.#input[this.#position]; - this.#position++; - } - - const upperValue = value.toUpperCase(); - - // Keywords - switch (upperValue) { - case 'AND': - return { type: 'AND', value, position: startPosition }; - case 'OR': - return { type: 'OR', value, position: startPosition }; - case 'LIKE': - return { type: 'LIKE', value, position: startPosition }; - case 'NOT': - return { type: 'NOT', value, position: startPosition }; - case 'IN': - return { type: 'IN', value, position: startPosition }; - case 'IS': - return { type: 'IS', value, position: startPosition }; - case 'NULL': - return { type: 'NULL', value, position: startPosition }; - default: - return { type: 'IDENTIFIER', value, position: startPosition }; - } - }; - - public tokenize = (): Token[] => { - while (this.#position < this.#input.length) { - this.#skipWhitespace(); - if (this.#position >= this.#input.length) break; - - const token = this.#nextToken(); - if (token) { - this.#tokens.push(token); - } - } - - this.#tokens.push({ type: 'EOF', value: '', position: this.#position }); - return this.#tokens; - }; -} - -export { Lexer }; diff --git a/packages/server/src/query-parser/query-parser.parser.ts b/packages/server/src/query-parser/query-parser.parser.ts deleted file mode 100644 index 2b6ad1d..0000000 --- a/packages/server/src/query-parser/query-parser.parser.ts +++ /dev/null @@ -1,317 +0,0 @@ -import { Lexer } from './query-parser.lexer.ts'; -import type { Token, TokenType } from './query-parser.types.ts'; - -import type { QueryConditionText, QueryConditionNumber, QueryFilter, QueryCondition } from '#root/utils/utils.query.ts'; - -class Parser { - #tokens: Token[] = []; - #position = 0; - - #current = (): Token => { - return this.#tokens[this.#position]; - }; - - #advance = (): Token => { - const token = this.#current(); - this.#position++; - return token; - }; - - #expect = (type: TokenType): Token => { - const token = this.#current(); - if (token.type !== type) { - throw new Error(`Expected ${type} but got ${token.type} at position ${token.position}`); - } - return this.#advance(); - }; - - #parseExpression = (): QueryFilter => { - return this.#parseOr(); - }; - - #parseOr = (): QueryFilter => { - let left = this.#parseAnd(); - - while (this.#current().type === 'OR') { - this.#advance(); - const right = this.#parseAnd(); - left = this.#combineWithOperator(left, right, 'or'); - } - - return left; - }; - - #parseAnd = (): QueryFilter => { - let left = this.#parsePrimary(); - - while (this.#current().type === 'AND') { - this.#advance(); 
- const right = this.#parsePrimary(); - left = this.#combineWithOperator(left, right, 'and'); - } - - return left; - }; - - #combineWithOperator = (left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter => { - // If left is already an operator of the same type, add to its conditions - if (left.type === 'operator' && left.operator === operator) { - return { - type: 'operator', - operator, - conditions: [...left.conditions, right], - }; - } - - return { - type: 'operator', - operator, - conditions: [left, right], - }; - }; - - #parsePrimary = (): QueryFilter => { - // Handle parenthesized expressions - if (this.#current().type === 'LPAREN') { - this.#advance(); - const expr = this.#parseExpression(); - this.#expect('RPAREN'); - return expr; - } - - // Must be a condition - return this.#parseCondition(); - }; - - #parseCondition = (): QueryCondition => { - const field = this.#parseField(); - - const token = this.#current(); - - // IS NULL / IS NOT NULL - if (token.type === 'IS') { - this.#advance(); - const isNot = this.#current().type === 'NOT'; - if (isNot) { - this.#advance(); - } - this.#expect('NULL'); - - // IS NULL / IS NOT NULL could be either text or number - default to text - return { - type: 'text', - field, - conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null }, - } satisfies QueryConditionText; - } - - // NOT IN / NOT LIKE - if (token.type === 'NOT') { - this.#advance(); - const nextToken = this.#current(); - - if (nextToken.type === 'IN') { - this.#advance(); - return this.#parseInCondition(field, true); - } - - if (nextToken.type === 'LIKE') { - this.#advance(); - const pattern = this.#expect('STRING').value; - return { - type: 'text', - field, - conditions: { notLike: pattern }, - }; - } - - throw new Error(`Expected IN or LIKE after NOT at position ${nextToken.position}`); - } - - // IN - if (token.type === 'IN') { - this.#advance(); - return this.#parseInCondition(field, false); - } - - // LIKE - if (token.type === 'LIKE') { - this.#advance(); - const pattern = this.#expect('STRING').value; - return { - type: 'text', - field, - conditions: { like: pattern }, - }; - } - - // Comparison operators - if (token.type === 'EQUALS') { - this.#advance(); - return this.#parseValueCondition(field, 'equals'); - } - - if (token.type === 'NOT_EQUALS') { - this.#advance(); - return this.#parseValueCondition(field, 'notEquals'); - } - - if (token.type === 'GREATER_THAN') { - this.#advance(); - const value = this.#parseNumber(); - return { - type: 'number', - field, - conditions: { greaterThan: value }, - }; - } - - if (token.type === 'GREATER_THAN_OR_EQUAL') { - this.#advance(); - const value = this.#parseNumber(); - return { - type: 'number', - field, - conditions: { greaterThanOrEqual: value }, - }; - } - - if (token.type === 'LESS_THAN') { - this.#advance(); - const value = this.#parseNumber(); - return { - type: 'number', - field, - conditions: { lessThan: value }, - }; - } - - if (token.type === 'LESS_THAN_OR_EQUAL') { - this.#advance(); - const value = this.#parseNumber(); - return { - type: 'number', - field, - conditions: { lessThanOrEqual: value }, - }; - } - - throw new Error(`Unexpected token '${token.value}' at position ${token.position}`); - }; - - #parseField = (): string[] => { - const parts: string[] = []; - parts.push(this.#expect('IDENTIFIER').value); - - while (this.#current().type === 'DOT') { - this.#advance(); - parts.push(this.#expect('IDENTIFIER').value); - } - - return parts; - }; - - #parseValueCondition = (field: 
string[], operator: 'equals' | 'notEquals'): QueryCondition => { - const token = this.#current(); - - if (token.type === 'STRING') { - this.#advance(); - const textCondition: QueryConditionText = { - type: 'text', - field, - conditions: operator === 'equals' ? { equal: token.value } : { notEqual: token.value }, - }; - return textCondition; - } - - if (token.type === 'NUMBER') { - this.#advance(); - const value = parseFloat(token.value); - const numCondition: QueryConditionNumber = { - type: 'number', - field, - conditions: operator === 'equals' ? { equals: value } : { notEquals: value }, - }; - return numCondition; - } - - if (token.type === 'NULL') { - this.#advance(); - // NULL equality - default to text type - return { - type: 'text', - field, - conditions: operator === 'equals' ? { equal: null } : {}, - } as QueryConditionText; - } - - throw new Error(`Expected value but got ${token.type} at position ${token.position}`); - }; - - #parseNumber = (): number => { - const token = this.#expect('NUMBER'); - return parseFloat(token.value); - }; - - #parseInCondition = (field: string[], isNot: boolean): QueryCondition => { - this.#expect('LPAREN'); - - const firstToken = this.#current(); - - if (firstToken.type === 'STRING') { - // Text IN - const values: string[] = []; - values.push(this.#advance().value); - - while (this.#current().type === 'COMMA') { - this.#advance(); - values.push(this.#expect('STRING').value); - } - - this.#expect('RPAREN'); - - return { - type: 'text', - field, - conditions: isNot ? { notIn: values } : { in: values }, - }; - } - - if (firstToken.type === 'NUMBER') { - // Numeric IN - const values: number[] = []; - values.push(parseFloat(this.#advance().value)); - - while (this.#current().type === 'COMMA') { - this.#advance(); - values.push(parseFloat(this.#expect('NUMBER').value)); - } - - this.#expect('RPAREN'); - - return { - type: 'number', - field, - conditions: isNot ? 
{ notIn: values } : { in: values }, - }; - } - - throw new Error(`Expected STRING or NUMBER in IN list at position ${firstToken.position}`); - }; - - public parse(input: string): QueryFilter { - const lexer = new Lexer(input); - this.#tokens = lexer.tokenize(); - this.#position = 0; - - const result = this.#parseExpression(); - - if (this.#current().type !== 'EOF') { - throw new Error(`Unexpected token '${this.#current().value}' at position ${this.#current().position}`); - } - - return result; - } -} - -export { Parser }; diff --git a/packages/server/src/query-parser/query-parser.ts b/packages/server/src/query-parser/query-parser.ts deleted file mode 100644 index de02786..0000000 --- a/packages/server/src/query-parser/query-parser.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { Stringifier } from './query-parser.stringifier.ts'; -import { Parser } from './query-parser.parser.ts'; - -import type { QueryFilter } from '#root/utils/utils.query.ts'; - -class QueryParser { - private parser = new Parser(); - private stringifier = new Stringifier(); - - public parse = (input: string): QueryFilter => { - return this.parser.parse(input); - }; - - public stringify = (filter: QueryFilter): string => { - return this.stringifier.stringify(filter); - }; -} - -export { QueryParser }; diff --git a/packages/server/src/query-parser/query-parser.types.ts b/packages/server/src/query-parser/query-parser.types.ts deleted file mode 100644 index c29ce4f..0000000 --- a/packages/server/src/query-parser/query-parser.types.ts +++ /dev/null @@ -1,30 +0,0 @@ -type TokenType = - | 'IDENTIFIER' - | 'STRING' - | 'NUMBER' - | 'AND' - | 'OR' - | 'LIKE' - | 'NOT' - | 'IN' - | 'IS' - | 'NULL' - | 'EQUALS' - | 'NOT_EQUALS' - | 'GREATER_THAN' - | 'GREATER_THAN_OR_EQUAL' - | 'LESS_THAN' - | 'LESS_THAN_OR_EQUAL' - | 'LPAREN' - | 'RPAREN' - | 'COMMA' - | 'DOT' - | 'EOF'; - -type Token = { - type: TokenType; - value: string; - position: number; -}; - -export type { TokenType, Token }; diff --git a/packages/server/src/services/database/database.ts b/packages/server/src/services/database/database.ts index c902a55..5865f3a 100644 --- a/packages/server/src/services/database/database.ts +++ b/packages/server/src/services/database/database.ts @@ -3,9 +3,9 @@ import ClientPgLite from 'knex-pglite'; import { PGlite } from '@electric-sql/pglite'; import { vector } from '@electric-sql/pglite/vector'; -import { migrationSource } from './migrations/migrations.ts'; +import { migrationSource } from './migrations/migrations.js'; -import { destroy, Services } from '#root/utils/utils.services.ts'; +import { destroy, Services } from '#root/utils/utils.services.js'; class DatabaseService { #services: Services; @@ -50,5 +50,5 @@ class DatabaseService { }; } -export { type TableRows, tableNames } from './migrations/migrations.ts'; +export { type TableRows, tableNames } from './migrations/migrations.js'; export { DatabaseService }; diff --git a/packages/server/src/services/database/migrations/migrations.001-init.ts b/packages/server/src/services/database/migrations/migrations.001-init.ts index 7141864..08da637 100644 --- a/packages/server/src/services/database/migrations/migrations.001-init.ts +++ b/packages/server/src/services/database/migrations/migrations.001-init.ts @@ -1,7 +1,7 @@ -import type { Migration } from './migrations.types.ts'; +import type { Migration } from './migrations.types.js'; -import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts'; +import { 
EmbeddingsService } from '#root/services/embeddings/embeddings.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; const tableNames = { documents: 'documents', diff --git a/packages/server/src/services/database/migrations/migrations.ts b/packages/server/src/services/database/migrations/migrations.ts index 21a4261..e83094f 100644 --- a/packages/server/src/services/database/migrations/migrations.ts +++ b/packages/server/src/services/database/migrations/migrations.ts @@ -1,9 +1,9 @@ import type { Knex } from 'knex'; -import type { Migration } from './migrations.types.ts'; -import { init } from './migrations.001-init.ts'; +import type { Migration } from './migrations.types.js'; +import { init } from './migrations.001-init.js'; -import type { Services } from '#root/utils/utils.services.ts'; +import type { Services } from '#root/utils/utils.services.js'; const migrations = [init] satisfies Migration[]; @@ -21,5 +21,5 @@ const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource< getMigrations: async () => migrations, }); -export { type TableRows, tableNames } from './migrations.001-init.ts'; +export { type TableRows, tableNames } from './migrations.001-init.js'; export { migrationSource }; diff --git a/packages/server/src/services/database/migrations/migrations.types.ts b/packages/server/src/services/database/migrations/migrations.types.ts index 88b3d72..6897776 100644 --- a/packages/server/src/services/database/migrations/migrations.types.ts +++ b/packages/server/src/services/database/migrations/migrations.types.ts @@ -1,6 +1,6 @@ import type { Knex } from 'knex'; -import type { Services } from '#root/utils/utils.services.ts'; +import type { Services } from '#root/utils/utils.services.js'; type MigrationOptions = { knex: Knex; diff --git a/packages/server/src/services/document-chunks/document-chunks.mappings.ts b/packages/server/src/services/document-chunks/document-chunks.mappings.ts new file mode 100644 index 0000000..6a38e46 --- /dev/null +++ b/packages/server/src/services/document-chunks/document-chunks.mappings.ts @@ -0,0 +1,13 @@ +import type { TableRows } from '../database/database.js'; + +import type { DocumentChunk } from './document-chunks.schemas.js'; + +const mapFromDocumentChunkRow = ( + row: TableRows['documentChunks'] & { + metadata: unknown; + }, +): DocumentChunk => ({ + ...row, +}); + +export { mapFromDocumentChunkRow }; diff --git a/packages/server/src/services/document-chunks/document-chunks.schemas.ts b/packages/server/src/services/document-chunks/document-chunks.schemas.ts index 81e344d..1a4da7d 100644 --- a/packages/server/src/services/document-chunks/document-chunks.schemas.ts +++ b/packages/server/src/services/document-chunks/document-chunks.schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; +import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; -import { createListResultSchema } from '#root/utils/utils.schema.ts'; -import { queryFilterSchema } from '#root/utils/utils.query.ts'; +import { createListResultSchema } from '#root/utils/utils.schema.js'; const documentChunkSchema = z.object({ id: z.string(), diff --git a/packages/server/src/services/document-chunks/document-chunks.ts b/packages/server/src/services/document-chunks/document-chunks.ts index 1ab713d..fac2845 100644 --- a/packages/server/src/services/document-chunks/document-chunks.ts +++ b/packages/server/src/services/document-chunks/document-chunks.ts @@ -1,14 +1,15 @@ -import { DatabaseService, tableNames, type TableRows } from '../database/database.ts'; 
-import { EmbeddingsService } from '../embeddings/embeddings.ts'; +import { QueryParser } from '@morten-olsen/stash-query-dsl'; -import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.ts'; -import { mapFromDocumentChunkRow } from './document.mappings.ts'; +import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; +import { EmbeddingsService } from '../embeddings/embeddings.js'; -import type { Services } from '#root/utils/utils.services.ts'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts'; +import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js'; +import { mapFromDocumentChunkRow } from './document-chunks.mappings.js'; + +import type { Services } from '#root/utils/utils.services.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; import type { ExplicitAny } from '#root/global.js'; -import { applyQueryFilter } from '#root/utils/utils.query.ts'; -import { QueryParser } from '#root/query-parser/query-parser.ts'; +import { applyQueryFilter } from '#root/utils/utils.query.js'; const baseFields = [ `${tableNames.documentChunks}.*`, @@ -61,5 +62,5 @@ class DocumentChunksService { }; } -export * from './document-chunks.schemas.ts'; +export * from './document-chunks.schemas.js'; export { DocumentChunksService }; diff --git a/packages/server/src/services/documents/documents.mapping.ts b/packages/server/src/services/documents/documents.mapping.ts index 51030fa..e85f0b2 100644 --- a/packages/server/src/services/documents/documents.mapping.ts +++ b/packages/server/src/services/documents/documents.mapping.ts @@ -1,6 +1,6 @@ -import type { TableRows } from '../database/database.ts'; +import type { TableRows } from '../database/database.js'; -import type { Document } from './documents.schemas.ts'; +import type { Document } from './documents.schemas.js'; const mapFromDocumentRow = (row: TableRows['documents']): Document => ({ ...row, diff --git a/packages/server/src/services/documents/documents.schemas.ts b/packages/server/src/services/documents/documents.schemas.ts index cfd708d..75222e5 100644 --- a/packages/server/src/services/documents/documents.schemas.ts +++ b/packages/server/src/services/documents/documents.schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; +import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; -import { createListResultSchema } from '#root/utils/utils.schema.ts'; -import { queryFilterSchema } from '#root/utils/utils.query.ts'; +import { createListResultSchema } from '#root/utils/utils.schema.js'; const documentSchema = z.object({ id: z.string(), diff --git a/packages/server/src/services/documents/documents.ts b/packages/server/src/services/documents/documents.ts index 1541676..ea5b6f8 100644 --- a/packages/server/src/services/documents/documents.ts +++ b/packages/server/src/services/documents/documents.ts @@ -1,5 +1,7 @@ -import { DatabaseService, tableNames, type TableRows } from '../database/database.ts'; -import { SplittingService } from '../splitter/splitter.ts'; +import { QueryParser } from '@morten-olsen/stash-query-dsl'; + +import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; +import { SplittingService } from '../splitter/splitter.js'; import type { Document, @@ -8,13 +10,12 @@ import type { DocumentUpsert, DocumentUpsertResult, } from './documents.schemas.ts'; -import { mapFromDocumentRow } from './documents.mapping.ts'; +import { mapFromDocumentRow } from './documents.mapping.js'; 
-import { EventEmitter } from '#root/utils/utils.event-emitter.ts'; -import type { Services } from '#root/utils/utils.services.ts'; -import { compareObjectKeys } from '#root/utils/utils.compare.ts'; -import { applyQueryFilter } from '#root/utils/utils.query.ts'; -import { QueryParser } from '#root/query-parser/query-parser.ts'; +import { EventEmitter } from '#root/utils/utils.event-emitter.js'; +import type { Services } from '#root/utils/utils.services.js'; +import { compareObjectKeys } from '#root/utils/utils.compare.js'; +import { applyQueryFilter } from '#root/utils/utils.query.js'; type DocumentsServiceEvents = { upserted: (document: Document) => void; @@ -174,5 +175,5 @@ class DocumentsService extends EventEmitter { }; } -export * from './documents.schemas.ts'; +export * from './documents.schemas.js'; export { DocumentsService }; diff --git a/packages/server/src/services/embeddings/embeddings.ts b/packages/server/src/services/embeddings/embeddings.ts index 5583868..debdc41 100644 --- a/packages/server/src/services/embeddings/embeddings.ts +++ b/packages/server/src/services/embeddings/embeddings.ts @@ -1,6 +1,8 @@ import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers'; -import { Vector } from './embeddings.vector.ts'; +import { Vector } from './embeddings.vector.js'; + +import type { ExplicitAny } from '#root/global.js'; type ExtractOptions = { input: string[]; @@ -57,4 +59,4 @@ class EmbeddingsService { }; } -export { EmbeddingsService }; +export { EmbeddingsService, Vector }; diff --git a/packages/server/src/services/splitter/splitter.ts b/packages/server/src/services/splitter/splitter.ts index 7768aef..9a42242 100644 --- a/packages/server/src/services/splitter/splitter.ts +++ b/packages/server/src/services/splitter/splitter.ts @@ -1,11 +1,11 @@ -import { EmbeddingsService } from '../embeddings/embeddings.ts'; -import type { Document } from '../documents/documents.schemas.ts'; +import { EmbeddingsService } from '../embeddings/embeddings.js'; +import type { Document } from '../documents/documents.schemas.js'; -import type { Chunk, Splitter } from './splitter.types.ts'; -import { textSplitter } from './splitters/splitters.text.ts'; +import type { Chunk, Splitter } from './splitter.types.js'; +import { textSplitter } from './splitters/splitters.text.js'; -import type { Services } from '#root/utils/utils.services.ts'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts'; +import type { Services } from '#root/utils/utils.services.js'; +import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; class SplittingService { #services: Services; @@ -40,5 +40,5 @@ class SplittingService { }; } -export * from './splitter.types.ts'; +export * from './splitter.types.js'; export { SplittingService }; diff --git a/packages/server/src/services/splitter/splitter.types.ts b/packages/server/src/services/splitter/splitter.types.ts index 1aeb157..0eaa334 100644 --- a/packages/server/src/services/splitter/splitter.types.ts +++ b/packages/server/src/services/splitter/splitter.types.ts @@ -1,5 +1,5 @@ -import type { Document } from '../documents/documents.schemas.ts'; -import type { Vector } from '../embeddings/embeddings.vector.ts'; +import type { Document } from '../documents/documents.schemas.js'; +import type { Vector } from '../embeddings/embeddings.vector.js'; type Chunk = { content: string; diff --git a/packages/server/src/services/splitter/splitters/splitters.text.ts b/packages/server/src/services/splitter/splitters/splitters.text.ts index 2b86854..b1841f9 
100644 --- a/packages/server/src/services/splitter/splitters/splitters.text.ts +++ b/packages/server/src/services/splitter/splitters/splitters.text.ts @@ -1,6 +1,6 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; -import type { Splitter } from '../splitter.types.ts'; +import type { Splitter } from '../splitter.types.js'; const textSplitter: Splitter = { match: (document) => !!document.content, diff --git a/packages/server/src/services/warmup/warmup.ts b/packages/server/src/services/warmup/warmup.ts index 4a55b44..4c5af93 100644 --- a/packages/server/src/services/warmup/warmup.ts +++ b/packages/server/src/services/warmup/warmup.ts @@ -1,6 +1,6 @@ -import { DatabaseService } from '../database/database.ts'; +import { DatabaseService } from '../database/database.js'; -import { Services } from '#root/utils/utils.services.ts'; +import { Services } from '#root/utils/utils.services.js'; class WarmupService { #services: Services; diff --git a/packages/server/src/utils/utils.event-emitter.ts b/packages/server/src/utils/utils.event-emitter.ts index e58953f..65af8dc 100644 --- a/packages/server/src/utils/utils.event-emitter.ts +++ b/packages/server/src/utils/utils.event-emitter.ts @@ -1,3 +1,5 @@ +import type { ExplicitAny } from '#root/global.js'; + type EventListener<T extends ExplicitAny[]> = (...args: T) => void | Promise<void>; type OnOptions = { diff --git a/packages/server/src/utils/utils.query.ts b/packages/server/src/utils/utils.query.ts index 34905ac..b31b4af 100644 --- a/packages/server/src/utils/utils.query.ts +++ b/packages/server/src/utils/utils.query.ts @@ -1,6 +1,10 @@ +import type { + QueryCondition, + QueryConditionNumber, + QueryConditionText, + QueryFilter, +} from '@morten-olsen/stash-query-dsl'; import { type Knex } from 'knex'; -import { z } from 'zod'; - /** * Escapes a JSON key for use in PostgreSQL JSON operators. * Escapes single quotes by doubling them, which is the PostgreSQL standard.
@@ -30,74 +34,6 @@ const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?: return query.client.raw(sqlExpression); }; -const queryConditionTextSchema = z - .object({ - type: z.literal('text'), - tableName: z.string().optional(), - field: z.array(z.string()), - conditions: z.object({ - equal: z.string().nullish(), - notEqual: z.string().optional(), - like: z.string().optional(), - notLike: z.string().optional(), - in: z.array(z.string()).optional(), - notIn: z.array(z.string()).optional(), - }), - }) - .meta({ - example: { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - examples: [ - { - summary: 'Equal condition', - value: { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - }, - { - summary: 'Like condition', - value: { - type: 'text', - field: ['content'], - conditions: { - like: '%cat%', - }, - }, - }, - { - summary: 'In condition', - value: { - type: 'text', - field: ['type'], - conditions: { - in: ['demo', 'article', 'post'], - }, - }, - }, - { - summary: 'Null check', - value: { - type: 'text', - field: ['source'], - conditions: { - equal: null, - }, - }, - }, - ], - }); - -type QueryConditionText = z.infer; - const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => { const selector = getFieldSelector(query, field, tableName); if (conditions.equal) { @@ -127,77 +63,6 @@ const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, c return query; }; -const queryConditionNumberSchema = z - .object({ - type: z.literal('number'), - tableName: z.string().optional(), - field: z.array(z.string()), - conditions: z.object({ - equals: z.number().nullish(), - notEquals: z.number().nullish(), - greaterThan: z.number().optional(), - greaterThanOrEqual: z.number().optional(), - lessThan: z.number().optional(), - lessThanOrEqual: z.number().optional(), - in: z.array(z.number()).optional(), - notIn: z.array(z.number()).optional(), - }), - }) - .meta({ - example: { - type: 'number', - field: ['typeVersion'], - conditions: { - equals: 1, - }, - }, - examples: [ - { - summary: 'Equals condition', - value: { - type: 'number', - field: ['typeVersion'], - conditions: { - equals: 1, - }, - }, - }, - { - summary: 'Greater than condition', - value: { - type: 'number', - field: ['typeVersion'], - conditions: { - greaterThan: 0, - }, - }, - }, - { - summary: 'Range condition', - value: { - type: 'number', - field: ['typeVersion'], - conditions: { - greaterThanOrEqual: 1, - lessThanOrEqual: 10, - }, - }, - }, - { - summary: 'In condition', - value: { - type: 'number', - field: ['typeVersion'], - conditions: { - in: [1, 2, 3], - }, - }, - }, - ], - }); - -type QueryConditionNumber = z.infer; - const applyQueryConditionNumber = ( query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionNumber, @@ -236,10 +101,6 @@ const applyQueryConditionNumber = ( return query; }; -const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]); - -type QueryCondition = z.infer; - const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => { switch (options.type) { case 'text': { @@ -254,254 +115,6 @@ const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) } }; -type QueryFilter = QueryCondition | QueryOperator; - -type QueryOperator = { - type: 'operator'; - operator: 'and' | 'or'; - conditions: QueryFilter[]; -}; - -// Create a 
depth-limited recursive schema for OpenAPI compatibility -// This supports up to 3 levels of nesting, which should be sufficient for most use cases -// OpenAPI cannot handle z.lazy(), so we manually define the nesting -// If you need deeper nesting, you can add more levels (Level3, Level4, etc.) -const queryFilterSchemaLevel0: z.ZodType = z.union([ - queryConditionSchema, - z - .object({ - type: z.literal('operator'), - operator: z.enum(['and', 'or']), - conditions: z.array(queryConditionSchema), - }) - .meta({ - example: { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - ], - }, - examples: [ - { - summary: 'AND operator', - value: { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - { - type: 'text', - field: ['type'], - conditions: { - equal: 'demo', - }, - }, - ], - }, - }, - { - summary: 'OR operator', - value: { - type: 'operator', - operator: 'or', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'baz', - }, - }, - ], - }, - }, - ], - }), -]); - -const queryFilterSchemaLevel1: z.ZodType = z.union([ - queryConditionSchema, - z - .object({ - type: z.literal('operator'), - operator: z.enum(['and', 'or']), - conditions: z.array(queryFilterSchemaLevel0), - }) - .meta({ - example: { - type: 'operator', - operator: 'or', - conditions: [ - { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - ], - }, - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'baz', - }, - }, - ], - }, - examples: [ - { - summary: 'Nested AND within OR', - value: { - type: 'operator', - operator: 'or', - conditions: [ - { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - { - type: 'text', - field: ['type'], - conditions: { - equal: 'demo', - }, - }, - ], - }, - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'baz', - }, - }, - ], - }, - }, - ], - }), -]); - -const queryFilterSchemaLevel2: z.ZodType = z.union([ - queryConditionSchema, - z - .object({ - type: z.literal('operator'), - operator: z.enum(['and', 'or']), - conditions: z.array(queryFilterSchemaLevel1), - }) - .meta({ - example: { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'operator', - operator: 'or', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'baz', - }, - }, - ], - }, - { - type: 'text', - field: ['type'], - conditions: { - equal: 'demo', - }, - }, - ], - }, - examples: [ - { - summary: 'Complex nested query', - value: { - type: 'operator', - operator: 'and', - conditions: [ - { - type: 'operator', - operator: 'or', - conditions: [ - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'bar', - }, - }, - { - type: 'text', - field: ['metadata', 'foo'], - conditions: { - equal: 'baz', - }, - }, - ], - }, - { - type: 'text', - field: ['type'], - conditions: { - equal: 'demo', - }, - }, - ], - }, - }, - ], - }), -]); - -// Export the depth-limited schema (supports 3 levels of nesting) -// This works with OpenAPI 
schema generation -const queryFilterSchema = queryFilterSchemaLevel2; - const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => { if (filter.type === 'operator') { if (filter.conditions.length === 0) { @@ -545,5 +158,4 @@ const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => { } }; -export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter }; -export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter }; +export { applyQueryCondition, applyQueryFilter }; diff --git a/packages/server/tsconfig.json b/packages/server/tsconfig.json index 846129e..b8403e6 100644 --- a/packages/server/tsconfig.json +++ b/packages/server/tsconfig.json @@ -1,6 +1,7 @@ { "compilerOptions": { "outDir": "./dist", + "rootDir": "./src", "paths": { "#root/*": [ "./src/*" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 292f53b..a51977e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,6 +50,55 @@ importers: packages/configs: {} + packages/query-dsl: + dependencies: + chevrotain: + specifier: ^11.0.3 + version: 11.0.3 + zod: + specifier: 4.1.13 + version: 4.1.13 + devDependencies: + '@morten-olsen/stash-configs': + specifier: workspace:* + version: link:../configs + '@morten-olsen/stash-tests': + specifier: workspace:* + version: link:../tests + '@types/node': + specifier: 24.10.2 + version: 24.10.2 + '@vitest/coverage-v8': + specifier: 4.0.15 + version: 4.0.15(vitest@4.0.15(@types/node@24.10.2)(tsx@4.21.0)(yaml@2.8.2)) + typescript: + specifier: 5.9.3 + version: 5.9.3 + vitest: + specifier: 4.0.15 + version: 4.0.15(@types/node@24.10.2)(tsx@4.21.0)(yaml@2.8.2) + + packages/runtime: + devDependencies: + '@morten-olsen/stash-configs': + specifier: workspace:* + version: link:../configs + '@morten-olsen/stash-tests': + specifier: workspace:* + version: link:../tests + '@types/node': + specifier: 24.10.2 + version: 24.10.2 + '@vitest/coverage-v8': + specifier: 4.0.15 + version: 4.0.15(vitest@4.0.15(@types/node@24.10.2)(tsx@4.21.0)(yaml@2.8.2)) + typescript: + specifier: 5.9.3 + version: 5.9.3 + vitest: + specifier: 4.0.15 + version: 4.0.15(@types/node@24.10.2)(tsx@4.21.0)(yaml@2.8.2) + packages/server: dependencies: '@electric-sql/pglite': @@ -70,6 +119,9 @@ importers: '@langchain/textsplitters': specifier: ^1.0.1 version: 1.0.1(@langchain/core@1.1.4) + '@morten-olsen/stash-query-dsl': + specifier: workspace:* + version: link:../query-dsl '@scalar/fastify-api-reference': specifier: 1.40.2 version: 1.40.2 @@ -181,6 +233,21 @@ packages: '@cfworker/json-schema@4.1.1': resolution: {integrity: sha512-gAmrUZSGtKc3AiBL71iNWxDsyUC5uMaKKGdvzYsBoTW/xi42JQHl7eKV2OYzCUqvc+D2RCcf7EXY2iCyFIk6og==} + '@chevrotain/cst-dts-gen@11.0.3': + resolution: {integrity: sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==} + + '@chevrotain/gast@11.0.3': + resolution: {integrity: sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==} + + '@chevrotain/regexp-to-ast@11.0.3': + resolution: {integrity: sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==} + + '@chevrotain/types@11.0.3': + resolution: {integrity: sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==} + + '@chevrotain/utils@11.0.3': + resolution: {integrity: sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==} + '@electric-sql/pglite@0.3.14': resolution: 
{integrity: sha512-3DB258dhqdsArOI1fIt7cb9RpUOgcDg5hXWVgVHAeqVQ/qxtFy605QKs4gx6mFq3jWsSPqDN8TgSEsqC3OfV9Q==} @@ -1422,6 +1489,9 @@ packages: resolution: {integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==} engines: {node: '>=10'} + chevrotain@11.0.3: + resolution: {integrity: sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==} + chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} @@ -2288,6 +2358,9 @@ packages: resolution: {integrity: sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==} engines: {node: '>=10'} + lodash-es@4.17.21: + resolution: {integrity: sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==} + lodash.merge@4.6.2: resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} @@ -3373,6 +3446,23 @@ snapshots: '@cfworker/json-schema@4.1.1': {} + '@chevrotain/cst-dts-gen@11.0.3': + dependencies: + '@chevrotain/gast': 11.0.3 + '@chevrotain/types': 11.0.3 + lodash-es: 4.17.21 + + '@chevrotain/gast@11.0.3': + dependencies: + '@chevrotain/types': 11.0.3 + lodash-es: 4.17.21 + + '@chevrotain/regexp-to-ast@11.0.3': {} + + '@chevrotain/types@11.0.3': {} + + '@chevrotain/utils@11.0.3': {} + '@electric-sql/pglite@0.3.14': {} '@emnapi/runtime@1.7.1': @@ -4580,6 +4670,15 @@ snapshots: char-regex@1.0.2: {} + chevrotain@11.0.3: + dependencies: + '@chevrotain/cst-dts-gen': 11.0.3 + '@chevrotain/gast': 11.0.3 + '@chevrotain/regexp-to-ast': 11.0.3 + '@chevrotain/types': 11.0.3 + '@chevrotain/utils': 11.0.3 + lodash-es: 4.17.21 + chownr@1.1.4: {} chownr@3.0.0: {} @@ -5567,6 +5666,8 @@ snapshots: dependencies: p-locate: 5.0.0 + lodash-es@4.17.21: {} + lodash.merge@4.6.2: {} lodash@4.17.21: {} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..19f2e3b --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,11 @@ +{ + "include": [], + "references": [ + { + "path": "./packages/query-dsl/tsconfig.json" + }, + { + "path": "./packages/server/tsconfig.json" + } + ] +}