This commit is contained in:
Morten Olsen
2025-12-10 09:11:03 +01:00
parent 9f9bc03d03
commit f9494c88e2
74 changed files with 2004 additions and 1035 deletions

View File

@@ -10,11 +10,9 @@
"resolveJsonModule": true,
"allowSyntheticDefaultImports": true,
"skipLibCheck": true,
"noEmit": true,
"jsx": "react-jsx",
"isolatedModules": true,
"verbatimModuleSyntax": true,
"erasableSyntaxOnly": true,
"allowImportingTsExtensions": true
"erasableSyntaxOnly": true
}
}

4
packages/query-dsl/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
/node_modules/
/dist/
/coverage/
/.env

View File

@@ -0,0 +1,33 @@
{
"type": "module",
"main": "dist/exports.js",
"scripts": {
"build": "tsc --build",
"test:unit": "vitest --run --passWithNoTests",
"test": "pnpm run \"/^test:/\""
},
"packageManager": "pnpm@10.6.0",
"files": [
"dist"
],
"exports": {
".": "./dist/exports.js"
},
"devDependencies": {
"@morten-olsen/stash-configs": "workspace:*",
"@morten-olsen/stash-tests": "workspace:*",
"@types/node": "24.10.2",
"@vitest/coverage-v8": "4.0.15",
"typescript": "5.9.3",
"vitest": "4.0.15"
},
"name": "@morten-olsen/stash-query-dsl",
"version": "1.0.0",
"imports": {
"#root/*": "./src/*"
},
"dependencies": {
"chevrotain": "^11.0.3",
"zod": "4.1.13"
}
}

View File

@@ -0,0 +1,2 @@
export * from './query-parser.schemas.js';
export { QueryParser } from './query-parser.js';

View File

@@ -0,0 +1,457 @@
import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain';
import type { QueryFilter, QueryCondition } from './query-parser.schemas.js';
// ----------------- Lexer -----------------
// Token definitions for the query DSL. Keywords match case-insensitively (/…/i),
// string literals are single-quoted with '' as the escaped quote, and field paths
// are dot-separated identifiers.
// Whitespace (skipped)
const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });
// Keywords (must be defined before Identifier to take precedence)
// longer_alt is left undefined at creation because Identifier is only created
// further down; LONGER_ALT is assigned on each keyword once Identifier exists.
const And = createToken({ name: 'And', pattern: /AND/i, longer_alt: undefined });
const Or = createToken({ name: 'Or', pattern: /OR/i, longer_alt: undefined });
const Like = createToken({ name: 'Like', pattern: /LIKE/i, longer_alt: undefined });
const Not = createToken({ name: 'Not', pattern: /NOT/i, longer_alt: undefined });
const In = createToken({ name: 'In', pattern: /IN/i, longer_alt: undefined });
const Is = createToken({ name: 'Is', pattern: /IS/i, longer_alt: undefined });
const Null = createToken({ name: 'Null', pattern: /NULL/i, longer_alt: undefined });
// Identifier (must come after keywords)
// Starts with a letter or underscore, followed by letters, digits or underscores.
const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_][a-zA-Z0-9_]*/ });
// Set longer_alt for keywords to handle cases like "ANDROID" not matching "AND"
And.LONGER_ALT = Identifier;
Or.LONGER_ALT = Identifier;
Like.LONGER_ALT = Identifier;
Not.LONGER_ALT = Identifier;
In.LONGER_ALT = Identifier;
Is.LONGER_ALT = Identifier;
Null.LONGER_ALT = Identifier;
// Literals
// Single-quoted string; a doubled quote ('') inside the literal stands for one quote.
const StringLiteral = createToken({
name: 'StringLiteral',
pattern: /'(?:''|[^'])*'/,
});
// Optional leading minus, integer part without leading zeros, optional fraction
// and optional exponent.
const NumberLiteral = createToken({
name: 'NumberLiteral',
pattern: /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/,
});
// Operators
const NotEquals = createToken({ name: 'NotEquals', pattern: /!=/ });
const GreaterThanOrEqual = createToken({ name: 'GreaterThanOrEqual', pattern: />=/ });
const LessThanOrEqual = createToken({ name: 'LessThanOrEqual', pattern: /<=/ });
const Equals = createToken({ name: 'Equals', pattern: /=/ });
const GreaterThan = createToken({ name: 'GreaterThan', pattern: />/ });
const LessThan = createToken({ name: 'LessThan', pattern: /</ });
// Punctuation
const LParen = createToken({ name: 'LParen', pattern: /\(/ });
const RParen = createToken({ name: 'RParen', pattern: /\)/ });
const Comma = createToken({ name: 'Comma', pattern: /,/ });
const Dot = createToken({ name: 'Dot', pattern: /\./ });
// Token order matters! More specific patterns first.
// This list is shared by the lexer below and by the parser's constructor.
const allTokens = [
WhiteSpace,
// Multi-char operators first
NotEquals,
GreaterThanOrEqual,
LessThanOrEqual,
// Single-char operators
Equals,
GreaterThan,
LessThan,
// Punctuation
LParen,
RParen,
Comma,
Dot,
// Keywords (before Identifier)
And,
Or,
Like,
Not,
In,
Is,
Null,
// Literals
StringLiteral,
NumberLiteral,
// Identifier last
Identifier,
];
// Lexer instance used by QueryParserParser.parse to tokenize the raw query string.
const QueryLexer = new Lexer(allTokens);
// ----------------- Parser -----------------
/**
 * Parser for the query DSL. It uses embedded actions, so every grammar rule
 * returns a piece of the resulting `QueryFilter` tree directly instead of a CST.
 *
 * Grammar, lowest to highest precedence:
 *   query      := orExpr
 *   orExpr     := andExpr (OR andExpr)*
 *   andExpr    := primary (AND primary)*
 *   primary    := '(' orExpr ')' | condition
 *   condition  := field (IS [NOT] NULL | [NOT] IN list | [NOT] LIKE string | op value)
 *
 * Code that builds result objects is wrapped in `this.ACTION(...)`; the numeric
 * suffixes on CONSUME/SUBRULE distinguish repeated uses of the same token or rule
 * inside one rule.
 */
class QueryParserParser extends EmbeddedActionsParser {
constructor() {
super(allTokens);
// The rule fields below are initialised right after super() returns, so every
// rule is registered by the time self-analysis runs.
this.performSelfAnalysis();
}
// OR has lowest precedence
#orExpression = this.RULE('orExpression', (): QueryFilter => {
let left = this.SUBRULE(this.#andExpression);
this.MANY(() => {
this.CONSUME(Or);
const right = this.SUBRULE2(this.#andExpression);
left = this.ACTION(() => this.#combineWithOperator(left, right, 'or'));
});
return left;
});
// AND has higher precedence than OR
#andExpression = this.RULE('andExpression', (): QueryFilter => {
let left = this.SUBRULE(this.#primaryExpression);
this.MANY(() => {
this.CONSUME(And);
const right = this.SUBRULE2(this.#primaryExpression);
left = this.ACTION(() => this.#combineWithOperator(left, right, 'and'));
});
return left;
});
// Primary: parenthesized expression or condition
#primaryExpression = this.RULE('primaryExpression', (): QueryFilter => {
return this.OR([
{
ALT: () => {
this.CONSUME(LParen);
const expr = this.SUBRULE(this.#orExpression);
this.CONSUME(RParen);
return expr;
},
},
{ ALT: () => this.SUBRULE(this.#condition) },
]);
});
// Condition: field followed by operator and value(s)
// Alternatives that share a token prefix (e.g. IN with strings vs. numbers) are
// told apart by GATEs that peek at the first value token via LA(n). The result
// type is 'text' for string/NULL operands and 'number' for numeric operands.
#condition = this.RULE('condition', (): QueryCondition => {
const field = this.SUBRULE(this.#fieldReference);
return this.OR([
// IS NULL / IS NOT NULL
// NOTE(review): IS NOT NULL yields `{ notEqual: undefined, equal: undefined }`,
// i.e. no explicit "not null" marker — confirm how consumers interpret this.
{
ALT: () => {
this.CONSUME(Is);
const isNot = this.OPTION(() => this.CONSUME(Not)) !== undefined;
this.CONSUME(Null);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: isNot ? { notEqual: undefined, equal: undefined } : { equal: null },
}));
},
},
// NOT IN (strings) - LA(1)=NOT, LA(2)=IN, LA(3)=(, LA(4)=value
{
GATE: () => this.LA(4).tokenType === StringLiteral,
ALT: () => {
this.CONSUME2(Not);
this.CONSUME(In);
const values = this.SUBRULE(this.#stringInList);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notIn: values },
}));
},
},
// NOT IN (numbers)
{
GATE: () => this.LA(4).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME3(Not);
this.CONSUME2(In);
const values = this.SUBRULE(this.#numberInList);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { notIn: values },
}));
},
},
// NOT LIKE
{
ALT: () => {
this.CONSUME4(Not);
this.CONSUME(Like);
const pattern = this.CONSUME(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notLike: this.#extractStringValue(pattern.image) },
}));
},
},
// IN (strings) - LA(1)=IN, LA(2)=(, LA(3)=value
{
GATE: () => this.LA(3).tokenType === StringLiteral,
ALT: () => {
this.CONSUME3(In);
const values = this.SUBRULE2(this.#stringInList);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { in: values },
}));
},
},
// IN (numbers)
{
GATE: () => this.LA(3).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME4(In);
const values = this.SUBRULE2(this.#numberInList);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { in: values },
}));
},
},
// LIKE
{
ALT: () => {
this.CONSUME2(Like);
const pattern = this.CONSUME2(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { like: this.#extractStringValue(pattern.image) },
}));
},
},
// = string
{
GATE: () => this.LA(2).tokenType === StringLiteral,
ALT: () => {
this.CONSUME(Equals);
const token = this.CONSUME3(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { equal: this.#extractStringValue(token.image) },
}));
},
},
// = number
// Note the key is `equals` for numbers but `equal` for text conditions.
{
GATE: () => this.LA(2).tokenType === NumberLiteral,
ALT: () => {
this.CONSUME2(Equals);
const token = this.CONSUME(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { equals: parseFloat(token.image) },
}));
},
},
// = NULL
// Produces the same result as IS NULL.
{
ALT: () => {
this.CONSUME3(Equals);
this.CONSUME2(Null);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { equal: null },
}));
},
},
// != string
{
GATE: () => this.LA(2).tokenType === StringLiteral,
ALT: () => {
this.CONSUME(NotEquals);
const token = this.CONSUME4(StringLiteral);
return this.ACTION(() => ({
type: 'text' as const,
field,
conditions: { notEqual: this.#extractStringValue(token.image) },
}));
},
},
// != number
{
ALT: () => {
this.CONSUME2(NotEquals);
const token = this.CONSUME2(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { notEquals: parseFloat(token.image) },
}));
},
},
// > number
{
ALT: () => {
this.CONSUME(GreaterThan);
const token = this.CONSUME3(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { greaterThan: parseFloat(token.image) },
}));
},
},
// >= number
{
ALT: () => {
this.CONSUME(GreaterThanOrEqual);
const token = this.CONSUME4(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { greaterThanOrEqual: parseFloat(token.image) },
}));
},
},
// < number
{
ALT: () => {
this.CONSUME(LessThan);
const token = this.CONSUME5(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { lessThan: parseFloat(token.image) },
}));
},
},
// <= number
{
ALT: () => {
this.CONSUME(LessThanOrEqual);
const token = this.CONSUME6(NumberLiteral);
return this.ACTION(() => ({
type: 'number' as const,
field,
conditions: { lessThanOrEqual: parseFloat(token.image) },
}));
},
},
]);
});
// Field reference: identifier.identifier.identifier...
// Returns the path segments in order, e.g. `a.b.c` -> ['a', 'b', 'c'].
#fieldReference = this.RULE('fieldReference', (): string[] => {
const parts: string[] = [];
const first = this.CONSUME(Identifier);
this.ACTION(() => parts.push(first.image));
this.MANY(() => {
this.CONSUME(Dot);
const next = this.CONSUME2(Identifier);
this.ACTION(() => parts.push(next.image));
});
return parts;
});
// String IN list: ('val1', 'val2', ...)
// Requires at least one value; returns the unquoted, unescaped strings.
#stringInList = this.RULE('stringInList', (): string[] => {
const values: string[] = [];
this.CONSUME(LParen);
const first = this.CONSUME(StringLiteral);
this.ACTION(() => values.push(this.#extractStringValue(first.image)));
this.MANY(() => {
this.CONSUME(Comma);
const next = this.CONSUME2(StringLiteral);
this.ACTION(() => values.push(this.#extractStringValue(next.image)));
});
this.CONSUME(RParen);
return values;
});
// Number IN list: (1, 2, 3, ...)
// Requires at least one value; each literal is converted with parseFloat.
#numberInList = this.RULE('numberInList', (): number[] => {
const values: number[] = [];
this.CONSUME2(LParen);
const first = this.CONSUME(NumberLiteral);
this.ACTION(() => values.push(parseFloat(first.image)));
this.MANY(() => {
this.CONSUME2(Comma);
const next = this.CONSUME2(NumberLiteral);
this.ACTION(() => values.push(parseFloat(next.image)));
});
this.CONSUME2(RParen);
return values;
});
// Extract string value from quoted literal, handling escaped quotes
// `image` is the raw token text including its surrounding single quotes.
#extractStringValue(image: string): string {
// Remove surrounding quotes and unescape doubled quotes
return image.slice(1, -1).replace(/''/g, "'");
}
// Combine two filters with an operator, flattening if possible
// Flattening only happens when `left` is already an operator node of the same
// kind; `right` is always appended as a single child, never merged.
#combineWithOperator(left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter {
if (left.type === 'operator' && left.operator === operator) {
return {
type: 'operator',
operator,
conditions: [...left.conditions, right],
};
}
return {
type: 'operator',
operator,
conditions: [left, right],
};
}
// Entry point
#query = this.RULE('query', (): QueryFilter => {
return this.SUBRULE(this.#orExpression);
});
/**
 * Tokenizes and parses a query string into a `QueryFilter` tree.
 *
 * @param input Raw query text.
 * @returns The parsed filter.
 * @throws Error "Unterminated string starting at position N" when lexing fails
 *   and the input holds an odd number of unescaped single quotes.
 * @throws Error "Lexer error at position N: …" for any other lexing failure.
 * @throws Error "Parse error: …" with the first parser error message.
 */
public parse = (input: string): QueryFilter => {
const lexResult = QueryLexer.tokenize(input);
if (lexResult.errors.length > 0) {
// Only the first lexer error is reported.
const error = lexResult.errors[0];
// Check if this looks like an unterminated string (starts with ' but lexer failed)
if (error.message.includes("'") || input.slice(error.offset).startsWith("'")) {
// Count unescaped single quotes
const unescapedQuotes = input.replace(/''/g, '').match(/'/g);
if (unescapedQuotes && unescapedQuotes.length % 2 !== 0) {
throw new Error(`Unterminated string starting at position ${error.offset}`);
}
}
throw new Error(`Lexer error at position ${error.offset}: ${error.message}`);
}
// Hand the token stream to the parser, then run the entry rule.
this.input = lexResult.tokens;
const result = this.#query();
if (this.errors.length > 0) {
// Only the first parser error is reported.
const error = this.errors[0];
throw new Error(`Parse error: ${error.message}`);
}
return result;
};
}
export { QueryParserParser, QueryLexer };

View File

@@ -0,0 +1,85 @@
import { z } from 'zod';
/**
 * Schema for a condition on a text field. `field` is the path to the field as a
 * list of segments; every entry in `conditions` is optional.
 */
const queryConditionTextSchema = z.object({
type: z.literal('text'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
// `equal` is the only text condition that also accepts null.
equal: z.string().nullish(),
notEqual: z.string().optional(),
like: z.string().optional(),
notLike: z.string().optional(),
in: z.array(z.string()).optional(),
notIn: z.array(z.string()).optional(),
}),
});
/** A text-field condition as validated by `queryConditionTextSchema`. */
type QueryConditionText = z.infer<typeof queryConditionTextSchema>;
/**
 * Schema for a condition on a numeric field. Note the equality keys are spelled
 * `equals` / `notEquals` here, while the text schema uses `equal` / `notEqual`.
 */
const queryConditionNumberSchema = z.object({
type: z.literal('number'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
// Both equality checks accept null as well as a number.
equals: z.number().nullish(),
notEquals: z.number().nullish(),
greaterThan: z.number().optional(),
greaterThanOrEqual: z.number().optional(),
lessThan: z.number().optional(),
lessThanOrEqual: z.number().optional(),
in: z.array(z.number()).optional(),
notIn: z.array(z.number()).optional(),
}),
});
/** A numeric-field condition as validated by `queryConditionNumberSchema`. */
type QueryConditionNumber = z.infer<typeof queryConditionNumberSchema>;
/** Any single condition; the `type` property ('text' | 'number') selects the variant. */
const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]);
/** A single text or number condition. */
type QueryCondition = z.infer<typeof queryConditionSchema>;
/** A filter tree node: either a leaf condition or an and/or operator over child filters. */
type QueryFilter = QueryCondition | QueryOperator;
/** Combines child filters with a logical `and` / `or`; children may themselves be operators. */
type QueryOperator = {
type: 'operator';
operator: 'and' | 'or';
conditions: QueryFilter[];
};
// OpenAPI schema generation cannot handle z.lazy(), so the recursive filter type
// is unrolled by hand into a fixed number of levels. Each level accepts either a
// plain condition or an operator node whose children come from the level below.
// Three operator levels are supported; add further levels the same way if deeper
// nesting is ever required.

/** Builds the schema of an and/or operator node whose children match `inner`. */
const operatorSchemaOf = <TInner extends z.ZodType>(inner: TInner) =>
  z.object({
    type: z.literal('operator'),
    operator: z.enum(['and', 'or']),
    conditions: z.array(inner),
  });

// Innermost level: an operator may only contain plain conditions.
const queryFilterSchemaLevel0: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryConditionSchema),
]);

// Middle level: operator children may be level-0 filters.
const queryFilterSchemaLevel1: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryFilterSchemaLevel0),
]);

// Outermost level: operator children may be level-1 filters.
const queryFilterSchemaLevel2: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryFilterSchemaLevel1),
]);

// Public, depth-limited schema (three levels of nesting); safe for OpenAPI generation.
const queryFilterSchema = queryFilterSchemaLevel2;
export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter };
export { queryConditionSchema, queryFilterSchema };

View File

@@ -4,7 +4,7 @@ import type {
QueryCondition,
QueryConditionText,
QueryConditionNumber,
} from '#root/utils/utils.query.ts';
} from './query-parser.schemas.js';
class Stringifier {
#stringifyFilter = (filter: QueryFilter, needsParens: boolean): string => {

View File

@@ -1,8 +1,7 @@
import { describe, it, expect } from 'vitest';
import { QueryParser } from './query-parser.ts';
import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from '#root/utils/utils.query.ts';
import { QueryParser } from './query-parser.js';
import type { QueryConditionNumber, QueryConditionText, QueryFilter, QueryOperator } from './query-parser.schemas.js';
describe('QueryParser', () => {
const parser = new QueryParser();

View File

@@ -0,0 +1,18 @@
import { Stringifier } from './query-parser.stringifier.js';
import { QueryParserParser } from './query-parser.parser.js';
import type { QueryFilter } from './query-parser.schemas.js';
/**
 * Public entry point of the query DSL: turns query text into a `QueryFilter`
 * tree and a `QueryFilter` tree back into query text.
 */
class QueryParser {
  #toText = new Stringifier();
  #toFilter = new QueryParserParser();

  /** Parses query text into a filter tree; delegates to `QueryParserParser.parse`. */
  public parse = (input: string): QueryFilter => this.#toFilter.parse(input);

  /** Renders a filter tree as query text; delegates to `Stringifier.stringify`. */
  public stringify = (filter: QueryFilter): string => this.#toText.stringify(filter);
}
export { QueryParser };

View File

@@ -0,0 +1,10 @@
{
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src"
},
"include": [
"src/**/*.ts"
],
"extends": "@morten-olsen/stash-configs/tsconfig.json"
}

View File

@@ -0,0 +1,12 @@
import { defineConfig } from 'vitest/config';
import { getAliases } from '@morten-olsen/stash-tests/vitest';
// Vitest configuration: module aliases are resolved asynchronously from the
// shared test helpers and applied as `resolve.alias`.
// eslint-disable-next-line import/no-default-export
export default defineConfig(async () => ({
  resolve: {
    alias: await getAliases(),
  },
}));

4
packages/runtime/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
/node_modules/
/dist/
/coverage/
/.env

View File

@@ -0,0 +1,29 @@
{
"type": "module",
"main": "dist/exports.js",
"scripts": {
"build": "tsc --build",
"test:unit": "vitest --run --passWithNoTests",
"test": "pnpm run \"/^test:/\""
},
"packageManager": "pnpm@10.6.0",
"files": [
"dist"
],
"exports": {
".": "./dist/exports.js"
},
"devDependencies": {
"@types/node": "24.10.2",
"@vitest/coverage-v8": "4.0.15",
"typescript": "5.9.3",
"vitest": "4.0.15",
"@morten-olsen/stash-configs": "workspace:*",
"@morten-olsen/stash-tests": "workspace:*"
},
"name": "@morten-olsen/stash-runtime",
"version": "1.0.0",
"imports": {
"#root/*": "./src/*"
}
}

View File

@@ -0,0 +1 @@
export * from './api.js';

12
packages/runtime/src/global.d.ts vendored Normal file
View File

@@ -0,0 +1,12 @@
import 'fastify';
// Uses the `.js` specifier like the rest of the package; this commit dropped
// `allowImportingTsExtensions` and converted the other imports away from `.ts`.
import type { Services } from './utils/utils.services.js';

// Escape hatch for places where `any` is used deliberately.
// eslint-disable-next-line
declare type ExplicitAny = any;

// Augments Fastify so that instances expose the service container as `services`.
declare module 'fastify' {
  // eslint-disable-next-line
  export interface FastifyInstance {
    services: Services;
  }
}

View File

@@ -0,0 +1,14 @@
import { DocumentsService } from './services/documents/documents.js';
import { Services } from './utils/utils.services.js';
/**
 * Facade over the service container that exposes selected services as properties.
 */
class StashRuntime {
  #services: Services;

  /**
   * @param services Service container used to resolve the services exposed by this runtime.
   */
  constructor(services: Services) {
    // Previously the argument was dropped, leaving #services undefined and making
    // every getter throw on first use.
    this.#services = services;
  }

  /** The `DocumentsService` resolved from the service container. */
  public get documents() {
    return this.#services.get(DocumentsService);
  }
}
export { StashRuntime };

View File

@@ -0,0 +1,54 @@
import knex, { type Knex } from 'knex';
import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { migrationSource } from './migrations/migrations.js';
import { destroy, Services } from '#root/utils/utils.services.js';
/**
 * Owns the Knex connection for the runtime. The connection is backed by a PGlite
 * instance with the `vector` extension and is created lazily on first use.
 */
class DatabaseService {
  #services: Services;
  #instance?: Promise<Knex>;

  constructor(services: Services) {
    this.#services = services;
  }

  // Creates the PGlite-backed Knex instance, enables the vector extension and
  // runs all pending migrations before handing the instance out.
  #setup = async () => {
    const pglite = new PGlite({ extensions: { vector } });
    const db = knex({
      client: ClientPgLite,
      dialect: 'postgres',
      connection: () => ({ pglite }) as object,
    });
    await db.raw(`CREATE EXTENSION IF NOT EXISTS vector`);
    const source = migrationSource({ services: this.#services });
    await db.migrate.latest({ migrationSource: source });
    return db;
  };

  /**
   * Returns the shared Knex instance, starting setup on the first call. The same
   * promise is returned to every caller.
   */
  public getInstance = () => {
    return (this.#instance ??= this.#setup());
  };

  // Teardown hook: closes the connection if one was ever requested.
  [destroy] = async () => {
    const pending = this.#instance;
    if (pending) {
      const db = await pending;
      await db.destroy();
    }
  };
}
export { type TableRows, tableNames } from './migrations/migrations.js';
export { DatabaseService };

View File

@@ -0,0 +1,112 @@
import type { Migration } from './migrations.types.js';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
// Physical table names used by the migrations and by services that build queries.
const tableNames = {
documents: 'documents',
documentChunks: 'documentChunks',
relations: 'relations',
};
/**
 * Initial schema migration: creates the `documents`, `documentChunks` and
 * `relations` tables. `down` drops them in reverse dependency order.
 */
const init: Migration = {
  name: 'init',
  up: async ({ knex, services }) => {
    // The SQL type of the embedding column is supplied by the embeddings service
    // for the configured model.
    const embedding = services.get(EmbeddingsService);
    const embeddingField = await embedding.getFieldType(EMBEDDING_MODEL);

    await knex.schema.createTable(tableNames.documents, (table) => {
      table.uuid('id').primary();
      // Self-reference: a document may be owned by another document.
      table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
      table.datetime('updatedAt').notNullable();
      table.datetime('createdAt').notNullable();
      table.datetime('deletedAt').nullable();
      table.string('contentType').nullable();
      table.text('content').nullable();
      table.string('source').nullable();
      table.string('sourceId').nullable();
      table.string('type').notNullable();
      table.integer('typeVersion').nullable();
      table.text('searchText').nullable();
      table.jsonb('metadata').nullable();

      table.index(['source', 'sourceId']);
      table.index(['owner']);
    });

    await knex.schema.createTable(tableNames.documentChunks, (table) => {
      table.uuid('id').primary();
      table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
      table.text('content').notNullable();
      table.specificType('embedding', embeddingField).notNullable();
      table.string('embeddingModel').notNullable();

      table.index(['owner']);
    });

    // Similarity index on the embedding column. The previous statement was never
    // awaited (so it never ran), misspelled the column, left the camel-case table
    // name unquoted and used GIN, which pgvector columns do not support. `??`
    // bindings quote the identifiers; cosine ops match the `<=>` operator used by
    // chunk search.
    // NOTE(review): assumes `embeddingField` is a pgvector `vector(n)` type within
    // the dimension limit of HNSW indexes — confirm against EmbeddingsService.
    await knex.raw('create index on ?? using hnsw (?? vector_cosine_ops)', [tableNames.documentChunks, 'embedding']);

    await knex.schema.createTable(tableNames.relations, (table) => {
      table.uuid('from').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
      table.uuid('to').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
      // NOTE(review): declared nullable but also part of the primary key below.
      table.string('type').nullable();
      table.string('typeVersion').nullable();
      table.datetime('updatedAt').notNullable();
      table.datetime('createdAt').notNullable();
      table.datetime('deletedAt').nullable();
      table.jsonb('data');

      table.primary(['from', 'to', 'type']);
      table.index(['from']);
      table.index(['to']);
    });
  },
  down: async ({ knex }) => {
    // Drop dependants first so the foreign keys to `documents` never dangle.
    await knex.schema.dropTableIfExists(tableNames.relations);
    await knex.schema.dropTableIfExists(tableNames.documentChunks);
    await knex.schema.dropTableIfExists(tableNames.documents);
  },
};
/** Row shape of the `documents` table; mirrors the columns created by the init migration. */
type DocumentRow = {
id: string;
// Id of the owning document, or null when the document has no owner.
owner: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
contentType: string | null;
content: string | null;
source: string | null;
sourceId: string | null;
type: string;
typeVersion: number | null;
searchText: string | null;
// Stored as jsonb; the shape is not constrained here.
metadata: unknown;
};
/** Row shape of the `documentChunks` table. */
type DocumentChunkRow = {
id: string;
// NOTE(review): the init migration declares this column nullable — confirm whether null can occur.
owner: string;
content: string;
// Column type comes from the embeddings service, so it is left opaque here.
embedding: unknown;
embeddingModel: string;
};
/** Row shape of the `relations` table; `from` and `to` are document ids. */
type RelationRow = {
from: string;
to: string;
type: string;
typeVersion: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
// Stored as jsonb; the shape is not constrained here.
data: unknown;
};
/** Maps each key of `tableNames` to the row type of that table. */
type TableRows = {
  documents: DocumentRow;
  documentChunks: DocumentChunkRow;
  relations: RelationRow;
  /** @deprecated Misspelling of `relations`; kept so existing lookups keep compiling. */
  replations: RelationRow;
};
export type { TableRows };
export { tableNames, init };

View File

@@ -0,0 +1,25 @@
import type { Knex } from 'knex';
import type { Migration } from './migrations.types.js';
import { init } from './migrations.001-init.js';
import type { Services } from '#root/utils/utils.services.js';
// Migrations in the order they are handed to Knex.
const migrations = [init] satisfies Migration[];
/** Dependencies made available to every migration in addition to the Knex instance. */
type MigrationSourceOptions = {
services: Services;
};
/**
 * Builds the Knex migration source for the in-code `migrations` list.
 *
 * Each migration's `up` / `down` is wrapped so it receives the given options
 * (the service container) together with the Knex instance Knex passes in.
 *
 * @param options Extra dependencies forwarded to every migration.
 * @returns A migration source that serves the `migrations` list.
 */
const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<Migration> => {
  const source: Knex.MigrationSource<Migration> = {
    getMigrationName: ({ name }) => name,
    getMigration: async (migration) => {
      const { name } = migration;
      return {
        name,
        up: (knex) => migration.up({ ...options, knex }),
        down: (knex) => migration.down({ ...options, knex }),
      };
    },
    getMigrations: async () => migrations,
  };
  return source;
};
export { type TableRows, tableNames } from './migrations.001-init.js';
export { migrationSource };

View File

@@ -0,0 +1,16 @@
import type { Knex } from 'knex';
import type { Services } from '#root/utils/utils.services.js';
/** Arguments passed to a migration's `up` and `down` functions. */
type MigrationOptions = {
knex: Knex;
services: Services;
};
/** A named migration with an apply (`up`) step and a revert (`down`) step. */
type Migration = {
name: string;
up: (options: MigrationOptions) => Promise<void>;
down: (options: MigrationOptions) => Promise<void>;
};
export type { Migration };

View File

@@ -1,6 +1,6 @@
import type { TableRows } from '../database/database.ts';
import type { TableRows } from '../database/database.js';
import type { DocumentChunk } from './document-chunks.schemas.ts';
import type { DocumentChunk } from './document-chunks.schemas.js';
const mapFromDocumentChunkRow = (
row: TableRows['documentChunks'] & {

View File

@@ -0,0 +1,33 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.js';
/** Schema of a document chunk as exposed to callers. */
const documentChunkSchema = z.object({
id: z.string(),
owner: z.string(),
content: z.string(),
metadata: z.unknown(),
});
/** A document chunk as validated by `documentChunkSchema`. */
type DocumentChunk = z.infer<typeof documentChunkSchema>;
/**
 * Schema of the chunk search filter. `limit` defaults to 20 and `offset` to 0;
 * `conditions` may be a structured filter or a query string.
 */
const documentChunkFilterSchema = z.object({
limit: z.number().default(20),
offset: z.number().default(0),
semanticText: z.string().optional(),
conditions: z.union([queryFilterSchema, z.string()]).optional(),
});
/** A chunk search filter as validated by `documentChunkFilterSchema`. */
type DocumentChunkFilter = z.infer<typeof documentChunkFilterSchema>;
/** List-result schema for chunk searches; each item may carry an optional `distance`. */
const documentChunksFindResultSchema = createListResultSchema(
documentChunkSchema.extend({
distance: z.number().optional(),
}),
);
/** Result of a chunk search as validated by `documentChunksFindResultSchema`. */
type DocumentChunksFindResult = z.infer<typeof documentChunksFindResultSchema>;
export type { DocumentChunk, DocumentChunkFilter, DocumentChunksFindResult };
export { documentChunkSchema, documentChunkFilterSchema, documentChunksFindResultSchema };

View File

@@ -0,0 +1,66 @@
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js';
import { mapFromDocumentChunkRow } from './document-chunks.mappings.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
import type { ExplicitAny } from '#root/global.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
// Columns selected for every chunk query: all chunk columns plus the owning
// document's metadata and creation time.
const baseFields = [
`${tableNames.documentChunks}.*`,
`${tableNames.documents}.metadata`,
`${tableNames.documents}.createdAt`,
];
/** Read access to document chunks, optionally ranked by similarity to a text. */
class DocumentChunksService {
#services: Services;
constructor(services: Services) {
this.#services = services;
}
/**
 * Finds document chunks joined with their owning document.
 *
 * With `filter.semanticText`, the text is embedded with EMBEDDING_MODEL, only
 * chunks stored with that model are considered, and results are ordered by
 * ascending `distance`. Without it, results are ordered by `createdAt`
 * descending. `filter.conditions` is applied on top (a string is parsed with
 * QueryParser first), followed by `limit` / `offset`.
 *
 * @param filter Search parameters.
 * @returns The matching chunks mapped through `mapFromDocumentChunkRow`.
 */
public find = async (filter: DocumentChunkFilter): Promise<DocumentChunksFindResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
let query = db<TableRows['documentChunks']>(tableNames.documentChunks);
// NOTE(review): the return value is discarded, so this relies on the builder
// being modified in place — confirm.
query.join(tableNames.documents, `${tableNames.documents}.id`, `${tableNames.documentChunks}.owner`);
if (filter.semanticText) {
const embedding = this.#services.get(EmbeddingsService);
const [vector] = await embedding.extract({
input: [filter.semanticText],
model: EMBEDDING_MODEL,
});
// The vector text is interpolated into the SQL; it comes from the embeddings
// service, not directly from the caller's input.
query = query.select(...baseFields, db.raw(`embedding <=> '${vector.toSql()}' as distance`));
query = query.where(`${tableNames.documentChunks}.embeddingModel`, EMBEDDING_MODEL);
query = query.orderBy('distance', 'asc');
} else {
query = query.select(baseFields);
query = query.orderBy('createdAt', 'desc');
}
if (filter.conditions) {
const parser = this.#services.get(QueryParser);
query = applyQueryFilter(
query,
typeof filter.conditions === 'string' ? parser.parse(filter.conditions) : filter.conditions,
);
}
query = query.limit(filter.limit).offset(filter.offset);
const items = await query;
return {
// The mapper is cast because the joined row carries more columns than the table row type.
items: items.map(mapFromDocumentChunkRow as ExplicitAny),
};
};
}
export * from './document-chunks.schemas.js';
export { DocumentChunksService };

View File

@@ -0,0 +1,12 @@
import type { TableRows } from '../database/database.js';
import type { Document } from './documents.schemas.js';
/**
 * Converts a `documents` table row into the API `Document` shape.
 *
 * All columns are copied as-is except the timestamps, which become ISO-8601
 * strings; a missing `deletedAt` is reported as `null`.
 *
 * @param row Row read from the `documents` table.
 * @returns The document with string timestamps.
 */
const mapFromDocumentRow = (row: TableRows['documents']): Document => {
  const { createdAt, updatedAt, deletedAt } = row;
  return {
    ...row,
    createdAt: createdAt.toISOString(),
    updatedAt: updatedAt.toISOString(),
    deletedAt: deletedAt == null ? null : deletedAt.toISOString(),
  };
};
export { mapFromDocumentRow };

View File

@@ -0,0 +1,80 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.js';
/** Schema of a stored document as exposed to callers; timestamps are ISO datetime strings. */
const documentSchema = z.object({
id: z.string(),
owner: z.string().nullable(),
createdAt: z.iso.datetime(),
updatedAt: z.iso.datetime(),
deletedAt: z.iso.datetime().nullable(),
contentType: z.string().nullable(),
content: z.string().nullable(),
source: z.string().nullable(),
sourceId: z.string().nullable(),
type: z.string(),
typeVersion: z.int().nullable(),
searchText: z.string().nullable(),
metadata: z.unknown(),
});
/** A document as validated by `documentSchema`. */
type Document = z.infer<typeof documentSchema>;
/**
 * Schema of the payload accepted when inserting or updating a document. Every
 * field may be omitted; the attached `example` is schema metadata.
 */
const documentUpsertSchema = z
.object({
id: z.string().nullish(),
owner: z.string().nullish(),
contentType: z.string().nullish(),
content: z.string().nullish(),
source: z.string().nullish(),
sourceId: z.string().nullish(),
// Unlike the other fields, `type` may be omitted but not set to null.
type: z.string().optional(),
typeVersion: z.int().nullish(),
searchText: z.string().nullish(),
metadata: z.unknown().nullish(),
})
.meta({
example: {
content: 'the cat is yellow',
contentType: 'text/plain',
source: 'test',
sourceId: 'test',
type: 'raw',
metadata: {
foo: 'bar',
bar: 'baz',
},
},
});
/** An upsert payload as validated by `documentUpsertSchema`. */
type DocumentUpsert = z.infer<typeof documentUpsertSchema>;
/** Schema of an upsert outcome: what happened, the document id, and the resulting document. */
const documentUpsertResultSchema = z.object({
action: z.enum(['inserted', 'updated', 'skipped']),
id: z.string(),
document: documentSchema,
});
/** An upsert outcome as validated by `documentUpsertResultSchema`. */
type DocumentUpsertResult = z.infer<typeof documentUpsertResultSchema>;
/**
 * Schema of the document search filter. `offset` defaults to 0 and `limit` to 20.
 * `condition` is required and may be a structured filter or a query string.
 */
const documentFilterSchema = z.object({
offset: z.number().default(0),
limit: z.number().default(20),
condition: z.union([queryFilterSchema, z.string()]),
});
/** A document search filter as validated by `documentFilterSchema`. */
type DocumentFilter = z.infer<typeof documentFilterSchema>;
/** List-result schema for document searches, built from `documentSchema`. */
const documentFindResultSchema = createListResultSchema(documentSchema);
/** Result of a document search as validated by `documentFindResultSchema`. */
type DocumentFindResult = z.infer<typeof documentFindResultSchema>;
export type { Document, DocumentUpsert, DocumentUpsertResult, DocumentFilter, DocumentFindResult };
export {
documentSchema,
documentUpsertSchema,
documentUpsertResultSchema,
documentFilterSchema,
documentFindResultSchema,
};

View File

@@ -0,0 +1,179 @@
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { SplittingService } from '../splitter/splitter.js';
import type {
Document,
DocumentFilter,
DocumentFindResult,
DocumentUpsert,
DocumentUpsertResult,
} from './documents.schemas.ts';
import { mapFromDocumentRow } from './documents.mapping.js';
import { EventEmitter } from '#root/utils/utils.event-emitter.js';
import type { Services } from '#root/utils/utils.services.js';
import { compareObjectKeys } from '#root/utils/utils.compare.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
/**
 * Events emitted by `DocumentsService`; each listener receives the affected
 * document. `upserted` is emitted alongside both `inserted` and `updated`.
 */
type DocumentsServiceEvents = {
upserted: (document: Document) => void;
inserted: (document: Document) => void;
updated: (document: Document) => void;
};
/**
 * Service for reading and writing rows of the documents table.
 *
 * `upsert` emits `inserted` or `updated`, followed by `upserted`, whenever it
 * actually writes a row, and afterwards regenerates the document's chunks.
 */
class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
  #services: Services;

  constructor(services: Services) {
    super();
    this.#services = services;
  }

  /**
   * Lists documents matching `filter`.
   *
   * `filter.condition` may be a query string (parsed with `QueryParser`) or an
   * already-structured filter; `filter.limit` / `filter.offset` page the result.
   */
  public find = async (filter: DocumentFilter): Promise<DocumentFindResult> => {
    const databaseService = this.#services.get(DatabaseService);
    const db = await databaseService.getInstance();
    let query = db<TableRows['documents']>(tableNames.documents);
    // Guard on the condition itself: `filter` is always an object here, so the
    // previous `if (filter)` never skipped anything.
    if (filter.condition !== undefined) {
      const parser = this.#services.get(QueryParser);
      query = applyQueryFilter(
        query,
        typeof filter.condition === 'string' ? parser.parse(filter.condition) : filter.condition,
      );
    }
    query = query.limit(filter.limit).offset(filter.offset);
    const items = await query;
    return {
      items: items.map(mapFromDocumentRow),
    };
  };

  /**
   * Fetches a single document by id.
   *
   * NOTE(review): when no row matches, `mapFromDocumentRow` is called with
   * `undefined`; confirm whether the mapper or callers handle that case.
   */
  public get = async (id: string): Promise<Document> => {
    const databaseService = this.#services.get(DatabaseService);
    const db = await databaseService.getInstance();
    const [item] = await db<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);
    return mapFromDocumentRow(item);
  };

  /** Deletes the document row with the given id. */
  public remove = async (id: string): Promise<void> => {
    const databaseService = this.#services.get(DatabaseService);
    const db = await databaseService.getInstance();
    await db<TableRows['documents']>(tableNames.documents).where('id', id).delete();
  };

  /**
   * Inserts or updates a document and refreshes its chunks.
   *
   * Identity resolution: an existing row with the same `source` + `sourceId`
   * wins over `document.id`; otherwise `document.id` or a fresh UUID is used.
   *
   * @returns the resolved id, the action taken (`inserted`, `updated` or
   * `skipped` when the compared fields are unchanged) and the resulting document.
   */
  public upsert = async (document: DocumentUpsert): Promise<DocumentUpsertResult> => {
    const databaseService = this.#services.get(DatabaseService);
    const db = await databaseService.getInstance();

    const result = await db.transaction(async (trx) => {
      let id = document.id || crypto.randomUUID();
      if (document.source && document.sourceId) {
        const [currentSourceDocument] = await trx<TableRows['documents']>(tableNames.documents)
          .where('source', document.source)
          .andWhere('sourceId', document.sourceId)
          .limit(1);
        if (currentSourceDocument) {
          id = currentSourceDocument.id;
        }
      }

      const now = new Date();
      const [current] = await trx<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);

      if (current) {
        // Nothing relevant changed: leave the row (and its chunks) untouched.
        if (
          compareObjectKeys(current, document, [
            'sourceId',
            'source',
            'content',
            'contentType',
            'searchText',
            'type',
            'typeVersion',
            'metadata',
          ])
        ) {
          return {
            id,
            action: 'skipped',
            document: mapFromDocumentRow(current),
          } as const;
        }

        await trx<TableRows['documents']>(tableNames.documents)
          .update({
            ...document,
            id,
            updatedAt: now,
          })
          .where('id', id);

        // Mirror what was written: include the new `updatedAt`, which the
        // returned/emitted document previously left at its old value.
        const resultDocument: Document = mapFromDocumentRow({
          ...current,
          ...document,
          id,
          updatedAt: now,
        });
        // NOTE(review): events fire before the transaction commits.
        this.emit('updated', resultDocument);
        this.emit('upserted', resultDocument);
        return {
          id,
          action: 'updated',
          document: resultDocument,
        } as const;
      }

      await trx<TableRows['documents']>(tableNames.documents).insert({
        metadata: {},
        type: 'raw',
        ...document,
        id,
        createdAt: now,
        updatedAt: now,
      });
      const resultDocument: Document = mapFromDocumentRow({
        type: 'raw',
        owner: null,
        contentType: null,
        content: null,
        source: null,
        sourceId: null,
        typeVersion: null,
        searchText: null,
        metadata: {},
        ...document,
        deletedAt: null,
        id,
        createdAt: now,
        updatedAt: now,
      });
      // NOTE(review): events fire before the transaction commits.
      this.emit('inserted', resultDocument);
      this.emit('upserted', resultDocument);
      return {
        id,
        action: 'inserted',
        document: resultDocument,
      } as const;
    });

    if (result.action !== 'skipped') {
      // Split and embed before opening the transaction so it is not held open
      // while that work runs; a failure here still leaves old chunks intact.
      const splittingService = this.#services.get(SplittingService);
      const chunks = await splittingService.chunk(result.document);

      await db.transaction(async (trx) => {
        await trx<TableRows['documentChunks']>(tableNames.documentChunks).delete().where('owner', result.id);
        if (chunks.length > 0) {
          await trx<TableRows['documentChunks']>(tableNames.documentChunks).insert(
            chunks.map((chunk) => ({
              id: crypto.randomUUID(),
              owner: result.id,
              content: chunk.content,
              embedding: chunk.vector.toSql(),
              embeddingModel: chunk.model,
            })),
          );
        }
      });
    }

    return result;
  };
}
export * from './documents.schemas.js';
export { DocumentsService };

View File

@@ -0,0 +1,62 @@
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { Vector } from './embeddings.vector.js';
import type { ExplicitAny } from '#root/global.js';
// Arguments accepted by `EmbeddingsService.extract`.
type ExtractOptions = {
// Texts to embed; passed to the extractor in a single call.
input: string[];
// Model name handed to the `pipeline` loader.
model: string;
};
// A loaded feature-extraction pipeline paired with the `hidden_size` read from its model config.
type Extractor = {
extractor: FeatureExtractionPipeline;
dimensions: number;
};
/**
 * Loads feature-extraction pipelines on demand (one per model name, cached)
 * and turns text into `Vector` embeddings.
 */
class EmbeddingsService {
  // Cache of in-flight or completed pipeline loads, keyed by model name.
  #extractors = new Map<string, Promise<Extractor>>();

  /**
   * Loads the pipeline for `model` and reads its embedding size.
   *
   * @throws Error('Invalid model configuration') when the model config has no
   * numeric `hidden_size`.
   */
  #setupExtractor = async (model: string) => {
    const extractor = await pipeline('feature-extraction', model, {});
    const { config } = extractor.model;
    if (!('hidden_size' in config) || typeof config.hidden_size !== 'number') {
      throw new Error('Invalid model configuration');
    }
    return {
      extractor,
      dimensions: config.hidden_size,
    };
  };

  /**
   * Returns the cached extractor for `name`, starting a load on first use.
   *
   * A failed load is evicted from the cache so that a later call can retry
   * instead of receiving the same rejected promise forever.
   */
  #getExtractor = (name: string): Promise<Extractor> => {
    const cached = this.#extractors.get(name);
    if (cached) {
      return cached;
    }
    const pending: Promise<Extractor> = this.#setupExtractor(name).catch((error: unknown) => {
      // Only evict our own entry; a newer attempt may already have replaced it.
      if (this.#extractors.get(name) === pending) {
        this.#extractors.delete(name);
      }
      throw error;
    });
    this.#extractors.set(name, pending);
    return pending;
  };

  /** Embeds every string in `options.input` with `options.model` (CLS pooling). */
  public extract = async (options: ExtractOptions) => {
    const { input, model } = options;
    const { extractor, dimensions } = await this.#getExtractor(model);
    const output = await extractor(input, { pooling: 'cls' });
    return output.tolist().map((v: ExplicitAny) => new Vector(v, dimensions));
  };

  /** Returns the embedding size (`hidden_size`) of `model`. */
  public getDimensions = async (model: string) => {
    const { dimensions } = await this.#getExtractor(model);
    return dimensions;
  };

  /** Returns the column type string `vector(<dimensions>)` for `model`. */
  public getFieldType = async (model: string) => {
    const dimensions = await this.getDimensions(model);
    return `vector(${dimensions})`;
  };
}
export { EmbeddingsService, Vector };

View File

@@ -0,0 +1,37 @@
import { cos_sim } from '@huggingface/transformers';
import { toSql } from 'pgvector';
/**
 * Wraps a raw embedding value together with its declared number of
 * dimensions, and exposes SQL serialisation and cosine similarity helpers.
 */
class Vector {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  #value: any;
  #dimentions: number;

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  constructor(value: any, dimentions: number) {
    this.#dimentions = dimentions;
    this.#value = value;
  }

  /** The raw embedding value. */
  public get value() {
    return this.#value;
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  public set value(value: any) {
    this.#value = value;
  }

  /** Number of dimensions supplied at construction (spelling kept for callers). */
  public get dimentions() {
    return this.#dimentions;
  }

  /** Serialises the value with pgvector's `toSql`. */
  public toSql = () => toSql(this.#value);

  /** Cosine similarity (`cos_sim`) between this vector and `other`. */
  public distanceTo = (other: Vector) => cos_sim(this.#value, other.value);
}
export { Vector };

View File

@@ -0,0 +1,44 @@
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Document } from '../documents/documents.schemas.js';
import type { Chunk, Splitter } from './splitter.types.js';
import { textSplitter } from './splitters/splitters.text.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
/**
 * Splits a document into text chunks using the first registered splitter
 * that matches it, then embeds every chunk with `EMBEDDING_MODEL`.
 */
class SplittingService {
  #services: Services;
  #chunkers: Set<Splitter>;

  constructor(services: Services) {
    this.#services = services;
    this.#chunkers = new Set();
    // The plain-text splitter is always available.
    this.addChunkers([textSplitter]);
  }

  /** Registers additional splitters; already-registered ones are not duplicated. */
  public addChunkers = (splitter: Splitter[]) => {
    this.#chunkers = new Set([...this.#chunkers, ...splitter]);
  };

  /**
   * Produces embedded chunks for `input`.
   *
   * @returns an empty array when no registered splitter matches the document.
   */
  public chunk = async (input: Document): Promise<Chunk[]> => {
    let selected: Splitter | undefined;
    for (const candidate of this.#chunkers) {
      if (candidate.match(input)) {
        selected = candidate;
        break;
      }
    }
    if (!selected) {
      return [];
    }

    const contents = await selected.chunk(input);
    const vectors = await this.#services.get(EmbeddingsService).extract({
      input: contents,
      model: EMBEDDING_MODEL,
    });

    // Vectors come back in input order, so pair them up by index.
    return contents.map((content, position) => ({
      content,
      vector: vectors[position],
      model: EMBEDDING_MODEL,
    }));
  };
}
export * from './splitter.types.js';
export { SplittingService };

View File

@@ -0,0 +1,15 @@
import type { Document } from '../documents/documents.schemas.js';
import type { Vector } from '../embeddings/embeddings.vector.js';
// One piece of a split document: its text, the embedding vector for that text, and the name of the model used.
type Chunk = {
content: string;
vector: Vector;
model: string;
};
// Strategy for splitting a document into text pieces.
type Splitter = {
// Returns true when this splitter can handle the given document.
match: (document: Document) => boolean;
// Resolves to the document's text pieces.
chunk: (document: Document) => Promise<string[]>;
};
export type { Chunk, Splitter };

View File

@@ -0,0 +1,17 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.js';
/**
 * Splitter for any document that has non-empty `content`; cuts the content
 * into pieces with a recursive character splitter (chunk size 100, no overlap).
 */
const textSplitter: Splitter = {
  match: (document) => Boolean(document.content),
  chunk: async ({ content }) => {
    if (!content) {
      return [];
    }
    const recursiveSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
    return await recursiveSplitter.splitText(content);
  },
};
export { textSplitter };

View File

@@ -0,0 +1,17 @@
import { DatabaseService } from '../database/database.js';
import { Services } from '#root/utils/utils.services.js';
/** Forces expensive dependencies to initialise up front. */
class WarmupService {
  #services: Services;

  constructor(services: Services) {
    this.#services = services;
  }

  /** Resolves once the database instance has been obtained. */
  public ensure = async () => {
    await this.#services.get(DatabaseService).getInstance();
  };
}
export { WarmupService };

View File

@@ -0,0 +1,14 @@
import deepEqual from 'deep-equal';
/**
 * Deep-compares two objects on a chosen subset of keys.
 *
 * @returns true when every listed key holds deep-equal values in `a` and `b`
 * (also true for an empty key list); stops at the first mismatch.
 */
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) =>
  keys.every((key) => deepEqual(a[key], b[key]));
export { compareObjectKeys };

View File

@@ -0,0 +1,3 @@
// Name of the embedding model shared across the package.
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';
export { EMBEDDING_MODEL };

View File

@@ -0,0 +1,66 @@
import type { ExplicitAny } from '#root/global.js';
/** A listener for one event; may be synchronous or return a promise. */
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;

/** Options accepted by `on` / `once`. */
type OnOptions = {
  /** When this signal aborts, the listener is removed. */
  abortSignal?: AbortSignal;
};

/**
 * Minimal typed event emitter.
 *
 * `T` maps event names to listener signatures. Registering the same callback
 * twice yields two independent subscriptions.
 */
class EventEmitter<T extends Record<string, (...args: ExplicitAny[]) => void | Promise<void>>> {
  #listeners = new Map<keyof T, Set<EventListener<ExplicitAny>>>();

  /**
   * Subscribes `callback` to `event`.
   *
   * @returns a function that removes this subscription; calling it more than
   * once is harmless.
   */
  on = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
    const { abortSignal } = options;

    // Wrap the callback so each subscription has its own identity in the set.
    const registered = (...args: Parameters<T[K]>) => callback(...args);

    let listeners = this.#listeners.get(event);
    if (!listeners) {
      listeners = new Set();
      this.#listeners.set(event, listeners);
    }
    listeners.add(registered);

    // A plain closure: the previous code handed out `abortController.abort`
    // unbound, which throws when invoked, and rebuilt the set from a stale
    // reference. Deleting from the live set avoids both problems.
    const unsubscribe = (): void => {
      this.#listeners.get(event)?.delete(registered);
      abortSignal?.removeEventListener('abort', unsubscribe);
    };

    if (abortSignal?.aborted) {
      // Same as DOM `addEventListener`: an already-aborted signal means the
      // listener is never active.
      unsubscribe();
    } else {
      abortSignal?.addEventListener('abort', unsubscribe, { once: true });
    }

    return unsubscribe;
  };

  /**
   * Like `on`, but the listener is removed right before its first invocation.
   *
   * @returns a function that cancels the subscription if it has not fired yet.
   */
  once = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
    const unsubscribe: () => void = this.on(
      event,
      async (...args) => {
        unsubscribe();
        await callback(...args);
      },
      options,
    );
    return unsubscribe;
  };

  /** Invokes every listener for `event` synchronously; returned promises are not awaited. */
  emit = <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
    const listeners = this.#listeners.get(event);
    if (!listeners) {
      return;
    }
    // Iterate a snapshot so listeners may unsubscribe while being notified.
    for (const listener of [...listeners]) {
      listener(...args);
    }
  };

  /** Invokes every listener for `event` and waits for all of them to settle successfully. */
  emitAsync = async <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
    const listeners = this.#listeners.get(event);
    if (!listeners) {
      return;
    }
    await Promise.all(Array.from(listeners, (listener) => listener(...args)));
  };
}
export { EventEmitter };

View File

@@ -0,0 +1,161 @@
import type {
QueryCondition,
QueryConditionNumber,
QueryConditionText,
QueryFilter,
} from '@morten-olsen/stash-query-dsl';
import { type Knex } from 'knex';
/**
 * Makes a JSON key safe to embed inside a single-quoted SQL string literal
 * by doubling every single quote, as PostgreSQL requires.
 */
const escapeJsonKey = (key: string): string => key.split("'").join("''");
/**
 * Builds the selector for a (possibly nested) field.
 *
 * - A single-segment field yields a plain column name, prefixed with
 *   `tableName.` when a table name is given.
 * - A multi-segment field yields a raw SQL expression: the first segment is the
 *   column, intermediate segments are traversed with `->`, and the last one
 *   is extracted with `->>`.
 */
const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?: string) => {
  const [column, ...path] = field;
  if (field.length === 1) {
    return tableName ? `${tableName}.${column}` : column;
  }

  const columnRef = query.client.ref(column);
  const qualifiedRef = tableName ? columnRef.withSchema(tableName) : columnRef;
  const base = qualifiedRef.toString();

  const traversal = path
    .slice(0, -1)
    .map((segment) => ` -> '${escapeJsonKey(segment)}'`)
    .join('');
  const extraction = ` ->> '${escapeJsonKey(path[path.length - 1])}'`;

  return query.client.raw(`${base}${traversal}${extraction}`);
};
/** True for any value other than `null` / `undefined` (so `0` and `''` count as present). */
const isPresent = <T>(value: T | null | undefined): value is T => value !== undefined && value !== null;

/**
 * Adds the WHERE clauses described by a text condition to `query`.
 *
 * A comparison set to `null` becomes `IS NULL` / `IS NOT NULL`; an omitted
 * comparison adds nothing. Presence is tested explicitly so that an empty
 * string is still applied as a real comparison value.
 */
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
  const selector = getFieldSelector(query, field, tableName);
  if (isPresent(conditions.equal)) {
    query = query.where(selector, '=', conditions.equal);
  }
  if (isPresent(conditions.notEqual)) {
    query = query.where(selector, '<>', conditions.notEqual);
  }
  if (isPresent(conditions.like)) {
    query = query.whereLike(selector, conditions.like);
  }
  if (isPresent(conditions.notLike)) {
    query = query.not.whereLike(selector, conditions.notLike);
  }
  if (conditions.equal === null) {
    query = query.whereNull(selector);
  }
  if (conditions.notEqual === null) {
    query = query.whereNotNull(selector);
  }
  if (isPresent(conditions.in)) {
    query = query.whereIn(selector, conditions.in);
  }
  if (isPresent(conditions.notIn)) {
    query = query.whereNotIn(selector, conditions.notIn);
  }
  return query;
};

/**
 * Adds the WHERE clauses described by a numeric condition to `query`.
 *
 * `equals` / `notEquals` set to `null` become `IS NULL` / `IS NOT NULL`.
 * Bounds are tested for presence rather than truthiness, so a bound of `0`
 * (e.g. `greaterThan: 0`) is applied instead of being silently dropped.
 */
const applyQueryConditionNumber = (
  query: Knex.QueryBuilder,
  { field, tableName, conditions }: QueryConditionNumber,
) => {
  const selector = getFieldSelector(query, field, tableName);
  if (isPresent(conditions.equals)) {
    query = query.where(selector, '=', conditions.equals);
  }
  if (isPresent(conditions.notEquals)) {
    query = query.where(selector, '<>', conditions.notEquals);
  }
  if (conditions.equals === null) {
    query = query.whereNull(selector);
  }
  if (conditions.notEquals === null) {
    query = query.whereNotNull(selector);
  }
  if (isPresent(conditions.greaterThan)) {
    query = query.where(selector, '>', conditions.greaterThan);
  }
  if (isPresent(conditions.greaterThanOrEqual)) {
    query = query.where(selector, '>=', conditions.greaterThanOrEqual);
  }
  if (isPresent(conditions.lessThan)) {
    query = query.where(selector, '<', conditions.lessThan);
  }
  if (isPresent(conditions.lessThanOrEqual)) {
    query = query.where(selector, '<=', conditions.lessThanOrEqual);
  }
  if (isPresent(conditions.in)) {
    query = query.whereIn(selector, conditions.in);
  }
  if (isPresent(conditions.notIn)) {
    query = query.whereNotIn(selector, conditions.notIn);
  }
  return query;
};
/**
 * Applies a single leaf condition to `query`, dispatching on its `type`.
 *
 * @throws Error('Unknown filter type') for any type other than `text` / `number`.
 */
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
  if (options.type === 'text') {
    return applyQueryConditionText(query, options);
  }
  if (options.type === 'number') {
    return applyQueryConditionNumber(query, options);
  }
  throw new Error(`Unknown filter type`);
};
/**
 * Applies a filter tree to `query`.
 *
 * Leaf conditions are delegated to `applyQueryCondition`. An operator node
 * wraps its children in one grouped WHERE: the first child is applied directly
 * inside the group and every further child is attached as its own nested
 * group with `orWhere` / `andWhere`. An operator without children leaves the
 * query unchanged.
 */
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
  if (filter.type !== 'operator') {
    return applyQueryCondition(query, filter);
  }

  const { conditions, operator } = filter;
  if (conditions.length === 0) {
    return query;
  }

  // Shared by both operators; only the joining method differs.
  const combine = (group: Knex.QueryBuilder, method: 'orWhere' | 'andWhere') => {
    conditions.forEach((condition, index) => {
      if (index === 0) {
        applyQueryFilter(group, condition);
        return;
      }
      group[method]((nested) => {
        applyQueryFilter(nested, condition);
      });
    });
  };

  switch (operator) {
    case 'or': {
      return query.where((group) => {
        combine(group, 'orWhere');
      });
    }
    case 'and': {
      return query.where((group) => {
        combine(group, 'andWhere');
      });
    }
  }
};
export { applyQueryCondition, applyQueryFilter };

View File

@@ -0,0 +1,8 @@
import { z, type ZodType } from 'zod';
/** Wraps an item schema in the list-result shape `{ items: T[] }`. */
const createListResultSchema = <T extends ZodType>(schema: T) => {
  const items = z.array(schema);
  return z.object({ items });
};
export { createListResultSchema };

View File

@@ -0,0 +1,51 @@
/** Well-known key: a service may expose a method under it to be run by `Services.destroy`. */
const destroy = Symbol('destroy');
/** Key of the internal instance registry on a `Services` container. */
const instanceKey = Symbol('instances');

/** A service class: constructed with the container, optionally disposable via `[destroy]`. */
type ServiceDependency<T> = new (services: Services) => T & {
  [destroy]?: () => Promise<void> | void;
};

/**
 * Lazy service container: each service class is instantiated at most once,
 * on first `get`, and receives the container so it can resolve its own
 * dependencies.
 */
class Services {
  [instanceKey]: Map<ServiceDependency<unknown>, unknown>;

  constructor() {
    this[instanceKey] = new Map();
  }

  /**
   * Returns the instance registered for `service`, creating it on first use.
   *
   * @throws Error('Could not generate instance') when the registered value is falsy.
   */
  public get = <T>(service: ServiceDependency<T>) => {
    if (!this[instanceKey].has(service)) {
      this[instanceKey].set(service, new service(this));
    }
    const instance = this[instanceKey].get(service);
    if (!instance) {
      throw new Error('Could not generate instance');
    }
    return instance as T;
  };

  /** Registers `instance` (possibly partial, e.g. a test double) for `service`. */
  public set = <T>(service: ServiceDependency<T>, instance: Partial<T>) => {
    this[instanceKey].set(service, instance);
  };

  /**
   * Creates a new container that starts with the same registered instances.
   *
   * The registry is copied into a fresh `Map`, so later registrations on either
   * container do not affect the other. The instances themselves are shared,
   * and any that were constructed by this container still reference it.
   */
  public clone = (): Services => {
    const services = new Services();
    services[instanceKey] = new Map(this[instanceKey]);
    return services;
  };

  /** Runs the `[destroy]` hook of every registered instance that has one, in parallel. */
  public destroy = async () => {
    await Promise.all(
      Array.from(this[instanceKey].values(), async (instance) => {
        if (
          typeof instance === 'object' &&
          instance &&
          destroy in instance &&
          typeof instance[destroy] === 'function'
        ) {
          await instance[destroy]();
        }
      }),
    );
  };
}
export { Services, destroy };

View File

@@ -0,0 +1,9 @@
{
"compilerOptions": {
"outDir": "./dist"
},
"include": [
"src/**/*.ts"
],
"extends": "@morten-olsen/stash-configs/tsconfig.json"
}

View File

@@ -0,0 +1,12 @@
import { defineConfig } from 'vitest/config';
import { getAliases } from '@morten-olsen/stash-tests/vitest';
// Vitest configuration: awaits `getAliases()` and registers the result as the resolver's alias map.
// eslint-disable-next-line import/no-default-export
export default defineConfig(async () => {
const aliases = await getAliases();
return {
resolve: {
alias: aliases,
},
};
});

View File

@@ -37,6 +37,7 @@
"@fastify/websocket": "11.2.0",
"@huggingface/transformers": "^3.8.1",
"@langchain/textsplitters": "^1.0.1",
"@morten-olsen/stash-query-dsl": "workspace:*",
"@scalar/fastify-api-reference": "1.40.2",
"better-sqlite3": "^12.5.0",
"deep-equal": "^2.2.3",

View File

@@ -10,12 +10,12 @@ import {
type ZodTypeProvider,
} from 'fastify-type-provider-zod';
import { Services } from './utils/utils.services.ts';
import { systemEndpoints } from './endpoints/system/system.ts';
import { WarmupService } from './services/warmup/warmup.ts';
import { documentEndpoints } from './endpoints/documents/documents.ts';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.ts';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.ts';
import { Services } from './utils/utils.services.js';
import { systemEndpoints } from './endpoints/system/system.js';
import { WarmupService } from './services/warmup/warmup.js';
import { documentEndpoints } from './endpoints/documents/documents.js';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.js';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.js';
class BaseError extends Error {
public statusCode: number;

View File

@@ -1,6 +1,6 @@
import { createApi } from './api.js';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.ts';
import { Services } from './utils/utils.services.ts';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.js';
import { Services } from './utils/utils.services.js';
const services = new Services();
const server = await createApi(services);

View File

@@ -4,7 +4,7 @@ import {
documentChunkFilterSchema,
documentChunksFindResultSchema,
DocumentChunksService,
} from '#root/services/document-chunks/document-chunks.ts';
} from '#root/services/document-chunks/document-chunks.js';
const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -4,7 +4,7 @@ import {
documentFilterSchema,
documentFindResultSchema,
DocumentsService,
} from '#root/services/documents/documents.ts';
} from '#root/services/documents/documents.js';
const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -4,7 +4,7 @@ import {
DocumentsService,
documentUpsertResultSchema,
documentUpsertSchema,
} from '#root/services/documents/documents.ts';
} from '#root/services/documents/documents.js';
const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -1,7 +1,7 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import { z } from 'zod';
import { DatabaseService } from '#root/services/database/database.ts';
import { DatabaseService } from '#root/services/database/database.js';
const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({

View File

@@ -1,202 +0,0 @@
import type { Token } from './query-parser.types.ts';
/**
 * Turns a query string into a flat token list terminated by an `EOF` token.
 *
 * Recognises punctuation, comparison operators, single-quoted strings (with
 * `''` as an escaped quote), numbers (optional sign, fraction and exponent),
 * identifiers and case-insensitive keywords.
 */
class Lexer {
  // Fixed-text tokens; two-character operators are looked up before single characters.
  static readonly #fixedTokens = new Map<string, Token['type']>([
    ['(', 'LPAREN'],
    [')', 'RPAREN'],
    [',', 'COMMA'],
    ['.', 'DOT'],
    ['!=', 'NOT_EQUALS'],
    ['>=', 'GREATER_THAN_OR_EQUAL'],
    ['<=', 'LESS_THAN_OR_EQUAL'],
    ['=', 'EQUALS'],
    ['>', 'GREATER_THAN'],
    ['<', 'LESS_THAN'],
  ]);

  // Keywords, keyed by their upper-case spelling.
  static readonly #keywords = new Map<string, Token['type']>([
    ['AND', 'AND'],
    ['OR', 'OR'],
    ['LIKE', 'LIKE'],
    ['NOT', 'NOT'],
    ['IN', 'IN'],
    ['IS', 'IS'],
    ['NULL', 'NULL'],
  ]);

  // Sticky patterns: each is anchored at `lastIndex` before use.
  static readonly #whitespacePattern = /\s*/y;
  static readonly #numberPattern = /-?[0-9]+(?:\.[0-9]+)?(?:[eE][+-]?[0-9]*)?/y;
  static readonly #identifierPattern = /[a-zA-Z0-9_]*/y;

  #input: string;
  #position = 0;
  #tokens: Token[] = [];

  constructor(input: string) {
    this.#input = input;
  }

  /** Matches `pattern` at the current position, advances past it, and returns the matched text. */
  #consume = (pattern: RegExp): string => {
    pattern.lastIndex = this.#position;
    const text = pattern.exec(this.#input)?.[0] ?? '';
    this.#position += text.length;
    return text;
  };

  /** Reads a single-quoted string starting at the current position. */
  #readString = (): Token => {
    const start = this.#position;
    let cursor = start + 1; // step over the opening quote
    let value = '';
    while (cursor < this.#input.length) {
      const char = this.#input[cursor];
      if (char !== "'") {
        value += char;
        cursor += 1;
        continue;
      }
      if (this.#input[cursor + 1] === "'") {
        // A doubled quote is an escaped quote inside the string.
        value += "'";
        cursor += 2;
        continue;
      }
      this.#position = cursor + 1; // step over the closing quote
      return { type: 'STRING', value, position: start };
    }
    this.#position = cursor;
    throw new Error(`Unterminated string starting at position ${start}`);
  };

  /** Reads the token that starts at the current (non-whitespace) position. */
  #scanToken = (): Token => {
    const start = this.#position;
    const char = this.#input[start];

    for (const candidate of [this.#input.slice(start, start + 2), char]) {
      const type = Lexer.#fixedTokens.get(candidate);
      if (type) {
        this.#position += candidate.length;
        return { type, value: candidate, position: start };
      }
    }

    if (char === "'") {
      return this.#readString();
    }

    const startsNumber = /[0-9]/.test(char) || (char === '-' && /[0-9]/.test(this.#input[start + 1]));
    if (startsNumber) {
      return { type: 'NUMBER', value: this.#consume(Lexer.#numberPattern), position: start };
    }

    if (/[a-zA-Z_]/.test(char)) {
      const value = this.#consume(Lexer.#identifierPattern);
      // Keywords match case-insensitively; the token keeps the original spelling.
      const type = Lexer.#keywords.get(value.toUpperCase()) ?? 'IDENTIFIER';
      return { type, value, position: start };
    }

    throw new Error(`Unexpected character '${char}' at position ${this.#position}`);
  };

  /**
   * Tokenises the remaining input.
   *
   * @returns the accumulated tokens, ending with an `EOF` token positioned at
   * the end of the input.
   * @throws Error on an unexpected character or an unterminated string.
   */
  public tokenize = (): Token[] => {
    for (;;) {
      this.#consume(Lexer.#whitespacePattern);
      if (this.#position >= this.#input.length) {
        break;
      }
      this.#tokens.push(this.#scanToken());
    }
    this.#tokens.push({ type: 'EOF', value: '', position: this.#position });
    return this.#tokens;
  };
}
export { Lexer };

View File

@@ -1,317 +0,0 @@
import { Lexer } from './query-parser.lexer.ts';
import type { Token, TokenType } from './query-parser.types.ts';
import type { QueryConditionText, QueryConditionNumber, QueryFilter, QueryCondition } from '#root/utils/utils.query.ts';
/**
 * Recursive-descent parser that turns a query string into a `QueryFilter`.
 *
 * Precedence, lowest to highest: `OR`, `AND`, then a primary expression
 * (a parenthesised expression or a single condition). Consecutive uses of the
 * same operator are flattened into one operator node.
 */
class Parser {
  #tokens: Token[] = [];
  #position = 0;

  /** The token at the current position. */
  #current = (): Token => {
    return this.#tokens[this.#position];
  };

  /** Returns the current token and moves to the next one. */
  #advance = (): Token => {
    const token = this.#current();
    this.#position++;
    return token;
  };

  /**
   * Consumes the current token if it has the given type.
   *
   * @throws Error naming the expected and actual token types otherwise.
   */
  #expect = (type: TokenType): Token => {
    const token = this.#current();
    if (token.type !== type) {
      throw new Error(`Expected ${type} but got ${token.type} at position ${token.position}`);
    }
    return this.#advance();
  };

  #parseExpression = (): QueryFilter => {
    return this.#parseOr();
  };

  /** or-expression := and-expression (OR and-expression)* */
  #parseOr = (): QueryFilter => {
    let left = this.#parseAnd();
    while (this.#current().type === 'OR') {
      this.#advance();
      const right = this.#parseAnd();
      left = this.#combineWithOperator(left, right, 'or');
    }
    return left;
  };

  /** and-expression := primary (AND primary)* */
  #parseAnd = (): QueryFilter => {
    let left = this.#parsePrimary();
    while (this.#current().type === 'AND') {
      this.#advance();
      const right = this.#parsePrimary();
      left = this.#combineWithOperator(left, right, 'and');
    }
    return left;
  };

  /** Joins two filters, extending `left` when it already is a node of the same operator. */
  #combineWithOperator = (left: QueryFilter, right: QueryFilter, operator: 'and' | 'or'): QueryFilter => {
    if (left.type === 'operator' && left.operator === operator) {
      return {
        type: 'operator',
        operator,
        conditions: [...left.conditions, right],
      };
    }
    return {
      type: 'operator',
      operator,
      conditions: [left, right],
    };
  };

  /** primary := '(' expression ')' | condition */
  #parsePrimary = (): QueryFilter => {
    if (this.#current().type === 'LPAREN') {
      this.#advance();
      const expr = this.#parseExpression();
      this.#expect('RPAREN');
      return expr;
    }
    return this.#parseCondition();
  };

  /**
   * Parses `field <operator> <operand>`.
   *
   * @throws Error when the token after the field is not a supported operator.
   */
  #parseCondition = (): QueryCondition => {
    const field = this.#parseField();
    const token = this.#current();

    // IS NULL / IS NOT NULL
    if (token.type === 'IS') {
      this.#advance();
      const isNot = this.#current().type === 'NOT';
      if (isNot) {
        this.#advance();
      }
      this.#expect('NULL');
      // Could be text or number; text is used as the default.
      // IS NOT NULL must carry `notEqual: null`: leaving both comparisons
      // undefined produced a condition that constrains nothing.
      return {
        type: 'text',
        field,
        conditions: isNot ? { notEqual: null } : { equal: null },
      } satisfies QueryConditionText;
    }

    // NOT IN / NOT LIKE
    if (token.type === 'NOT') {
      this.#advance();
      const nextToken = this.#current();
      if (nextToken.type === 'IN') {
        this.#advance();
        return this.#parseInCondition(field, true);
      }
      if (nextToken.type === 'LIKE') {
        this.#advance();
        const pattern = this.#expect('STRING').value;
        return {
          type: 'text',
          field,
          conditions: { notLike: pattern },
        };
      }
      throw new Error(`Expected IN or LIKE after NOT at position ${nextToken.position}`);
    }

    // IN
    if (token.type === 'IN') {
      this.#advance();
      return this.#parseInCondition(field, false);
    }

    // LIKE
    if (token.type === 'LIKE') {
      this.#advance();
      const pattern = this.#expect('STRING').value;
      return {
        type: 'text',
        field,
        conditions: { like: pattern },
      };
    }

    // Equality accepts a string, number or NULL operand.
    if (token.type === 'EQUALS') {
      this.#advance();
      return this.#parseValueCondition(field, 'equals');
    }
    if (token.type === 'NOT_EQUALS') {
      this.#advance();
      return this.#parseValueCondition(field, 'notEquals');
    }

    // Ordering comparisons only accept a numeric operand.
    if (token.type === 'GREATER_THAN') {
      this.#advance();
      const value = this.#parseNumber();
      return {
        type: 'number',
        field,
        conditions: { greaterThan: value },
      };
    }
    if (token.type === 'GREATER_THAN_OR_EQUAL') {
      this.#advance();
      const value = this.#parseNumber();
      return {
        type: 'number',
        field,
        conditions: { greaterThanOrEqual: value },
      };
    }
    if (token.type === 'LESS_THAN') {
      this.#advance();
      const value = this.#parseNumber();
      return {
        type: 'number',
        field,
        conditions: { lessThan: value },
      };
    }
    if (token.type === 'LESS_THAN_OR_EQUAL') {
      this.#advance();
      const value = this.#parseNumber();
      return {
        type: 'number',
        field,
        conditions: { lessThanOrEqual: value },
      };
    }

    throw new Error(`Unexpected token '${token.value}' at position ${token.position}`);
  };

  /** field := IDENTIFIER ('.' IDENTIFIER)* — returned as its path segments. */
  #parseField = (): string[] => {
    const parts: string[] = [];
    parts.push(this.#expect('IDENTIFIER').value);
    while (this.#current().type === 'DOT') {
      this.#advance();
      parts.push(this.#expect('IDENTIFIER').value);
    }
    return parts;
  };

  /**
   * Parses the operand of `=` / `!=`; the operand's token type decides whether
   * a text or a number condition is produced.
   */
  #parseValueCondition = (field: string[], operator: 'equals' | 'notEquals'): QueryCondition => {
    const token = this.#current();

    if (token.type === 'STRING') {
      this.#advance();
      const textCondition: QueryConditionText = {
        type: 'text',
        field,
        conditions: operator === 'equals' ? { equal: token.value } : { notEqual: token.value },
      };
      return textCondition;
    }

    if (token.type === 'NUMBER') {
      this.#advance();
      const value = parseFloat(token.value);
      const numCondition: QueryConditionNumber = {
        type: 'number',
        field,
        conditions: operator === 'equals' ? { equals: value } : { notEquals: value },
      };
      return numCondition;
    }

    if (token.type === 'NULL') {
      this.#advance();
      // Text is used as the default type for NULL comparisons.
      // `!= NULL` must carry `notEqual: null`; an empty conditions object
      // would constrain nothing.
      return {
        type: 'text',
        field,
        conditions: operator === 'equals' ? { equal: null } : { notEqual: null },
      } satisfies QueryConditionText;
    }

    throw new Error(`Expected value but got ${token.type} at position ${token.position}`);
  };

  /** Consumes a NUMBER token and returns its numeric value. */
  #parseNumber = (): number => {
    const token = this.#expect('NUMBER');
    return parseFloat(token.value);
  };

  /**
   * Parses the parenthesised list after `IN` / `NOT IN`. The first element
   * fixes the list type; every further element must be of the same type.
   */
  #parseInCondition = (field: string[], isNot: boolean): QueryCondition => {
    this.#expect('LPAREN');
    const firstToken = this.#current();

    if (firstToken.type === 'STRING') {
      const values: string[] = [];
      values.push(this.#advance().value);
      while (this.#current().type === 'COMMA') {
        this.#advance();
        values.push(this.#expect('STRING').value);
      }
      this.#expect('RPAREN');
      return {
        type: 'text',
        field,
        conditions: isNot ? { notIn: values } : { in: values },
      };
    }

    if (firstToken.type === 'NUMBER') {
      const values: number[] = [];
      values.push(parseFloat(this.#advance().value));
      while (this.#current().type === 'COMMA') {
        this.#advance();
        values.push(parseFloat(this.#expect('NUMBER').value));
      }
      this.#expect('RPAREN');
      return {
        type: 'number',
        field,
        conditions: isNot ? { notIn: values } : { in: values },
      };
    }

    throw new Error(`Expected STRING or NUMBER in IN list at position ${firstToken.position}`);
  };

  /**
   * Parses `input` into a filter tree.
   *
   * @throws Error on lexing errors, on any grammar violation, or when tokens
   * remain after a complete expression.
   */
  public parse(input: string): QueryFilter {
    const lexer = new Lexer(input);
    this.#tokens = lexer.tokenize();
    this.#position = 0;

    const result = this.#parseExpression();

    if (this.#current().type !== 'EOF') {
      throw new Error(`Unexpected token '${this.#current().value}' at position ${this.#current().position}`);
    }
    return result;
  }
}
export { Parser };

View File

@@ -1,19 +0,0 @@
import { Stringifier } from './query-parser.stringifier.ts';
import { Parser } from './query-parser.parser.ts';
import type { QueryFilter } from '#root/utils/utils.query.ts';
class QueryParser {
private parser = new Parser();
private stringifier = new Stringifier();
public parse = (input: string): QueryFilter => {
return this.parser.parse(input);
};
public stringify = (filter: QueryFilter): string => {
return this.stringifier.stringify(filter);
};
}
export { QueryParser };

View File

@@ -1,30 +0,0 @@
// Every kind of token in the query language: literals, keywords, comparison
// operators, punctuation and the end-of-input marker.
type TokenType =
| 'IDENTIFIER'
| 'STRING'
| 'NUMBER'
| 'AND'
| 'OR'
| 'LIKE'
| 'NOT'
| 'IN'
| 'IS'
| 'NULL'
| 'EQUALS'
| 'NOT_EQUALS'
| 'GREATER_THAN'
| 'GREATER_THAN_OR_EQUAL'
| 'LESS_THAN'
| 'LESS_THAN_OR_EQUAL'
| 'LPAREN'
| 'RPAREN'
| 'COMMA'
| 'DOT'
| 'EOF';
// A single token: its kind, its text value, and its numeric position.
type Token = {
type: TokenType;
value: string;
position: number;
};
export type { TokenType, Token };

View File

@@ -3,9 +3,9 @@ import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { migrationSource } from './migrations/migrations.ts';
import { migrationSource } from './migrations/migrations.js';
import { destroy, Services } from '#root/utils/utils.services.ts';
import { destroy, Services } from '#root/utils/utils.services.js';
class DatabaseService {
#services: Services;
@@ -50,5 +50,5 @@ class DatabaseService {
};
}
export { type TableRows, tableNames } from './migrations/migrations.ts';
export { type TableRows, tableNames } from './migrations/migrations.js';
export { DatabaseService };

View File

@@ -1,7 +1,7 @@
import type { Migration } from './migrations.types.ts';
import type { Migration } from './migrations.types.js';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
const tableNames = {
documents: 'documents',

View File

@@ -1,9 +1,9 @@
import type { Knex } from 'knex';
import type { Migration } from './migrations.types.ts';
import { init } from './migrations.001-init.ts';
import type { Migration } from './migrations.types.js';
import { init } from './migrations.001-init.js';
import type { Services } from '#root/utils/utils.services.ts';
import type { Services } from '#root/utils/utils.services.js';
// All migrations known to this module. `satisfies` checks every entry against
// `Migration` without widening the array's inferred type.
const migrations = [init] satisfies Migration[];
@@ -21,5 +21,5 @@ const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<
getMigrations: async () => migrations,
});
export { type TableRows, tableNames } from './migrations.001-init.ts';
export { type TableRows, tableNames } from './migrations.001-init.js';
export { migrationSource };

View File

@@ -1,6 +1,6 @@
import type { Knex } from 'knex';
import type { Services } from '#root/utils/utils.services.ts';
import type { Services } from '#root/utils/utils.services.js';
type MigrationOptions = {
knex: Knex;

View File

@@ -0,0 +1,13 @@
import type { TableRows } from '../database/database.js';
import type { DocumentChunk } from './document-chunks.schemas.js';
/** Row shape accepted by the mapper: a `documentChunks` row that also carries a `metadata` value. */
type DocumentChunkRowInput = TableRows['documentChunks'] & { metadata: unknown };

/**
 * Maps a document-chunk row to a `DocumentChunk`.
 *
 * The result is a shallow copy of `row`: every property is carried over
 * unchanged and nested values are shared with the input.
 *
 * @param row - Row to convert.
 * @returns A new object with the same properties as `row`.
 */
const mapFromDocumentChunkRow = (row: DocumentChunkRowInput): DocumentChunk => {
  const chunk: DocumentChunk = { ...row };
  return chunk;
};
export { mapFromDocumentChunkRow };

View File

@@ -1,7 +1,7 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentChunkSchema = z.object({
id: z.string(),

View File

@@ -1,14 +1,15 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.ts';
import { mapFromDocumentChunkRow } from './document.mappings.ts';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js';
import { mapFromDocumentChunkRow } from './document-chunks.mappings.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
import type { ExplicitAny } from '#root/global.js';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
import { QueryParser } from '#root/query-parser/query-parser.ts';
import { applyQueryFilter } from '#root/utils/utils.query.js';
const baseFields = [
`${tableNames.documentChunks}.*`,
@@ -61,5 +62,5 @@ class DocumentChunksService {
};
}
export * from './document-chunks.schemas.ts';
export * from './document-chunks.schemas.js';
export { DocumentChunksService };

View File

@@ -1,6 +1,6 @@
import type { TableRows } from '../database/database.ts';
import type { TableRows } from '../database/database.js';
import type { Document } from './documents.schemas.ts';
import type { Document } from './documents.schemas.js';
const mapFromDocumentRow = (row: TableRows['documents']): Document => ({
...row,

View File

@@ -1,7 +1,7 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentSchema = z.object({
id: z.string(),

View File

@@ -1,5 +1,7 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { SplittingService } from '../splitter/splitter.ts';
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { SplittingService } from '../splitter/splitter.js';
import type {
Document,
@@ -8,13 +10,12 @@ import type {
DocumentUpsert,
DocumentUpsertResult,
} from './documents.schemas.ts';
import { mapFromDocumentRow } from './documents.mapping.ts';
import { mapFromDocumentRow } from './documents.mapping.js';
import { EventEmitter } from '#root/utils/utils.event-emitter.ts';
import type { Services } from '#root/utils/utils.services.ts';
import { compareObjectKeys } from '#root/utils/utils.compare.ts';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
import { QueryParser } from '#root/query-parser/query-parser.ts';
import { EventEmitter } from '#root/utils/utils.event-emitter.js';
import type { Services } from '#root/utils/utils.services.js';
import { compareObjectKeys } from '#root/utils/utils.compare.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
type DocumentsServiceEvents = {
upserted: (document: Document) => void;
@@ -174,5 +175,5 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
};
}
export * from './documents.schemas.ts';
export * from './documents.schemas.js';
export { DocumentsService };

View File

@@ -1,6 +1,8 @@
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { Vector } from './embeddings.vector.ts';
import { Vector } from './embeddings.vector.js';
import type { ExplicitAny } from '#root/global.js';
type ExtractOptions = {
input: string[];
@@ -57,4 +59,4 @@ class EmbeddingsService {
};
}
export { EmbeddingsService };
export { EmbeddingsService, Vector };

View File

@@ -1,11 +1,11 @@
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import type { Document } from '../documents/documents.schemas.ts';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Document } from '../documents/documents.schemas.js';
import type { Chunk, Splitter } from './splitter.types.ts';
import { textSplitter } from './splitters/splitters.text.ts';
import type { Chunk, Splitter } from './splitter.types.js';
import { textSplitter } from './splitters/splitters.text.js';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
class SplittingService {
#services: Services;
@@ -40,5 +40,5 @@ class SplittingService {
};
}
export * from './splitter.types.ts';
export * from './splitter.types.js';
export { SplittingService };

View File

@@ -1,5 +1,5 @@
import type { Document } from '../documents/documents.schemas.ts';
import type { Vector } from '../embeddings/embeddings.vector.ts';
import type { Document } from '../documents/documents.schemas.js';
import type { Vector } from '../embeddings/embeddings.vector.js';
type Chunk = {
content: string;

View File

@@ -1,6 +1,6 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.ts';
import type { Splitter } from '../splitter.types.js';
const textSplitter: Splitter = {
match: (document) => !!document.content,

View File

@@ -1,6 +1,6 @@
import { DatabaseService } from '../database/database.ts';
import { DatabaseService } from '../database/database.js';
import { Services } from '#root/utils/utils.services.ts';
import { Services } from '#root/utils/utils.services.js';
class WarmupService {
#services: Services;

View File

@@ -1,3 +1,5 @@
import type { ExplicitAny } from '#root/global.js';
/**
 * Callback signature for an event listener.
 *
 * `T` is the tuple of arguments the listener receives. A listener may be
 * synchronous or return a promise.
 */
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;
type OnOptions = {

View File

@@ -1,6 +1,10 @@
import type {
QueryCondition,
QueryConditionNumber,
QueryConditionText,
QueryFilter,
} from '@morten-olsen/stash-query-dsl';
import { type Knex } from 'knex';
import { z } from 'zod';
/**
* Escapes a JSON key for use in PostgreSQL JSON operators.
* Escapes single quotes by doubling them, which is the PostgreSQL standard.
@@ -30,74 +34,6 @@ const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?:
return query.client.raw(sqlExpression);
};
/**
 * Operators available on a text condition. Every operator is optional;
 * `equal` additionally accepts `null`.
 */
const textConditionOperatorsSchema = z.object({
  equal: z.string().nullish(),
  notEqual: z.string().optional(),
  like: z.string().optional(),
  notLike: z.string().optional(),
  in: z.array(z.string()).optional(),
  notIn: z.array(z.string()).optional(),
});

/** Builds one `{ summary, value }` entry for the text schema's `examples` metadata. */
const textConditionExample = (summary: string, field: string[], conditions: Record<string, unknown>) => ({
  summary,
  value: { type: 'text', field, conditions },
});

/**
 * Schema for a condition on a text field.
 *
 * `field` is the path to the value as an array of segments and `tableName`
 * is optional. The `.meta()` payload only attaches documentation examples.
 */
const queryConditionTextSchema = z
  .object({
    type: z.literal('text'),
    tableName: z.string().optional(),
    field: z.array(z.string()),
    conditions: textConditionOperatorsSchema,
  })
  .meta({
    example: { type: 'text', field: ['metadata', 'foo'], conditions: { equal: 'bar' } },
    examples: [
      textConditionExample('Equal condition', ['metadata', 'foo'], { equal: 'bar' }),
      textConditionExample('Like condition', ['content'], { like: '%cat%' }),
      textConditionExample('In condition', ['type'], { in: ['demo', 'article', 'post'] }),
      textConditionExample('Null check', ['source'], { equal: null }),
    ],
  });

/** Text condition object inferred from `queryConditionTextSchema`. */
type QueryConditionText = z.infer<typeof queryConditionTextSchema>;
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equal) {
@@ -127,77 +63,6 @@ const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, c
return query;
};
/**
 * Operators available on a number condition. Every operator is optional;
 * `equals` and `notEquals` additionally accept `null`.
 */
const numberConditionOperatorsSchema = z.object({
  equals: z.number().nullish(),
  notEquals: z.number().nullish(),
  greaterThan: z.number().optional(),
  greaterThanOrEqual: z.number().optional(),
  lessThan: z.number().optional(),
  lessThanOrEqual: z.number().optional(),
  in: z.array(z.number()).optional(),
  notIn: z.array(z.number()).optional(),
});

/** Builds one `{ summary, value }` entry for the number schema's `examples` metadata. */
const numberConditionExample = (summary: string, conditions: Record<string, unknown>) => ({
  summary,
  value: { type: 'number', field: ['typeVersion'], conditions },
});

/**
 * Schema for a condition on a numeric field.
 *
 * `field` is the path to the value as an array of segments and `tableName`
 * is optional. The `.meta()` payload only attaches documentation examples.
 */
const queryConditionNumberSchema = z
  .object({
    type: z.literal('number'),
    tableName: z.string().optional(),
    field: z.array(z.string()),
    conditions: numberConditionOperatorsSchema,
  })
  .meta({
    example: { type: 'number', field: ['typeVersion'], conditions: { equals: 1 } },
    examples: [
      numberConditionExample('Equals condition', { equals: 1 }),
      numberConditionExample('Greater than condition', { greaterThan: 0 }),
      numberConditionExample('Range condition', { greaterThanOrEqual: 1, lessThanOrEqual: 10 }),
      numberConditionExample('In condition', { in: [1, 2, 3] }),
    ],
  });

/** Number condition object inferred from `queryConditionNumberSchema`. */
type QueryConditionNumber = z.infer<typeof queryConditionNumberSchema>;
const applyQueryConditionNumber = (
query: Knex.QueryBuilder,
{ field, tableName, conditions }: QueryConditionNumber,
@@ -236,10 +101,6 @@ const applyQueryConditionNumber = (
return query;
};
/**
 * Schema for any single-field condition. The `type` property discriminates
 * between the text and the number variant.
 */
const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]);
/** Condition object inferred from `queryConditionSchema`. */
type QueryCondition = z.infer<typeof queryConditionSchema>;
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
switch (options.type) {
case 'text': {
@@ -254,254 +115,6 @@ const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition)
}
};
/** A filter tree node: either a single condition or an operator combining further filters. */
type QueryFilter = QueryCondition | QueryOperator;
/**
 * Operator node joining its child filters with `and` or `or`.
 * The type is recursive: children may themselves be operators.
 */
type QueryOperator = {
type: 'operator';
operator: 'and' | 'or';
conditions: QueryFilter[];
};
// Depth-limited recursive schema for OpenAPI compatibility.
// OpenAPI cannot handle z.lazy(), so the recursion is unrolled by hand: each
// level accepts either a plain condition or an operator whose children are
// validated by the level below. Three levels are unrolled (Level0..Level2),
// which should be sufficient for most use cases; add Level3, Level4, etc. if
// deeper nesting is needed.

/** Builds a text `equal` condition for use in the schema examples. */
const exampleTextEqual = (field: string[], equal: string) => ({
  type: 'text',
  field,
  conditions: { equal },
});

/** Builds an operator node for use in the schema examples. */
const exampleOperator = (operator: 'and' | 'or', conditions: unknown[]) => ({
  type: 'operator',
  operator,
  conditions,
});

// Thunks rather than constants: every example gets its own fresh objects, so
// no two metadata payloads share references.
const exampleFooIsBar = () => exampleTextEqual(['metadata', 'foo'], 'bar');
const exampleFooIsBaz = () => exampleTextEqual(['metadata', 'foo'], 'baz');
const exampleTypeIsDemo = () => exampleTextEqual(['type'], 'demo');

/** Builds the operator object schema whose children are validated by `child`. */
const operatorSchemaOf = <TChild extends z.ZodType>(child: TChild) =>
  z.object({
    type: z.literal('operator'),
    operator: z.enum(['and', 'or']),
    conditions: z.array(child),
  });

/** Level 0: a condition, or an operator over plain conditions. */
const queryFilterSchemaLevel0: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryConditionSchema).meta({
    example: exampleOperator('and', [exampleFooIsBar()]),
    examples: [
      {
        summary: 'AND operator',
        value: exampleOperator('and', [exampleFooIsBar(), exampleTypeIsDemo()]),
      },
      {
        summary: 'OR operator',
        value: exampleOperator('or', [exampleFooIsBar(), exampleFooIsBaz()]),
      },
    ],
  }),
]);

/** Level 1: a condition, or an operator over level-0 filters. */
const queryFilterSchemaLevel1: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryFilterSchemaLevel0).meta({
    example: exampleOperator('or', [exampleOperator('and', [exampleFooIsBar()]), exampleFooIsBaz()]),
    examples: [
      {
        summary: 'Nested AND within OR',
        value: exampleOperator('or', [
          exampleOperator('and', [exampleFooIsBar(), exampleTypeIsDemo()]),
          exampleFooIsBaz(),
        ]),
      },
    ],
  }),
]);

/** Level 2: a condition, or an operator over level-1 filters. */
const queryFilterSchemaLevel2: z.ZodType<QueryFilter> = z.union([
  queryConditionSchema,
  operatorSchemaOf(queryFilterSchemaLevel1).meta({
    example: exampleOperator('and', [
      exampleOperator('or', [exampleFooIsBar(), exampleFooIsBaz()]),
      exampleTypeIsDemo(),
    ]),
    examples: [
      {
        summary: 'Complex nested query',
        value: exampleOperator('and', [
          exampleOperator('or', [exampleFooIsBar(), exampleFooIsBaz()]),
          exampleTypeIsDemo(),
        ]),
      },
    ],
  }),
]);

// Export the depth-limited schema (supports 3 levels of nesting).
// This works with OpenAPI schema generation.
const queryFilterSchema = queryFilterSchemaLevel2;
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
if (filter.type === 'operator') {
if (filter.conditions.length === 0) {
@@ -545,5 +158,4 @@ const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
}
};
export type { QueryConditionText, QueryConditionNumber, QueryOperator, QueryCondition, QueryFilter };
export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter };
export { applyQueryCondition, applyQueryFilter };

View File

@@ -1,6 +1,7 @@
{
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src",
"paths": {
"#root/*": [
"./src/*"