Morten Olsen
2025-12-09 20:32:08 +01:00
commit 2cfd54c344
54 changed files with 8794 additions and 0 deletions

4
packages/server/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
/node_modules/
/dist/
/coverage/
/.env

6
packages/server/.u8.json Normal file
View File

@@ -0,0 +1,6 @@
{
"values": {
"monoRepo": false
},
"entries": []
}

View File

@@ -0,0 +1,52 @@
{
"type": "module",
"main": "dist/exports.js",
"scripts": {
"dev": "tsx --watch src/dev.ts",
"start": "node dist/start.js",
"build": "tsc --build",
"test:unit": "vitest --run --passWithNoTests",
"test": "pnpm run \"/^test:/\""
},
"packageManager": "pnpm@10.6.0",
"files": [
"dist"
],
"exports": {
".": "./dist/exports.js"
},
"devDependencies": {
"@morten-olsen/stash-configs": "workspace:*",
"@morten-olsen/stash-tests": "workspace:*",
"@types/deep-equal": "^1.0.4",
"@types/node": "24.10.2",
"@vitest/coverage-v8": "4.0.15",
"tsx": "4.21.0",
"typescript": "5.9.3",
"vitest": "4.0.15"
},
"name": "@morten-olsen/stash-server",
"version": "1.0.0",
"imports": {
"#root/*": "./src/*"
},
"dependencies": {
"@electric-sql/pglite": "^0.3.14",
"@fastify/cors": "11.1.0",
"@fastify/swagger": "9.6.1",
"@fastify/websocket": "11.2.0",
"@huggingface/transformers": "^3.8.1",
"@langchain/textsplitters": "^1.0.1",
"@scalar/fastify-api-reference": "1.40.2",
"better-sqlite3": "^12.5.0",
"deep-equal": "^2.2.3",
"fastify": "5.6.2",
"fastify-type-provider-zod": "6.1.0",
"knex": "^3.1.0",
"knex-pglite": "^0.13.0",
"pg": "^8.16.3",
"pgvector": "^0.2.1",
"zod": "4.1.13",
"zod-to-json-schema": "3.25.0"
}
}

109
packages/server/src/api.ts Normal file
View File

@@ -0,0 +1,109 @@
import fastifyCors from '@fastify/cors';
import fastifySwagger from '@fastify/swagger';
import fastify from 'fastify';
import {
hasZodFastifySchemaValidationErrors,
isResponseSerializationError,
jsonSchemaTransform,
serializerCompiler,
validatorCompiler,
type ZodTypeProvider,
} from 'fastify-type-provider-zod';
import { Services } from './utils/utils.services.ts';
import { systemEndpoints } from './endpoints/system/system.ts';
import { WarmupService } from './services/warmup/warmup.ts';
import { documentEndpoints } from './endpoints/documents/documents.ts';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.ts';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.ts';
class BaseError extends Error {
public statusCode: number;
constructor(message: string, statusCode = 500) {
super(message);
this.name = this.constructor.name;
this.statusCode = statusCode;
}
}
const createApi = async (services: Services = new Services()) => {
const app = fastify().withTypeProvider<ZodTypeProvider>();
app.setValidatorCompiler(validatorCompiler);
app.setSerializerCompiler(serializerCompiler);
app.decorate('services', services);
app.register(fastifyCors);
app.register(fastifySwagger, {
openapi: {
info: {
title: 'My API',
version: '1.0.0',
},
},
transform: jsonSchemaTransform,
});
await app.register(import('@scalar/fastify-api-reference'), {
routePrefix: '/docs',
});
app.setErrorHandler((err, req, reply) => {
console.error(err);
if (hasZodFastifySchemaValidationErrors(err)) {
return reply.code(400).send({
error: 'Request Validation Error',
message: "Request doesn't match the schema",
statusCode: 400,
details: {
issues: err.validation,
method: req.method,
url: req.url,
},
});
}
if (isResponseSerializationError(err)) {
return reply.code(500).send({
error: 'Internal Server Error',
message: "Response doesn't match the schema",
statusCode: 500,
details: {
issues: err.cause.issues,
method: err.method,
url: err.url,
},
});
}
if (err instanceof BaseError) {
return reply.code(err.statusCode ?? 500).send({
error: err.name,
message: err.message,
statusCode: err.statusCode,
});
}
return reply.code(500).send({
error: 'Internal Server Error',
message: err instanceof Error ? err.message : 'An unknown error occurred',
statusCode: 500,
});
});
app.addHook('onReady', async () => {
const warmupService = app.services.get(WarmupService);
await warmupService.ensure();
});
await app.register(systemEndpoints, { prefix: '/system' });
await app.register(documentEndpoints, { prefix: '/documents' });
await app.register(documentFilterEndpoints, { prefix: '/document-filters' });
await app.register(documentChunkFilterEndpoints, { prefix: '/document-chunk-filters' });
await app.ready();
app.swagger();
return app;
};
export { createApi };
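
A minimal usage sketch for the API above, assuming the route prefixes registered in createApi; Fastify's inject exercises the routes without binding a port, and the first document upsert pulls the embedding model:

import { createApi } from './api.js';
import { Services } from './utils/utils.services.ts';

const services = new Services();
const app = await createApi(services);

// Readiness probe, served under the /system prefix.
const ready = await app.inject({ method: 'GET', url: '/system/ready' });
console.log(ready.statusCode, ready.json()); // 200 { status: 'ok' }

// Upsert a document; only `type` is required by documentUpsertSchema.
const upserted = await app.inject({
  method: 'POST',
  url: '/documents',
  payload: { type: 'demo', content: 'the cat is yellow' },
});
console.log(upserted.json().action); // 'inserted' on a fresh database

await app.close();
await services.destroy();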

View File

@@ -0,0 +1,38 @@
import { createApi } from './api.js';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.ts';
import { Services } from './utils/utils.services.ts';
const services = new Services();
const server = await createApi(services);
const documentsService = services.get(DocumentsService);
const documents: DocumentUpsert[] = [
{
metadata: {
foo: 'bar',
},
type: 'demo',
content: 'the cat is yellow',
},
{
metadata: {
foo: 'bar',
},
type: 'demo',
content: 'the dog is blue',
},
{
metadata: {
foo: 'baz',
},
source: 'test',
content: 'the pig says hi',
type: 'demo',
},
];
await Promise.all(documents.map((document) => documentsService.upsert(document)));
await server.listen({
port: 3400,
});

View File

@@ -0,0 +1,31 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
documentChunkFilterSchema,
documentChunksFindResultSchema,
DocumentChunksService,
} from '#root/services/document-chunks/document-chunks.ts';
const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
url: '',
schema: {
operationId: 'POST/documents-chunk-filters',
tags: ['document-chunks'],
summary: 'Find document chunks',
body: documentChunkFilterSchema,
response: {
200: documentChunksFindResultSchema,
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentChunksService = services.get(DocumentChunksService);
const response = await documentChunksService.find(req.body);
await reply.send(response);
},
});
};
export { documentChunkFilterEndpoints };
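
A sketch of calling this route from a client, assuming the dev server from dev.ts is listening on port 3400; the body combines a semantic query with a metadata condition in the shape documentChunkFilterSchema expects:

const response = await fetch('http://localhost:3400/document-chunk-filters', {
  method: 'POST',
  headers: { 'content-type': 'application/json' },
  body: JSON.stringify({
    limit: 5,
    offset: 0,
    semanticText: 'which animal is yellow?',
    conditions: {
      type: 'text',
      field: ['metadata', 'foo'],
      conditions: { equal: 'bar' },
    },
  }),
});
const { items } = await response.json();
// Each item is a chunk row plus a cosine distance when semanticText was provided.
console.log(items.map((item: { content: string; distance?: number }) => [item.content, item.distance]));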

View File

@@ -0,0 +1,31 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
documentFilterSchema,
documentFindResultSchema,
DocumentsService,
} from '#root/services/documents/documents.ts';
const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
url: '',
schema: {
operationId: 'POST/documents-filters',
tags: ['documents'],
summary: 'Find documents',
body: documentFilterSchema,
response: {
200: documentFindResultSchema,
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentsService = services.get(DocumentsService);
const response = await documentsService.find(req.body);
await reply.send(response);
},
});
};
export { documentFilterEndpoints };

View File

@@ -0,0 +1,31 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
DocumentsService,
documentUpsertResultSchema,
documentUpsertSchema,
} from '#root/services/documents/documents.ts';
const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
url: '',
schema: {
operationId: 'POST/documents',
tags: ['documents'],
summary: 'Upsert document',
body: documentUpsertSchema,
response: {
200: documentUpsertResultSchema,
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentsService = services.get(DocumentsService);
const response = await documentsService.upsert(req.body);
await reply.send(response);
},
});
};
export { documentEndpoints };
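
A sketch of the upsert contract this route exposes: when source and sourceId are supplied, re-posting an unchanged payload is expected to come back as 'skipped' with the same id instead of creating a duplicate (host and port as in dev.ts; the payload values are made up):

const payload = {
  type: 'note',
  source: 'rss',
  sourceId: 'feed-item-42',
  content: 'the pig says hi',
  metadata: { foo: 'baz' },
};
const post = () =>
  fetch('http://localhost:3400/documents', {
    method: 'POST',
    headers: { 'content-type': 'application/json' },
    body: JSON.stringify(payload),
  }).then((res) => res.json());

const first = await post(); // { action: 'inserted', id, document }
const second = await post(); // { action: 'skipped', id, document } for the identical payload
console.log(first.action, second.action, first.id === second.id); // inserted skipped true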

View File

@@ -0,0 +1,32 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import { z } from 'zod';
import { DatabaseService } from '#root/services/database/database.ts';
const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'GET',
url: '/ready',
schema: {
tags: ['system'],
operationId: 'GET/system/ready',
summary: 'Get system ready state',
response: {
200: z.object({
status: z.literal('ok'),
}),
},
},
handler: async (_, reply) => {
const { services } = instance;
const databaseService = services.get(DatabaseService);
const db = await databaseService.getInstance();
await db.raw('SELECT 1=1');
await reply.send({
status: 'ok',
});
},
});
};
export { systemEndpoints };

View File

@@ -0,0 +1 @@
export * from './api.js';

8
packages/server/src/extensions.d.ts vendored Normal file
View File

@@ -0,0 +1,8 @@
import 'fastify';
declare module 'fastify' {
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface FastifyRequest {
_foo: 'bar';
}
}

12
packages/server/src/global.d.ts vendored Normal file
View File

@@ -0,0 +1,12 @@
import 'fastify';
import type { Services } from './utils/utils.services.ts';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export type ExplicitAny = any;
declare global {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type ExplicitAny = any;
}
declare module 'fastify' {
// eslint-disable-next-line
export interface FastifyInstance {
services: Services;
}
}

View File

@@ -0,0 +1,54 @@
import knex, { type Knex } from 'knex';
import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { migrationSource } from './migrations/migrations.ts';
import { destroy, Services } from '#root/utils/utils.services.ts';
class DatabaseService {
#services: Services;
#instance?: Promise<Knex>;
constructor(services: Services) {
this.#services = services;
}
#setup = async () => {
const pglite = new PGlite({
extensions: { vector },
});
const instance = knex({
client: ClientPgLite,
dialect: 'postgres',
connection: () => ({ pglite }) as object,
});
await instance.raw(`CREATE EXTENSION IF NOT EXISTS vector`);
await instance.migrate.latest({
migrationSource: migrationSource({ services: this.#services }),
});
return instance;
};
public getInstance = () => {
if (!this.#instance) {
this.#instance = this.#setup();
}
return this.#instance;
};
[destroy] = async () => {
if (!this.#instance) {
return;
}
const instance = await this.#instance;
await instance.destroy();
};
}
export { type TableRows, tableNames } from './migrations/migrations.ts';
export { DatabaseService };
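
A small sketch of how other services consume this: resolve DatabaseService from the container, await getInstance() (which runs the migrations against the in-memory PGlite), and query through the exported tableNames:

import { DatabaseService, tableNames, type TableRows } from '#root/services/database/database.ts';
import { Services } from '#root/utils/utils.services.ts';

const services = new Services();
const db = await services.get(DatabaseService).getInstance();

// Migrations have run at this point, so the documents table exists (and is empty).
const rows = await db<TableRows['documents']>(tableNames.documents).select('id', 'type').limit(5);
console.log(rows.length);

await services.destroy(); // tears down the knex/PGlite instance via the destroy symbol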

View File

@@ -0,0 +1,112 @@
import type { Migration } from './migrations.types.ts';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
const tableNames = {
documents: 'documents',
documentChunks: 'documentChunks',
relations: 'relations',
};
const init: Migration = {
name: 'init',
up: async ({ knex, services }) => {
const embedding = services.get(EmbeddingsService);
const embeddingField = await embedding.getFieldType(EMBEDDING_MODEL);
await knex.schema.createTable(tableNames.documents, (table) => {
table.uuid('id').primary();
table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.datetime('updatedAt').notNullable();
table.datetime('createdAt').notNullable();
table.datetime('deletedAt').nullable();
table.string('contentType').nullable();
table.text('content').nullable();
table.string('source').nullable();
table.string('sourceId').nullable();
table.string('type').notNullable();
table.integer('typeVersion').nullable();
table.text('searchText').nullable();
table.jsonb('metadata').nullable();
table.index(['source', 'sourceId']);
table.index(['owner']);
});
await knex.schema.createTable(tableNames.documentChunks, (table) => {
table.uuid('id').primary();
table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.text('content').notNullable();
table.specificType('embedding', embeddingField).notNullable();
table.string('embeddingModel').notNullable();
table.index(['owner']);
});
// index the embedding column for the cosine-distance (<=>) lookups used by document-chunks
await knex.raw(`create index on "${tableNames.documentChunks}" using hnsw ("embedding" vector_cosine_ops)`);
await knex.schema.createTable(tableNames.relations, (table) => {
table.uuid('from').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.uuid('to').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.string('type').notNullable();
table.string('typeVersion').nullable();
table.datetime('updatedAt').notNullable();
table.datetime('createdAt').notNullable();
table.datetime('deletedAt').nullable();
table.jsonb('data');
table.primary(['from', 'to', 'type']);
table.index(['from']);
table.index(['to']);
});
},
down: async ({ knex }) => {
await knex.schema.dropTableIfExists(tableNames.relations);
await knex.schema.dropTableIfExists(tableNames.documentChunks);
await knex.schema.dropTableIfExists(tableNames.documents);
},
};
type DocumentRow = {
id: string;
owner: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
contentType: string | null;
content: string | null;
source: string | null;
sourceId: string | null;
type: string;
typeVersion: number | null;
searchText: string | null;
metadata: unknown;
};
type DocumentChunkRow = {
id: string;
owner: string;
content: string;
embedding: unknown;
embeddingModel: string;
};
type RelationRow = {
from: string;
to: string;
type: string;
typeVersion: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
data: unknown;
};
type TableRows = {
documents: DocumentRow;
documentChunks: DocumentChunkRow;
relations: RelationRow;
};
export type { TableRows };
export { tableNames, init };

View File

@@ -0,0 +1,25 @@
import type { Knex } from 'knex';
import type { Migration } from './migrations.types.ts';
import { init } from './migrations.001-init.ts';
import type { Services } from '#root/utils/utils.services.ts';
const migrations = [init] satisfies Migration[];
type MigrationSourceOptions = {
services: Services;
};
const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<Migration> => ({
getMigrationName: (migration) => migration.name,
getMigration: async (migration) => ({
name: migration.name,
up: (knex) => migration.up({ ...options, knex }),
down: (knex) => migration.down({ ...options, knex }),
}),
getMigrations: async () => migrations,
});
export { type TableRows, tableNames } from './migrations.001-init.ts';
export { migrationSource };

View File

@@ -0,0 +1,16 @@
import type { Knex } from 'knex';
import type { Services } from '#root/utils/utils.services.ts';
type MigrationOptions = {
knex: Knex;
services: Services;
};
type Migration = {
name: string;
up: (options: MigrationOptions) => Promise<void>;
down: (options: MigrationOptions) => Promise<void>;
};
export type { Migration };

View File

@@ -0,0 +1,33 @@
import { z } from 'zod';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
const documentChunkSchema = z.object({
id: z.string(),
owner: z.string(),
content: z.string(),
metadata: z.unknown(),
});
type DocumentChunk = z.infer<typeof documentChunkSchema>;
const documentChunkFilterSchema = z.object({
limit: z.number().default(20),
offset: z.number().default(0),
semanticText: z.string().optional(),
conditions: queryFilterSchema.optional(),
});
type DocumentChunkFilter = z.infer<typeof documentChunkFilterSchema>;
const documentChunksFindResultSchema = createListResultSchema(
documentChunkSchema.extend({
distance: z.number().optional(),
}),
);
type DocumentChunksFindResult = z.infer<typeof documentChunksFindResultSchema>;
export type { DocumentChunk, DocumentChunkFilter, DocumentChunksFindResult };
export { documentChunkSchema, documentChunkFilterSchema, documentChunksFindResultSchema };

View File

@@ -0,0 +1,60 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.ts';
import { mapFromDocumentChunkRow } from './document.mappings.ts';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
import type { ExplicitAny } from '#root/global.js';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
const baseFields = [
`${tableNames.documentChunks}.*`,
`${tableNames.documents}.metadata`,
`${tableNames.documents}.createdAt`,
];
class DocumentChunksService {
#services: Services;
constructor(services: Services) {
this.#services = services;
}
public find = async (filter: DocumentChunkFilter): Promise<DocumentChunksFindResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
let query = db<TableRows['documentChunks']>(tableNames.documentChunks);
query.join(tableNames.documents, `${tableNames.documents}.id`, `${tableNames.documentChunks}.owner`);
if (filter.semanticText) {
const embedding = this.#services.get(EmbeddingsService);
const [vector] = await embedding.extract({
input: [filter.semanticText],
model: EMBEDDING_MODEL,
});
query = query.select(...baseFields, db.raw('embedding <=> ?::vector as distance', [vector.toSql()]));
query = query.where(`${tableNames.documentChunks}.embeddingModel`, EMBEDDING_MODEL);
query = query.orderBy('distance', 'asc');
} else {
query = query.select(baseFields);
query = query.orderBy('createdAt', 'desc');
}
if (filter.conditions) {
query = applyQueryFilter(query, filter.conditions);
}
query = query.limit(filter.limit).offset(filter.offset);
const items = await query;
return {
items: items.map(mapFromDocumentChunkRow as ExplicitAny),
};
};
}
export * from './document-chunks.schemas.ts';
export { DocumentChunksService };

View File

@@ -0,0 +1,13 @@
import type { TableRows } from '../database/database.ts';
import type { DocumentChunk } from './document-chunks.schemas.ts';
const mapFromDocumentChunkRow = (
row: TableRows['documentChunks'] & {
metadata: unknown;
},
): DocumentChunk => ({
...row,
});
export { mapFromDocumentChunkRow };

View File

@@ -0,0 +1,12 @@
import type { TableRows } from '../database/database.ts';
import type { Document } from './documents.schemas.ts';
const mapFromDocumentRow = (row: TableRows['documents']): Document => ({
...row,
createdAt: row.createdAt.toISOString(),
updatedAt: row.updatedAt.toISOString(),
deletedAt: row.deletedAt?.toISOString() || null,
});
export { mapFromDocumentRow };

View File

@@ -0,0 +1,66 @@
import { z } from 'zod';
import { createListResultSchema } from '#root/utils/utils.schema.ts';
import { queryFilterSchema } from '#root/utils/utils.query.ts';
const documentSchema = z.object({
id: z.string(),
owner: z.string().nullable(),
createdAt: z.iso.datetime(),
updatedAt: z.iso.datetime(),
deletedAt: z.iso.datetime().nullable(),
contentType: z.string().nullable(),
content: z.string().nullable(),
source: z.string().nullable(),
sourceId: z.string().nullable(),
type: z.string(),
typeVersion: z.int().nullable(),
searchText: z.string().nullable(),
metadata: z.unknown(),
});
type Document = z.infer<typeof documentSchema>;
const documentUpsertSchema = z.object({
id: z.string().nullish(),
owner: z.string().nullish(),
contentType: z.string().nullish(),
content: z.string().nullish(),
source: z.string().nullish(),
sourceId: z.string().nullish(),
type: z.string(),
typeVersion: z.int().nullish(),
searchText: z.string().nullish(),
metadata: z.unknown().nullish(),
});
type DocumentUpsert = z.infer<typeof documentUpsertSchema>;
const documentUpsertResultSchema = z.object({
action: z.enum(['inserted', 'updated', 'skipped']),
id: z.string(),
document: documentSchema,
});
type DocumentUpsertResult = z.infer<typeof documentUpsertResultSchema>;
const documentFilterSchema = z.object({
offset: z.number().default(0),
limit: z.number().default(20),
condition: queryFilterSchema.optional(),
});
type DocumentFilter = z.infer<typeof documentFilterSchema>;
const documentFindResultSchema = createListResultSchema(documentSchema);
type DocumentFindResult = z.infer<typeof documentFindResultSchema>;
export type { Document, DocumentUpsert, DocumentUpsertResult, DocumentFilter, DocumentFindResult };
export {
documentSchema,
documentUpsertSchema,
documentUpsertResultSchema,
documentFilterSchema,
documentFindResultSchema,
};

View File

@@ -0,0 +1,171 @@
import { DatabaseService, tableNames, type TableRows } from '../database/database.ts';
import { SplittingService } from '../splitter/splitter.ts';
import type {
Document,
DocumentFilter,
DocumentFindResult,
DocumentUpsert,
DocumentUpsertResult,
} from './documents.schemas.ts';
import { mapFromDocumentRow } from './documents.mapping.ts';
import { EventEmitter } from '#root/utils/utils.event-emitter.ts';
import type { Services } from '#root/utils/utils.services.ts';
import { compareObjectKeys } from '#root/utils/utils.compare.ts';
import { applyQueryFilter } from '#root/utils/utils.query.ts';
type DocumentsServiceEvents = {
upserted: (document: Document) => void;
inserted: (document: Document) => void;
updated: (document: Document) => void;
};
class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
#services: Services;
constructor(services: Services) {
super();
this.#services = services;
}
public find = async (filter: DocumentFilter): Promise<DocumentFindResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
let query = db<TableRows['documents']>(tableNames.documents);
if (filter.condition) {
query = applyQueryFilter(query, filter.condition);
}
query = query.limit(filter.limit).offset(filter.offset);
const items = await query;
return {
items: items.map(mapFromDocumentRow),
};
};
public get = async (id: string): Promise<Document> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
const [item] = await db<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);
return mapFromDocumentRow(item);
};
public remove = async (id: string): Promise<void> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
await db<TableRows['documents']>(tableNames.documents).where('id', id).delete();
};
public upsert = async (document: DocumentUpsert): Promise<DocumentUpsertResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
const result = await db.transaction(async (trx) => {
let id = document.id || crypto.randomUUID();
if (document.source && document.sourceId) {
const [currentSourceDocument] = await trx<TableRows['documents']>(tableNames.documents)
.where('source', document.source)
.andWhere('sourceId', document.sourceId)
.limit(1);
if (currentSourceDocument) {
id = currentSourceDocument.id;
}
}
const now = new Date();
const [current] = await trx<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);
if (current) {
if (
compareObjectKeys(current, document, [
'sourceId',
'source',
'content',
'contentType',
'searchText',
'type',
'typeVersion',
'metadata',
])
) {
return {
id,
action: 'skipped',
document: mapFromDocumentRow(current),
} as const;
}
await trx<TableRows['documents']>(tableNames.documents)
.update({
...document,
id,
updatedAt: now,
})
.where('id', id);
const resultDocument: Document = mapFromDocumentRow({
...current,
...document,
id,
});
this.emit('updated', resultDocument);
this.emit('upserted', resultDocument);
return {
id,
action: 'updated',
document: resultDocument,
} as const;
} else {
await trx<TableRows['documents']>(tableNames.documents).insert({
...document,
id,
createdAt: now,
updatedAt: now,
metadata: document.metadata || {},
});
const resultDocument: Document = mapFromDocumentRow({
owner: null,
contentType: null,
content: null,
source: null,
sourceId: null,
typeVersion: null,
searchText: null,
metadata: {},
...document,
deletedAt: null,
id,
createdAt: now,
updatedAt: now,
});
this.emit('inserted', resultDocument);
this.emit('upserted', resultDocument);
return {
id,
action: 'inserted',
document: resultDocument,
} as const;
}
});
if (result.action !== 'skipped') {
await db.transaction(async (trx) => {
await trx<TableRows['documentChunks']>(tableNames.documentChunks).delete().where('owner', result.id);
const splittingService = this.#services.get(SplittingService);
const chunks = await splittingService.chunk(result.document);
if (chunks.length > 0) {
await trx<TableRows['documentChunks']>(tableNames.documentChunks).insert(
chunks.map((chunk) => ({
id: crypto.randomUUID(),
owner: result.id,
content: chunk.content,
embedding: chunk.vector.toSql(),
embeddingModel: chunk.model,
})),
);
}
});
}
return result;
};
}
export * from './documents.schemas.ts';
export { DocumentsService };
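
A sketch of driving the service directly and observing its events, along the lines of dev.ts; the abort wiring relies on the EventEmitter utility further down, and the first upsert downloads the embedding model for chunking:

import { DocumentsService } from '#root/services/documents/documents.ts';
import { Services } from '#root/utils/utils.services.ts';

const services = new Services();
const documents = services.get(DocumentsService);

const abort = new AbortController();
documents.on('upserted', (document) => console.log('upserted', document.id), { abortSignal: abort.signal });

const result = await documents.upsert({ type: 'demo', content: 'the dog is blue' });
console.log(result.action); // 'inserted'

const changed = await documents.upsert({ id: result.id, type: 'demo', content: 'the dog is green' });
console.log(changed.action); // 'updated', since the content differs for the same id

abort.abort(); // detaches the listener
await services.destroy();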

View File

@@ -0,0 +1,60 @@
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { Vector } from './embeddings.vector.ts';
type ExtractOptions = {
input: string[];
model: string;
};
type Extractor = {
extractor: FeatureExtractionPipeline;
dimensions: number;
};
class EmbeddingsService {
#extractors = new Map<string, Promise<Extractor>>();
#setupExtractor = async (model: string) => {
const extractor = await pipeline('feature-extraction', model, {});
const { config } = extractor.model;
if (!('hidden_size' in config) || typeof config.hidden_size !== 'number') {
throw new Error('Invalid model configuration');
}
return {
extractor,
dimensions: config.hidden_size,
};
};
#getExtractor = async (name: string) => {
if (!this.#extractors.has(name)) {
this.#extractors.set(name, this.#setupExtractor(name));
}
const extractor = await this.#extractors.get(name);
if (!extractor) {
throw new Error('Extractor not found');
}
return extractor;
};
public extract = async (options: ExtractOptions) => {
const { input, model } = options;
const { extractor, dimensions } = await this.#getExtractor(model);
const output = await extractor(input, { pooling: 'cls' });
return output.tolist().map((v: ExplicitAny) => new Vector(v, dimensions));
};
public getDimensions = async (model: string) => {
const { dimensions } = await this.#getExtractor(model);
return dimensions;
};
public getFieldType = async (model: string) => {
const dimensions = await this.getDimensions(model);
return `vector(${dimensions})`;
};
}
export { EmbeddingsService };
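
A sketch of the embeddings API in isolation: extract vectors for two sentences and compare them with the Vector helper below (the model is downloaded on first use):

import { EmbeddingsService } from '#root/services/embeddings/embeddings.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';

const embeddings = new EmbeddingsService();
const [cat, dog] = await embeddings.extract({
  input: ['the cat is yellow', 'the dog is blue'],
  model: EMBEDDING_MODEL,
});

console.log(await embeddings.getFieldType(EMBEDDING_MODEL)); // 'vector(384)' for all-MiniLM-L6-v2
console.log(cat.distanceTo(dog)); // cosine similarity of the two sentences
console.log(cat.toSql().slice(0, 32)); // pgvector literal, ready to insert into the embedding column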

View File

@@ -0,0 +1,37 @@
import { cos_sim } from '@huggingface/transformers';
import { toSql } from 'pgvector';
class Vector {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
#value: any;
#dimensions: number;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
constructor(value: any, dimensions: number) {
this.#value = value;
this.#dimensions = dimensions;
}
public get value() {
return this.#value;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
public set value(value: any) {
this.#value = value;
}
public get dimensions() {
return this.#dimensions;
}
}
public toSql = () => {
return toSql(this.#value);
};
public distanceTo = (other: Vector) => {
return cos_sim(this.#value, other.value);
};
}
export { Vector };

View File

@@ -0,0 +1,44 @@
import { EmbeddingsService } from '../embeddings/embeddings.ts';
import type { Document } from '../documents/documents.schemas.ts';
import type { Chunk, Splitter } from './splitter.types.ts';
import { textSplitter } from './splitters/splitters.text.ts';
import type { Services } from '#root/utils/utils.services.ts';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.ts';
class SplittingService {
#services: Services;
#chunkers: Set<Splitter>;
constructor(services: Services) {
this.#services = services;
this.#chunkers = new Set();
this.addChunkers([textSplitter]);
}
public addChunkers = (splitter: Splitter[]) => {
this.#chunkers = this.#chunkers.union(new Set(splitter));
};
public chunk = async (input: Document): Promise<Chunk[]> => {
const splitter = this.#chunkers.values().find((splitter) => splitter.match(input));
if (!splitter) {
return [];
}
const chunks = await splitter.chunk(input);
const embeddingsService = this.#services.get(EmbeddingsService);
const vectors = await embeddingsService.extract({
input: chunks,
model: EMBEDDING_MODEL,
});
return chunks.map((content, index) => ({
content,
vector: vectors[index],
model: EMBEDDING_MODEL,
}));
};
}
export * from './splitter.types.ts';
export { SplittingService };
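
A sketch of registering an extra splitter. Chunkers are checked in insertion order and the default text splitter claims any document with content, so a custom splitter added here only picks up documents the default one skips; searchTextSplitter below is a made-up example that chunks the searchText of content-less documents:

import { SplittingService, type Splitter } from '#root/services/splitter/splitter.ts';
import { Services } from '#root/utils/utils.services.ts';

// Hypothetical splitter for documents that carry searchText but no content.
const searchTextSplitter: Splitter = {
  match: (document) => !document.content && !!document.searchText,
  chunk: async (document) => (document.searchText ? [document.searchText] : []),
};

const services = new Services();
const splitting = services.get(SplittingService);
splitting.addChunkers([searchTextSplitter]);

const chunks = await splitting.chunk({
  id: crypto.randomUUID(),
  owner: null,
  createdAt: new Date().toISOString(),
  updatedAt: new Date().toISOString(),
  deletedAt: null,
  contentType: null,
  content: null,
  source: null,
  sourceId: null,
  type: 'demo',
  typeVersion: null,
  searchText: 'the pig says hi',
  metadata: {},
});
console.log(chunks.length, chunks[0]?.model); // 1 'Xenova/all-MiniLM-L6-v2'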

View File

@@ -0,0 +1,15 @@
import type { Document } from '../documents/documents.schemas.ts';
import type { Vector } from '../embeddings/embeddings.vector.ts';
type Chunk = {
content: string;
vector: Vector;
model: string;
};
type Splitter = {
match: (document: Document) => boolean;
chunk: (document: Document) => Promise<string[]>;
};
export type { Chunk, Splitter };

View File

@@ -0,0 +1,17 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.ts';
const textSplitter: Splitter = {
match: (document) => !!document.content,
chunk: async (document) => {
if (!document.content) {
return [];
}
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
const texts = await splitter.splitText(document.content);
return texts;
},
};
export { textSplitter };

View File

@@ -0,0 +1,17 @@
import { DatabaseService } from '../database/database.ts';
import { Services } from '#root/utils/utils.services.ts';
class WarmupService {
#services: Services;
constructor(services: Services) {
this.#services = services;
}
public ensure = async () => {
const databaseService = this.#services.get(DatabaseService);
await databaseService.getInstance();
};
}
export { WarmupService };

View File

@@ -0,0 +1,7 @@
import { createApi } from './api.js';
const server = await createApi();
await server.listen({
port: 3400,
});

View File

@@ -0,0 +1,14 @@
import deepEqual from 'deep-equal';
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) => {
for (const key of keys) {
const avalue = a[key];
const bvalue = b[key];
if (!deepEqual(avalue, bvalue)) {
return false;
}
}
return true;
};
export { compareObjectKeys };
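
A tiny illustration of the key-scoped comparison used by the upsert skip check in DocumentsService:

import { compareObjectKeys } from '#root/utils/utils.compare.ts';

const stored = { type: 'demo', content: 'the cat is yellow', metadata: { foo: 'bar' }, updatedAt: new Date() };
const incoming = { type: 'demo', content: 'the cat is yellow', metadata: { foo: 'bar' }, updatedAt: new Date(0) };

// Only the listed keys are compared, so the differing updatedAt is ignored.
console.log(compareObjectKeys(stored, incoming, ['type', 'content', 'metadata'])); // true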

View File

@@ -0,0 +1,3 @@
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';
export { EMBEDDING_MODEL };

View File

@@ -0,0 +1,64 @@
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;
type OnOptions = {
abortSignal?: AbortSignal;
};
class EventEmitter<T extends Record<string, (...args: ExplicitAny[]) => void | Promise<void>>> {
#listeners = new Map<keyof T, Set<EventListener<ExplicitAny>>>();
on = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
const { abortSignal } = options;
if (!this.#listeners.has(event)) {
this.#listeners.set(event, new Set());
}
const callbackClone = (...args: Parameters<T[K]>) => callback(...args);
const abortController = new AbortController();
const listeners = this.#listeners.get(event);
if (!listeners) {
throw new Error('Event registration failed');
}
abortSignal?.addEventListener('abort', () => abortController.abort());
listeners.add(callbackClone);
abortController.signal.addEventListener('abort', () => {
this.#listeners.set(event, listeners.difference(new Set([callbackClone])));
});
return () => abortController.abort();
};
once = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
const abortController = new AbortController();
options.abortSignal?.addEventListener('abort', () => abortController.abort());
return this.on(
event,
async (...args) => {
abortController.abort();
await callback(...args);
},
{
...options,
abortSignal: abortController.signal,
},
);
};
emit = <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
const listeners = this.#listeners.get(event);
if (!listeners) {
return;
}
for (const listener of listeners) {
listener(...args);
}
};
emitAsync = async <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
const listeners = this.#listeners.get(event);
if (!listeners) {
return;
}
await Promise.all(listeners.values().map((listener) => listener(...args)));
};
}
export { EventEmitter };
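
A small usage sketch of the typed emitter, using a made-up JobRunner to show on/once and the returned detach function:

import { EventEmitter } from '#root/utils/utils.event-emitter.ts';

type JobEvents = {
  progress: (percent: number) => void;
  done: (result: string) => void;
};

class JobRunner extends EventEmitter<JobEvents> {
  run = async () => {
    this.emit('progress', 50);
    await this.emitAsync('done', 'ok'); // waits for async listeners
  };
}

const runner = new JobRunner();
const stop = runner.on('progress', (percent) => console.log(`progress ${percent}%`));
runner.once('done', async (result) => console.log('finished:', result));

await runner.run(); // progress 50% / finished: ok
stop(); // detach the progress listener
await runner.run(); // the once-listener is already gone, so nothing logs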

View File

@@ -0,0 +1,548 @@
import { type Knex } from 'knex';
import { z } from 'zod';
/**
* Escapes a JSON key for use in PostgreSQL JSON operators.
* Escapes single quotes by doubling them, which is the PostgreSQL standard.
*/
const escapeJsonKey = (key: string): string => {
return key.replace(/'/g, "''");
};
const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?: string) => {
const baseColumn = field[0];
if (field.length === 1) {
return tableName ? `${tableName}.${baseColumn}` : baseColumn;
}
const baseFieldRef = tableName ? query.client.ref(baseColumn).withSchema(tableName) : query.client.ref(baseColumn);
const jsonPath = field.slice(1);
let sqlExpression = baseFieldRef.toString();
for (let i = 0; i < jsonPath.length - 1; i++) {
const escapedKey = escapeJsonKey(jsonPath[i]);
sqlExpression += ` -> '${escapedKey}'`;
}
const finalElement = jsonPath[jsonPath.length - 1];
const escapedFinalKey = escapeJsonKey(finalElement);
sqlExpression += ` ->> '${escapedFinalKey}'`;
return query.client.raw(sqlExpression);
};
const queryConditionTextSchema = z
.object({
type: z.literal('text'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
equal: z.string().nullish(),
notEqual: z.string().optional(),
like: z.string().optional(),
notLike: z.string().optional(),
in: z.array(z.string()).optional(),
notIn: z.array(z.string()).optional(),
}),
})
.meta({
example: {
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
examples: [
{
summary: 'Equal condition',
value: {
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
},
{
summary: 'Like condition',
value: {
type: 'text',
field: ['content'],
conditions: {
like: '%cat%',
},
},
},
{
summary: 'In condition',
value: {
type: 'text',
field: ['type'],
conditions: {
in: ['demo', 'article', 'post'],
},
},
},
{
summary: 'Null check',
value: {
type: 'text',
field: ['source'],
conditions: {
equal: null,
},
},
},
],
});
type QueryConditionText = z.infer<typeof queryConditionTextSchema>;
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equal !== undefined && conditions.equal !== null) {
query = query.where(selector, '=', conditions.equal);
}
if (conditions.notEqual !== undefined && conditions.notEqual !== null) {
query = query.where(selector, '<>', conditions.notEqual);
}
if (conditions.like) {
query = query.whereLike(selector, conditions.like);
}
if (conditions.notLike) {
query = query.not.whereLike(selector, conditions.notLike);
}
if (conditions.equal === null) {
query = query.whereNull(selector);
}
if (conditions.notEqual === null) {
query = query.whereNotNull(selector);
}
if (conditions.in) {
query = query.whereIn(selector, conditions.in);
}
if (conditions.notIn) {
query = query.whereNotIn(selector, conditions.notIn);
}
return query;
};
const queryConditionNumberSchema = z
.object({
type: z.literal('number'),
tableName: z.string().optional(),
field: z.array(z.string()),
conditions: z.object({
equals: z.number().nullish(),
notEquals: z.number().nullish(),
greaterThan: z.number().optional(),
greaterThanOrEqual: z.number().optional(),
lessThan: z.number().optional(),
lessThanOrEqual: z.number().optional(),
in: z.array(z.number()).optional(),
notIn: z.array(z.number()).optional(),
}),
})
.meta({
example: {
type: 'number',
field: ['typeVersion'],
conditions: {
equals: 1,
},
},
examples: [
{
summary: 'Equals condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
equals: 1,
},
},
},
{
summary: 'Greater than condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
greaterThan: 0,
},
},
},
{
summary: 'Range condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
greaterThanOrEqual: 1,
lessThanOrEqual: 10,
},
},
},
{
summary: 'In condition',
value: {
type: 'number',
field: ['typeVersion'],
conditions: {
in: [1, 2, 3],
},
},
},
],
});
type QueryConditionNumber = z.infer<typeof queryConditionNumberSchema>;
const applyQueryConditionNumber = (
query: Knex.QueryBuilder,
{ field, tableName, conditions }: QueryConditionNumber,
) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equals !== undefined && conditions.equals !== null) {
query = query.where(selector, '=', conditions.equals);
}
if (conditions.notEquals !== undefined && conditions.notEquals !== null) {
query = query.where(selector, '<>', conditions.notEquals);
}
if (conditions.equals === null) {
query = query.whereNull(selector);
}
if (conditions.notEquals === null) {
query = query.whereNotNull(selector);
}
if (conditions.greaterThan !== undefined) {
query = query.where(selector, '>', conditions.greaterThan);
}
if (conditions.greaterThanOrEqual !== undefined) {
query = query.where(selector, '>=', conditions.greaterThanOrEqual);
}
if (conditions.lessThan !== undefined) {
query = query.where(selector, '<', conditions.lessThan);
}
if (conditions.lessThanOrEqual !== undefined) {
query = query.where(selector, '<=', conditions.lessThanOrEqual);
}
if (conditions.in) {
query = query.whereIn(selector, conditions.in);
}
if (conditions.notIn) {
query = query.whereNotIn(selector, conditions.notIn);
}
return query;
};
const queryConditionSchema = z.discriminatedUnion('type', [queryConditionTextSchema, queryConditionNumberSchema]);
type QueryCondition = z.infer<typeof queryConditionSchema>;
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
switch (options.type) {
case 'text': {
return applyQueryConditionText(query, options);
}
case 'number': {
return applyQueryConditionNumber(query, options);
}
default: {
throw new Error(`Unknown filter type`);
}
}
};
type QueryFilter = QueryCondition | QueryOperator;
type QueryOperator = {
type: 'operator';
operator: 'and' | 'or';
conditions: QueryFilter[];
};
// Create a depth-limited recursive schema for OpenAPI compatibility
// This supports up to 3 levels of nesting, which should be sufficient for most use cases
// OpenAPI cannot handle z.lazy(), so we manually define the nesting
// If you need deeper nesting, you can add more levels (Level3, Level4, etc.)
const queryFilterSchemaLevel0: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
conditions: z.array(queryConditionSchema),
})
.meta({
example: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
],
},
examples: [
{
summary: 'AND operator',
value: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
},
{
summary: 'OR operator',
value: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
},
],
}),
]);
const queryFilterSchemaLevel1: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
conditions: z.array(queryFilterSchemaLevel0),
})
.meta({
example: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
],
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
examples: [
{
summary: 'Nested AND within OR',
value: {
type: 'operator',
operator: 'or',
conditions: [
{
type: 'operator',
operator: 'and',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
},
],
}),
]);
const queryFilterSchemaLevel2: z.ZodType<QueryFilter> = z.union([
queryConditionSchema,
z
.object({
type: z.literal('operator'),
operator: z.enum(['and', 'or']),
conditions: z.array(queryFilterSchemaLevel1),
})
.meta({
example: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
examples: [
{
summary: 'Complex nested query',
value: {
type: 'operator',
operator: 'and',
conditions: [
{
type: 'operator',
operator: 'or',
conditions: [
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'bar',
},
},
{
type: 'text',
field: ['metadata', 'foo'],
conditions: {
equal: 'baz',
},
},
],
},
{
type: 'text',
field: ['type'],
conditions: {
equal: 'demo',
},
},
],
},
},
],
}),
]);
// Export the depth-limited schema (supports 3 levels of nesting)
// This works with OpenAPI schema generation
const queryFilterSchema = queryFilterSchemaLevel2;
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
if (filter.type === 'operator') {
if (filter.conditions.length === 0) {
return query;
}
switch (filter.operator) {
case 'or': {
return query.where((subquery) => {
let isFirst = true;
for (const condition of filter.conditions) {
if (isFirst) {
applyQueryFilter(subquery, condition);
isFirst = false;
} else {
subquery.orWhere((subSubquery) => {
applyQueryFilter(subSubquery, condition);
});
}
}
});
}
case 'and': {
return query.where((subquery) => {
let isFirst = true;
for (const condition of filter.conditions) {
if (isFirst) {
applyQueryFilter(subquery, condition);
isFirst = false;
} else {
subquery.andWhere((subSubquery) => {
applyQueryFilter(subSubquery, condition);
});
}
}
});
}
}
} else {
return applyQueryCondition(query, filter);
}
};
export { applyQueryCondition, queryConditionSchema, queryFilterSchema, applyQueryFilter };
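
A sketch of the filter DSL end to end: the same JSON shape the endpoints accept, validated with queryFilterSchema and applied to a knex builder against the documents table:

import { applyQueryFilter, queryFilterSchema } from '#root/utils/utils.query.ts';
import { DatabaseService, tableNames } from '#root/services/database/database.ts';
import { Services } from '#root/utils/utils.services.ts';

const filter = queryFilterSchema.parse({
  type: 'operator',
  operator: 'and',
  conditions: [
    { type: 'text', field: ['type'], conditions: { equal: 'demo' } },
    { type: 'text', field: ['metadata', 'foo'], conditions: { equal: 'bar' } },
  ],
});

const services = new Services();
const db = await services.get(DatabaseService).getInstance();
const query = applyQueryFilter(db(tableNames.documents).select('id', 'type'), filter);
console.log(query.toString()); // inspect the generated WHERE clause, including the metadata ->> 'foo' selector
const rows = await query;
console.log(rows.length);

await services.destroy();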

View File

@@ -0,0 +1,8 @@
import { z, type ZodType } from 'zod';
const createListResultSchema = <T extends ZodType>(schema: T) =>
z.object({
items: z.array(schema),
});
export { createListResultSchema };

View File

@@ -0,0 +1,51 @@
const destroy = Symbol('destroy');
const instanceKey = Symbol('instances');
type ServiceDependency<T> = new (services: Services) => T & {
[destroy]?: () => Promise<void> | void;
};
class Services {
[instanceKey]: Map<ServiceDependency<unknown>, unknown>;
constructor() {
this[instanceKey] = new Map();
}
public get = <T>(service: ServiceDependency<T>) => {
if (!this[instanceKey].has(service)) {
this[instanceKey].set(service, new service(this));
}
const instance = this[instanceKey].get(service);
if (!instance) {
throw new Error('Could not generate instance');
}
return instance as T;
};
public set = <T>(service: ServiceDependency<T>, instance: Partial<T>) => {
this[instanceKey].set(service, instance);
};
public clone = () => {
const services = new Services();
services[instanceKey] = new Map(this[instanceKey]);
return services;
};
public destroy = async () => {
await Promise.all(
this[instanceKey].values().map(async (instance) => {
if (
typeof instance === 'object' &&
instance &&
destroy in instance &&
typeof instance[destroy] === 'function'
) {
await instance[destroy]();
}
}),
);
};
}
export { Services, destroy };
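
A sketch of the container's contract with two made-up services: instances are constructed lazily with the container itself, partial fakes can be injected with set() before first resolution, and teardown goes through the destroy symbol:

import { Services, destroy } from '#root/utils/utils.services.ts';

class Clock {
  now = () => new Date();
  [destroy] = async () => {
    console.log('clock disposed');
  };
}

class Greeter {
  #services: Services;
  constructor(services: Services) {
    this.#services = services;
  }
  greet = () => `hello at ${this.#services.get(Clock).now().toISOString()}`;
}

const services = new Services();
console.log(services.get(Greeter).greet()); // lazily constructs Greeter and Clock
await services.destroy(); // logs 'clock disposed'

// In a test, a partial fake can replace the real dependency before it is resolved.
const testServices = new Services();
testServices.set(Clock, { now: () => new Date(0) });
console.log(testServices.get(Greeter).greet()); // hello at 1970-01-01T00:00:00.000Z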

View File

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"outDir": "./dist",
"paths": {
"#root/*": [
"./src/*"
]
}
},
"include": [
"src/**/*.ts"
],
"extends": "@morten-olsen/stash-configs/tsconfig.json"
}

View File

@@ -0,0 +1,12 @@
import { defineConfig } from 'vitest/config';
import { getAliases } from '@morten-olsen/stash-tests/vitest';
// eslint-disable-next-line import/no-default-export
export default defineConfig(async () => {
const aliases = await getAliases();
return {
resolve: {
alias: aliases,
},
};
});