chore: seperate into packages

This commit is contained in:
Morten Olsen
2025-12-10 10:26:14 +01:00
parent f9494c88e2
commit d02102977a
40 changed files with 753 additions and 1175 deletions

View File

@@ -31,22 +31,14 @@
"#root/*": "./src/*"
},
"dependencies": {
"@electric-sql/pglite": "^0.3.14",
"@fastify/cors": "11.1.0",
"@fastify/swagger": "9.6.1",
"@fastify/websocket": "11.2.0",
"@huggingface/transformers": "^3.8.1",
"@langchain/textsplitters": "^1.0.1",
"@morten-olsen/stash-query-dsl": "workspace:*",
"@morten-olsen/stash-runtime": "workspace:*",
"@scalar/fastify-api-reference": "1.40.2",
"better-sqlite3": "^12.5.0",
"deep-equal": "^2.2.3",
"fastify": "5.6.2",
"fastify-type-provider-zod": "6.1.0",
"knex": "^3.1.0",
"knex-pglite": "^0.13.0",
"pg": "^8.16.3",
"pgvector": "^0.2.1",
"zod": "4.1.13",
"zod-to-json-schema": "3.25.0"
}

View File

@@ -9,10 +9,9 @@ import {
validatorCompiler,
type ZodTypeProvider,
} from 'fastify-type-provider-zod';
import { StashRuntime } from '@morten-olsen/stash-runtime';
import { Services } from './utils/utils.services.js';
import { systemEndpoints } from './endpoints/system/system.js';
import { WarmupService } from './services/warmup/warmup.js';
import { documentEndpoints } from './endpoints/documents/documents.js';
import { documentFilterEndpoints } from './endpoints/document-filters/document-filters.js';
import { documentChunkFilterEndpoints } from './endpoints/document-chunk-filters/document-chunk-filters.js';
@@ -26,12 +25,12 @@ class BaseError extends Error {
}
}
const createApi = async (services: Services = new Services()) => {
const createApi = async (runtime: StashRuntime = new StashRuntime()) => {
const app = fastify().withTypeProvider<ZodTypeProvider>();
app.setValidatorCompiler(validatorCompiler);
app.setSerializerCompiler(serializerCompiler);
app.decorate('services', services);
app.decorate('runtime', runtime);
app.register(fastifyCors);
app.register(fastifySwagger, {
@@ -92,8 +91,7 @@ const createApi = async (services: Services = new Services()) => {
});
app.addHook('onReady', async () => {
const warmupService = app.services.get(WarmupService);
await warmupService.ensure();
app.runtime.warmup.ensure();
});
await app.register(systemEndpoints, { prefix: '/system' });

View File

@@ -1,11 +1,10 @@
import { StashRuntime, type DocumentUpsert } from '@morten-olsen/stash-runtime';
import { createApi } from './api.js';
import { DocumentsService, type DocumentUpsert } from './services/documents/documents.js';
import { Services } from './utils/utils.services.js';
const services = new Services();
const server = await createApi(services);
const runtime = new StashRuntime();
const server = await createApi(runtime);
const documentsService = services.get(DocumentsService);
const documents: DocumentUpsert[] = [
{
metadata: {
@@ -31,7 +30,7 @@ const documents: DocumentUpsert[] = [
},
];
await Promise.all(documents.map((document) => documentsService.upsert(document)));
await Promise.all(documents.map((document) => runtime.documents.upsert(document)));
await server.listen({
port: 3400,

View File

@@ -1,11 +1,6 @@
import { documentChunkFilterSchema, documentChunksFindResultSchema } from '@morten-olsen/stash-runtime';
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
documentChunkFilterSchema,
documentChunksFindResultSchema,
DocumentChunksService,
} from '#root/services/document-chunks/document-chunks.js';
const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
@@ -20,9 +15,8 @@ const documentChunkFilterEndpoints: FastifyPluginAsyncZod = async (instance) =>
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentChunksService = services.get(DocumentChunksService);
const response = await documentChunksService.find(req.body);
const { runtime } = instance;
const response = await runtime.documentChunks.find(req.body);
await reply.send(response);
},
});

View File

@@ -1,11 +1,6 @@
import { documentFilterSchema, documentFindResultSchema } from '@morten-olsen/stash-runtime';
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
documentFilterSchema,
documentFindResultSchema,
DocumentsService,
} from '#root/services/documents/documents.js';
const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
@@ -20,9 +15,8 @@ const documentFilterEndpoints: FastifyPluginAsyncZod = async (instance) => {
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentsService = services.get(DocumentsService);
const response = await documentsService.find(req.body);
const { runtime } = instance;
const response = await runtime.documents.find(req.body);
await reply.send(response);
},
});

View File

@@ -1,11 +1,6 @@
import { documentUpsertResultSchema, documentUpsertSchema } from '@morten-olsen/stash-runtime';
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import {
DocumentsService,
documentUpsertResultSchema,
documentUpsertSchema,
} from '#root/services/documents/documents.js';
const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'POST',
@@ -20,9 +15,8 @@ const documentEndpoints: FastifyPluginAsyncZod = async (instance) => {
},
},
handler: async (req, reply) => {
const { services } = instance;
const documentsService = services.get(DocumentsService);
const response = await documentsService.upsert(req.body);
const { runtime } = instance;
const response = await runtime.documents.upsert(req.body);
await reply.send(response);
},
});

View File

@@ -1,8 +1,6 @@
import type { FastifyPluginAsyncZod } from 'fastify-type-provider-zod';
import { z } from 'zod';
import { DatabaseService } from '#root/services/database/database.js';
const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
instance.route({
method: 'GET',
@@ -18,9 +16,8 @@ const systemEndpoints: FastifyPluginAsyncZod = async (instance) => {
},
},
handler: async (_, reply) => {
const { services } = instance;
const databaseService = services.get(DatabaseService);
const db = await databaseService.getInstance();
const { runtime } = instance;
const db = await runtime.database.getInstance();
await db.raw('SELECT 1=1');
await reply.send({
status: 'ok',

View File

@@ -1,5 +1,5 @@
import 'fastify';
import type { Services } from './utils/utils.services.ts';
import type { StashRuntime } from '@morten-olsen/stash-runtime';
// eslint-disable-next-line
declare type ExplicitAny = any;
@@ -7,6 +7,6 @@ declare type ExplicitAny = any;
declare module 'fastify' {
// eslint-disable-next-line
export interface FastifyInstance {
services: Services;
runtime: StashRuntime;
}
}

View File

@@ -1,54 +0,0 @@
import knex, { type Knex } from 'knex';
import ClientPgLite from 'knex-pglite';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { migrationSource } from './migrations/migrations.js';
import { destroy, Services } from '#root/utils/utils.services.js';
class DatabaseService {
#services: Services;
#instance?: Promise<Knex>;
constructor(services: Services) {
this.#services = services;
}
#setup = async () => {
const pglite = new PGlite({
extensions: { vector },
});
const instance = knex({
client: ClientPgLite,
dialect: 'postgres',
connection: () => ({ pglite }) as object,
});
await instance.raw(`CREATE EXTENSION IF NOT EXISTS vector`);
await instance.migrate.latest({
migrationSource: migrationSource({ services: this.#services }),
});
return instance;
};
public getInstance = () => {
if (!this.#instance) {
this.#instance = this.#setup();
}
return this.#instance;
};
[destroy] = async () => {
if (!this.#instance) {
return;
}
const instance = await this.#instance;
await instance.destroy();
};
}
export { type TableRows, tableNames } from './migrations/migrations.js';
export { DatabaseService };

View File

@@ -1,112 +0,0 @@
import type { Migration } from './migrations.types.js';
import { EmbeddingsService } from '#root/services/embeddings/embeddings.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
const tableNames = {
documents: 'documents',
documentChunks: 'documentChunks',
relations: 'relations',
};
const init: Migration = {
name: 'init',
up: async ({ knex, services }) => {
const embedding = services.get(EmbeddingsService);
const embeddingField = await embedding.getFieldType(EMBEDDING_MODEL);
await knex.schema.createTable(tableNames.documents, (table) => {
table.uuid('id').primary();
table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.datetime('updatedAt').notNullable();
table.datetime('createdAt').notNullable();
table.datetime('deletedAt').nullable();
table.string('contentType').nullable();
table.text('content').nullable();
table.string('source').nullable();
table.string('sourceId').nullable();
table.string('type').notNullable();
table.integer('typeVersion').nullable();
table.text('searchText').nullable();
table.jsonb('metadata').nullable();
table.index(['source', 'sourceId']);
table.index(['owner']);
});
await knex.schema.createTable(tableNames.documentChunks, (table) => {
table.uuid('id').primary();
table.uuid('owner').nullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.text('content').notNullable();
table.specificType('embedding', embeddingField).notNullable();
table.string('embeddingModel').notNullable();
table.index(['owner']);
});
knex.raw(`create index on ${tableNames.documentChunks} using GIN ("embeddingg")`);
await knex.schema.createTable(tableNames.relations, (table) => {
table.uuid('from').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.uuid('to').notNullable().references('id').inTable(tableNames.documents).onDelete('CASCADE');
table.string('type').nullable();
table.string('typeVersion').nullable();
table.datetime('updatedAt').notNullable();
table.datetime('createdAt').notNullable();
table.datetime('deletedAt').nullable();
table.jsonb('data');
table.primary(['from', 'to', 'type']);
table.index(['from']);
table.index(['to']);
});
},
down: async ({ knex }) => {
await knex.schema.dropTableIfExists(tableNames.relations);
await knex.schema.dropTableIfExists(tableNames.documentChunks);
await knex.schema.dropTableIfExists(tableNames.documents);
},
};
type DocumentRow = {
id: string;
owner: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
contentType: string | null;
content: string | null;
source: string | null;
sourceId: string | null;
type: string;
typeVersion: number | null;
searchText: string | null;
metadata: unknown;
};
type DocumentChunkRow = {
id: string;
owner: string;
content: string;
embedding: unknown;
embeddingModel: string;
};
type RelationRow = {
from: string;
to: string;
type: string;
typeVersion: string | null;
updatedAt: Date;
createdAt: Date;
deletedAt: Date | null;
data: unknown;
};
type TableRows = {
documents: DocumentRow;
documentChunks: DocumentChunkRow;
replations: RelationRow;
};
export type { TableRows };
export { tableNames, init };

View File

@@ -1,25 +0,0 @@
import type { Knex } from 'knex';
import type { Migration } from './migrations.types.js';
import { init } from './migrations.001-init.js';
import type { Services } from '#root/utils/utils.services.js';
const migrations = [init] satisfies Migration[];
type MigrationSourceOptions = {
services: Services;
};
const migrationSource = (options: MigrationSourceOptions): Knex.MigrationSource<Migration> => ({
getMigrationName: (migration) => migration.name,
getMigration: async (migration) => ({
name: migration.name,
up: (knex) => migration.up({ ...options, knex }),
down: (knex) => migration.down({ ...options, knex }),
}),
getMigrations: async () => migrations,
});
export { type TableRows, tableNames } from './migrations.001-init.js';
export { migrationSource };

View File

@@ -1,16 +0,0 @@
import type { Knex } from 'knex';
import type { Services } from '#root/utils/utils.services.js';
type MigrationOptions = {
knex: Knex;
services: Services;
};
type Migration = {
name: string;
up: (options: MigrationOptions) => Promise<void>;
down: (options: MigrationOptions) => Promise<void>;
};
export type { Migration };

View File

@@ -1,13 +0,0 @@
import type { TableRows } from '../database/database.js';
import type { DocumentChunk } from './document-chunks.schemas.js';
const mapFromDocumentChunkRow = (
row: TableRows['documentChunks'] & {
metadata: unknown;
},
): DocumentChunk => ({
...row,
});
export { mapFromDocumentChunkRow };

View File

@@ -1,33 +0,0 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentChunkSchema = z.object({
id: z.string(),
owner: z.string(),
content: z.string(),
metadata: z.unknown(),
});
type DocumentChunk = z.infer<typeof documentChunkSchema>;
const documentChunkFilterSchema = z.object({
limit: z.number().default(20),
offset: z.number().default(0),
semanticText: z.string().optional(),
conditions: z.union([queryFilterSchema, z.string()]).optional(),
});
type DocumentChunkFilter = z.infer<typeof documentChunkFilterSchema>;
const documentChunksFindResultSchema = createListResultSchema(
documentChunkSchema.extend({
distance: z.number().optional(),
}),
);
type DocumentChunksFindResult = z.infer<typeof documentChunksFindResultSchema>;
export type { DocumentChunk, DocumentChunkFilter, DocumentChunksFindResult };
export { documentChunkSchema, documentChunkFilterSchema, documentChunksFindResultSchema };

View File

@@ -1,66 +0,0 @@
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js';
import { mapFromDocumentChunkRow } from './document-chunks.mappings.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
import type { ExplicitAny } from '#root/global.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
const baseFields = [
`${tableNames.documentChunks}.*`,
`${tableNames.documents}.metadata`,
`${tableNames.documents}.createdAt`,
];
class DocumentChunksService {
#services: Services;
constructor(services: Services) {
this.#services = services;
}
public find = async (filter: DocumentChunkFilter): Promise<DocumentChunksFindResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
let query = db<TableRows['documentChunks']>(tableNames.documentChunks);
query.join(tableNames.documents, `${tableNames.documents}.id`, `${tableNames.documentChunks}.owner`);
if (filter.semanticText) {
const embedding = this.#services.get(EmbeddingsService);
const [vector] = await embedding.extract({
input: [filter.semanticText],
model: EMBEDDING_MODEL,
});
query = query.select(...baseFields, db.raw(`embedding <=> '${vector.toSql()}' as distance`));
query = query.where(`${tableNames.documentChunks}.embeddingModel`, EMBEDDING_MODEL);
query = query.orderBy('distance', 'asc');
} else {
query = query.select(baseFields);
query = query.orderBy('createdAt', 'desc');
}
if (filter.conditions) {
const parser = this.#services.get(QueryParser);
query = applyQueryFilter(
query,
typeof filter.conditions === 'string' ? parser.parse(filter.conditions) : filter.conditions,
);
}
query = query.limit(filter.limit).offset(filter.offset);
const items = await query;
return {
items: items.map(mapFromDocumentChunkRow as ExplicitAny),
};
};
}
export * from './document-chunks.schemas.js';
export { DocumentChunksService };

View File

@@ -1,12 +0,0 @@
import type { TableRows } from '../database/database.js';
import type { Document } from './documents.schemas.js';
const mapFromDocumentRow = (row: TableRows['documents']): Document => ({
...row,
createdAt: row.createdAt.toISOString(),
updatedAt: row.updatedAt.toISOString(),
deletedAt: row.deletedAt?.toISOString() || null,
});
export { mapFromDocumentRow };

View File

@@ -1,80 +0,0 @@
import { z } from 'zod';
import { queryFilterSchema } from '@morten-olsen/stash-query-dsl';
import { createListResultSchema } from '#root/utils/utils.schema.js';
const documentSchema = z.object({
id: z.string(),
owner: z.string().nullable(),
createdAt: z.iso.datetime(),
updatedAt: z.iso.datetime(),
deletedAt: z.iso.datetime().nullable(),
contentType: z.string().nullable(),
content: z.string().nullable(),
source: z.string().nullable(),
sourceId: z.string().nullable(),
type: z.string(),
typeVersion: z.int().nullable(),
searchText: z.string().nullable(),
metadata: z.unknown(),
});
type Document = z.infer<typeof documentSchema>;
const documentUpsertSchema = z
.object({
id: z.string().nullish(),
owner: z.string().nullish(),
contentType: z.string().nullish(),
content: z.string().nullish(),
source: z.string().nullish(),
sourceId: z.string().nullish(),
type: z.string().optional(),
typeVersion: z.int().nullish(),
searchText: z.string().nullish(),
metadata: z.unknown().nullish(),
})
.meta({
example: {
content: 'the cat is yellow',
contentType: 'text/plain',
source: 'test',
sourceId: 'test',
type: 'raw',
metadata: {
foo: 'bar',
bar: 'baz',
},
},
});
type DocumentUpsert = z.infer<typeof documentUpsertSchema>;
const documentUpsertResultSchema = z.object({
action: z.enum(['inserted', 'updated', 'skipped']),
id: z.string(),
document: documentSchema,
});
type DocumentUpsertResult = z.infer<typeof documentUpsertResultSchema>;
const documentFilterSchema = z.object({
offset: z.number().default(0),
limit: z.number().default(20),
condition: z.union([queryFilterSchema, z.string()]),
});
type DocumentFilter = z.infer<typeof documentFilterSchema>;
const documentFindResultSchema = createListResultSchema(documentSchema);
type DocumentFindResult = z.infer<typeof documentFindResultSchema>;
export type { Document, DocumentUpsert, DocumentUpsertResult, DocumentFilter, DocumentFindResult };
export {
documentSchema,
documentUpsertSchema,
documentUpsertResultSchema,
documentFilterSchema,
documentFindResultSchema,
};

View File

@@ -1,179 +0,0 @@
import { QueryParser } from '@morten-olsen/stash-query-dsl';
import { DatabaseService, tableNames, type TableRows } from '../database/database.js';
import { SplittingService } from '../splitter/splitter.js';
import type {
Document,
DocumentFilter,
DocumentFindResult,
DocumentUpsert,
DocumentUpsertResult,
} from './documents.schemas.ts';
import { mapFromDocumentRow } from './documents.mapping.js';
import { EventEmitter } from '#root/utils/utils.event-emitter.js';
import type { Services } from '#root/utils/utils.services.js';
import { compareObjectKeys } from '#root/utils/utils.compare.js';
import { applyQueryFilter } from '#root/utils/utils.query.js';
type DocumentsServiceEvents = {
upserted: (document: Document) => void;
inserted: (document: Document) => void;
updated: (document: Document) => void;
};
class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
#services: Services;
constructor(services: Services) {
super();
this.#services = services;
}
public find = async (filter: DocumentFilter): Promise<DocumentFindResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
let query = db<TableRows['documents']>(tableNames.documents);
if (filter) {
const parser = this.#services.get(QueryParser);
query = applyQueryFilter(
query,
typeof filter.condition === 'string' ? parser.parse(filter.condition) : filter.condition,
);
}
query = query.limit(filter.limit).offset(filter.offset);
const items = await query;
return {
items: items.map(mapFromDocumentRow),
};
};
public get = async (id: string): Promise<Document> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
const [item] = await db<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);
return mapFromDocumentRow(item);
};
public remove = async (id: string): Promise<void> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
await db<TableRows['documents']>(tableNames.documents).where('id', id).delete();
};
public upsert = async (document: DocumentUpsert): Promise<DocumentUpsertResult> => {
const databaseService = this.#services.get(DatabaseService);
const db = await databaseService.getInstance();
const result = await db.transaction(async (trx) => {
let id = document.id || crypto.randomUUID();
if (document.source && document.sourceId) {
const [currentSourceDocument] = await trx<TableRows['documents']>(tableNames.documents)
.where('source', document.source)
.andWhere('sourceId', document.sourceId)
.limit(1);
if (currentSourceDocument) {
id = currentSourceDocument.id;
}
}
const now = new Date();
const [current] = await trx<TableRows['documents']>(tableNames.documents).where('id', id).limit(1);
if (current) {
if (
compareObjectKeys(current, document, [
'sourceId',
'source',
'content',
'contentType',
'searchText',
'type',
'typeVersion',
'metadata',
])
) {
return {
id,
action: 'skipped',
document: mapFromDocumentRow(current),
} as const;
}
await trx<TableRows['documents']>(tableNames.documents)
.update({
...document,
id,
updatedAt: now,
})
.where('id', id);
const resultDocument: Document = mapFromDocumentRow({
...current,
...document,
id,
});
this.emit('updated', resultDocument);
this.emit('upserted', resultDocument);
return {
id,
action: 'updated',
document: resultDocument,
} as const;
} else {
await trx<TableRows['documents']>(tableNames.documents).insert({
metadata: {},
type: 'raw',
...document,
id,
createdAt: now,
updatedAt: now,
});
const resultDocument: Document = mapFromDocumentRow({
type: 'raw',
owner: null,
contentType: null,
content: null,
source: null,
sourceId: null,
typeVersion: null,
searchText: null,
metadata: {},
...document,
deletedAt: null,
id,
createdAt: now,
updatedAt: now,
});
this.emit('inserted', resultDocument);
this.emit('upserted', resultDocument);
return {
id,
action: 'inserted',
document: resultDocument,
} as const;
}
});
if (result.action !== 'skipped') {
await db.transaction(async (trx) => {
await trx<TableRows['documentChunks']>(tableNames.documentChunks).delete().where('owner', result.id);
const splittingService = this.#services.get(SplittingService);
const chunks = await splittingService.chunk(result.document);
if (chunks.length > 0) {
await trx<TableRows['documentChunks']>(tableNames.documentChunks).insert(
chunks.map((chunk) => ({
id: crypto.randomUUID(),
owner: result.id,
content: chunk.content,
embedding: chunk.vector.toSql(),
embeddingModel: chunk.model,
})),
);
}
});
}
return result;
};
}
export * from './documents.schemas.js';
export { DocumentsService };

View File

@@ -1,62 +0,0 @@
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { Vector } from './embeddings.vector.js';
import type { ExplicitAny } from '#root/global.js';
type ExtractOptions = {
input: string[];
model: string;
};
type Extractor = {
extractor: FeatureExtractionPipeline;
dimensions: number;
};
class EmbeddingsService {
#extractors = new Map<string, Promise<Extractor>>();
#setupExctractor = async (model: string) => {
const extractor = await pipeline('feature-extraction', model, {});
const { config } = extractor.model;
if (!('hidden_size' in config) || typeof config.hidden_size !== 'number') {
throw new Error('Invalid model configuration');
}
return {
extractor,
dimensions: config.hidden_size,
};
};
#getExtractor = async (name: string) => {
if (!this.#extractors.has(name)) {
this.#extractors.set(name, this.#setupExctractor(name));
}
const extractor = await this.#extractors.get(name);
if (!extractor) {
throw new Error('Extractor not found');
}
return extractor;
};
public extract = async (options: ExtractOptions) => {
const { input, model } = options;
const { extractor, dimensions } = await this.#getExtractor(model);
const output = await extractor(input, { pooling: 'cls' });
return output.tolist().map((v: ExplicitAny) => new Vector(v, dimensions));
};
public getDimensions = async (model: string) => {
const { dimensions } = await this.#getExtractor(model);
return dimensions;
};
public getFieldType = async (model: string) => {
const dimensions = await this.getDimensions(model);
return `vector(${dimensions})`;
};
}
export { EmbeddingsService, Vector };

View File

@@ -1,37 +0,0 @@
import { cos_sim } from '@huggingface/transformers';
import { toSql } from 'pgvector';
class Vector {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
#value: any;
#dimentions: number;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
constructor(value: any, dimentions: number) {
this.#value = value;
this.#dimentions = dimentions;
}
public get value() {
return this.#value;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
public set value(value: any) {
this.#value = value;
}
public get dimentions() {
return this.#dimentions;
}
public toSql = () => {
return toSql(this.#value);
};
public distanceTo = (other: Vector) => {
return cos_sim(this.#value, other.value);
};
}
export { Vector };

View File

@@ -1,44 +0,0 @@
import { EmbeddingsService } from '../embeddings/embeddings.js';
import type { Document } from '../documents/documents.schemas.js';
import type { Chunk, Splitter } from './splitter.types.js';
import { textSplitter } from './splitters/splitters.text.js';
import type { Services } from '#root/utils/utils.services.js';
import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js';
class SplittingService {
#services: Services;
#chunkers: Set<Splitter>;
constructor(services: Services) {
this.#services = services;
this.#chunkers = new Set();
this.addChunkers([textSplitter]);
}
public addChunkers = (splitter: Splitter[]) => {
this.#chunkers = this.#chunkers.union(new Set(splitter));
};
public chunk = async (input: Document): Promise<Chunk[]> => {
const splitter = this.#chunkers.values().find((splitter) => splitter.match(input));
if (!splitter) {
return [];
}
const chunks = await splitter.chunk(input);
const embeddingsService = this.#services.get(EmbeddingsService);
const vectors = await embeddingsService.extract({
input: chunks,
model: EMBEDDING_MODEL,
});
return chunks.map((content, index) => ({
content,
vector: vectors[index],
model: EMBEDDING_MODEL,
}));
};
}
export * from './splitter.types.js';
export { SplittingService };

View File

@@ -1,15 +0,0 @@
import type { Document } from '../documents/documents.schemas.js';
import type { Vector } from '../embeddings/embeddings.vector.js';
type Chunk = {
content: string;
vector: Vector;
model: string;
};
type Splitter = {
match: (document: Document) => boolean;
chunk: (document: Document) => Promise<string[]>;
};
export type { Chunk, Splitter };

View File

@@ -1,17 +0,0 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.js';
const textSplitter: Splitter = {
match: (document) => !!document.content,
chunk: async (document) => {
if (!document.content) {
return [];
}
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
const texts = await splitter.splitText(document.content);
return texts;
},
};
export { textSplitter };

View File

@@ -1,17 +0,0 @@
import { DatabaseService } from '../database/database.js';
import { Services } from '#root/utils/utils.services.js';
class WarmupService {
#services: Services;
constructor(services: Services) {
this.#services = services;
}
public ensure = async () => {
const databaseService = this.#services.get(DatabaseService);
await databaseService.getInstance();
};
}
export { WarmupService };

View File

@@ -1,14 +0,0 @@
import deepEqual from 'deep-equal';
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) => {
for (const key of keys) {
const avalue = a[key];
const bvalue = b[key];
if (!deepEqual(avalue, bvalue)) {
return false;
}
}
return true;
};
export { compareObjectKeys };

View File

@@ -1,3 +0,0 @@
const EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';
export { EMBEDDING_MODEL };

View File

@@ -1,66 +0,0 @@
import type { ExplicitAny } from '#root/global.js';
type EventListener<T extends unknown[]> = (...args: T) => void | Promise<void>;
type OnOptions = {
abortSignal?: AbortSignal;
};
class EventEmitter<T extends Record<string, (...args: ExplicitAny[]) => void | Promise<void>>> {
#listeners = new Map<keyof T, Set<EventListener<ExplicitAny>>>();
on = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
const { abortSignal } = options;
if (!this.#listeners.has(event)) {
this.#listeners.set(event, new Set());
}
const callbackClone = (...args: Parameters<T[K]>) => callback(...args);
const abortController = new AbortController();
const listeners = this.#listeners.get(event);
if (!listeners) {
throw new Error('Event registration failed');
}
abortSignal?.addEventListener('abort', abortController.abort);
listeners.add(callbackClone);
abortController.signal.addEventListener('abort', () => {
this.#listeners.set(event, listeners?.difference(new Set([callbackClone])));
});
return abortController.abort;
};
once = <K extends keyof T>(event: K, callback: EventListener<Parameters<T[K]>>, options: OnOptions = {}) => {
const abortController = new AbortController();
options.abortSignal?.addEventListener('abort', abortController.abort);
return this.on(
event,
async (...args) => {
abortController.abort();
await callback(...args);
},
{
...options,
abortSignal: abortController.signal,
},
);
};
emit = <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
const listeners = this.#listeners.get(event);
if (!listeners) {
return;
}
for (const listener of listeners) {
listener(...args);
}
};
emitAsync = async <K extends keyof T>(event: K, ...args: Parameters<T[K]>) => {
const listeners = this.#listeners.get(event);
if (!listeners) {
return;
}
await Promise.all(listeners.values().map((listener) => listener(...args)));
};
}
export { EventEmitter };

View File

@@ -1,161 +0,0 @@
import type {
QueryCondition,
QueryConditionNumber,
QueryConditionText,
QueryFilter,
} from '@morten-olsen/stash-query-dsl';
import { type Knex } from 'knex';
/**
* Escapes a JSON key for use in PostgreSQL JSON operators.
* Escapes single quotes by doubling them, which is the PostgreSQL standard.
*/
const escapeJsonKey = (key: string): string => {
return key.replace(/'/g, "''");
};
const getFieldSelector = (query: Knex.QueryBuilder, field: string[], tableName?: string) => {
const baseColumn = field[0];
if (field.length === 1) {
return tableName ? `${tableName}.${baseColumn}` : baseColumn;
}
const baseFieldRef = tableName ? query.client.ref(baseColumn).withSchema(tableName) : query.client.ref(baseColumn);
const jsonPath = field.slice(1);
let sqlExpression = baseFieldRef.toString();
for (let i = 0; i < jsonPath.length - 1; i++) {
const escapedKey = escapeJsonKey(jsonPath[i]);
sqlExpression += ` -> '${escapedKey}'`;
}
const finalElement = jsonPath[jsonPath.length - 1];
const escapedFinalKey = escapeJsonKey(finalElement);
sqlExpression += ` ->> '${escapedFinalKey}'`;
return query.client.raw(sqlExpression);
};
const applyQueryConditionText = (query: Knex.QueryBuilder, { field, tableName, conditions }: QueryConditionText) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equal) {
query = query.where(selector, '=', conditions.equal);
}
if (conditions.notEqual) {
query = query.where(selector, '<>', conditions.notEqual);
}
if (conditions.like) {
query = query.whereLike(selector, conditions.like);
}
if (conditions.notLike) {
query = query.not.whereLike(selector, conditions.notLike);
}
if (conditions.equal === null) {
query = query.whereNull(selector);
}
if (conditions.notEqual === null) {
query = query.whereNotNull(selector);
}
if (conditions.in) {
query = query.whereIn(selector, conditions.in);
}
if (conditions.notIn) {
query = query.whereNotIn(selector, conditions.notIn);
}
return query;
};
const applyQueryConditionNumber = (
query: Knex.QueryBuilder,
{ field, tableName, conditions }: QueryConditionNumber,
) => {
const selector = getFieldSelector(query, field, tableName);
if (conditions.equals !== undefined && conditions.equals !== null) {
query = query.where(selector, '=', conditions.equals);
}
if (conditions.notEquals !== undefined && conditions.notEquals !== null) {
query = query.where(selector, '<>', conditions.notEquals);
}
if (conditions.equals === null) {
query = query.whereNull(selector);
}
if (conditions.notEquals === null) {
query = query.whereNotNull(selector);
}
if (conditions.greaterThan) {
query = query.where(selector, '>', conditions.greaterThan);
}
if (conditions.greaterThanOrEqual) {
query = query.where(selector, '>=', conditions.greaterThanOrEqual);
}
if (conditions.lessThan) {
query = query.where(selector, '<', conditions.lessThan);
}
if (conditions.lessThanOrEqual) {
query = query.where(selector, '<=', conditions.lessThanOrEqual);
}
if (conditions.in) {
query = query.whereIn(selector, conditions.in);
}
if (conditions.notIn) {
query = query.whereNotIn(selector, conditions.notIn);
}
return query;
};
const applyQueryCondition = (query: Knex.QueryBuilder, options: QueryCondition) => {
switch (options.type) {
case 'text': {
return applyQueryConditionText(query, options);
}
case 'number': {
return applyQueryConditionNumber(query, options);
}
default: {
throw new Error(`Unknown filter type`);
}
}
};
const applyQueryFilter = (query: Knex.QueryBuilder, filter: QueryFilter) => {
if (filter.type === 'operator') {
if (filter.conditions.length === 0) {
return query;
}
switch (filter.operator) {
case 'or': {
return query.where((subquery) => {
let isFirst = true;
for (const condition of filter.conditions) {
if (isFirst) {
applyQueryFilter(subquery, condition);
isFirst = false;
} else {
subquery.orWhere((subSubquery) => {
applyQueryFilter(subSubquery, condition);
});
}
}
});
}
case 'and': {
return query.where((subquery) => {
let isFirst = true;
for (const condition of filter.conditions) {
if (isFirst) {
applyQueryFilter(subquery, condition);
isFirst = false;
} else {
subquery.andWhere((subSubquery) => {
applyQueryFilter(subSubquery, condition);
});
}
}
});
}
}
} else {
return applyQueryCondition(query, filter);
}
};
export { applyQueryCondition, applyQueryFilter };

View File

@@ -1,8 +0,0 @@
import { z, type ZodType } from 'zod';
const createListResultSchema = <T extends ZodType>(schema: T) =>
z.object({
items: z.array(schema),
});
export { createListResultSchema };

View File

@@ -1,51 +0,0 @@
const destroy = Symbol('destroy');
const instanceKey = Symbol('instances');
type ServiceDependency<T> = new (services: Services) => T & {
[destroy]?: () => Promise<void> | void;
};
class Services {
[instanceKey]: Map<ServiceDependency<unknown>, unknown>;
constructor() {
this[instanceKey] = new Map();
}
public get = <T>(service: ServiceDependency<T>) => {
if (!this[instanceKey].has(service)) {
this[instanceKey].set(service, new service(this));
}
const instance = this[instanceKey].get(service);
if (!instance) {
throw new Error('Could not generate instance');
}
return instance as T;
};
public set = <T>(service: ServiceDependency<T>, instance: Partial<T>) => {
this[instanceKey].set(service, instance);
};
public clone = () => {
const services = new Services();
services[instanceKey] = Object.fromEntries(this[instanceKey].entries());
};
public destroy = async () => {
await Promise.all(
this[instanceKey].values().map(async (instance) => {
if (
typeof instance === 'object' &&
instance &&
destroy in instance &&
typeof instance[destroy] === 'function'
) {
await instance[destroy]();
}
}),
);
};
}
export { Services, destroy };