diff --git a/packages/runtime/src/services/database/migrations/migrations.001-init.ts b/packages/runtime/src/services/database/migrations/migrations.001-init.ts index 46c83f1..19b3594 100644 --- a/packages/runtime/src/services/database/migrations/migrations.001-init.ts +++ b/packages/runtime/src/services/database/migrations/migrations.001-init.ts @@ -22,12 +22,12 @@ const init: Migration = { table.datetime('createdAt').notNullable(); table.datetime('deletedAt').nullable(); table.string('contentType').nullable(); - table.text('content').nullable(); + table.binary('content').nullable(); + table.text('text').nullable(); table.string('source').nullable(); table.string('sourceId').nullable(); table.string('type').notNullable(); table.integer('typeVersion').nullable(); - table.text('searchText').nullable(); table.jsonb('metadata').nullable(); table.index(['source', 'sourceId']); @@ -74,12 +74,12 @@ type DocumentRow = { createdAt: Date; deletedAt: Date | null; contentType: string | null; - content: string | null; + content: Buffer | null; + text: string | null; source: string | null; sourceId: string | null; type: string; typeVersion: number | null; - searchText: string | null; metadata: unknown; }; diff --git a/packages/runtime/src/services/documents/documents.schemas.ts b/packages/runtime/src/services/documents/documents.schemas.ts index c12001c..db749f4 100644 --- a/packages/runtime/src/services/documents/documents.schemas.ts +++ b/packages/runtime/src/services/documents/documents.schemas.ts @@ -11,12 +11,11 @@ const documentSchema = z updatedAt: z.iso.datetime(), deletedAt: z.iso.datetime().nullable(), contentType: z.string().nullable(), - content: z.string().nullable(), + text: z.string().nullable(), source: z.string().nullable(), sourceId: z.string().nullable(), type: z.string(), typeVersion: z.int().nullable(), - searchText: z.string().nullable(), metadata: z.unknown(), }) .meta({ id: 'Document' }); @@ -29,17 +28,17 @@ const documentUpsertSchema = z owner: z.string().nullish(), contentType: z.string().nullish(), content: z.string().nullish(), + text: z.string().nullish(), source: z.string().nullish(), sourceId: z.string().nullish(), type: z.string().optional(), typeVersion: z.int().nullish(), - searchText: z.string().nullish(), metadata: z.unknown().nullish(), }) .meta({ id: 'DocumentUpsert', example: { - content: 'the cat is yellow', + text: 'the cat is yellow', contentType: 'text/plain', source: 'test', sourceId: 'test', diff --git a/packages/runtime/src/services/documents/documents.ts b/packages/runtime/src/services/documents/documents.ts index ef0332b..e24ba8b 100644 --- a/packages/runtime/src/services/documents/documents.ts +++ b/packages/runtime/src/services/documents/documents.ts @@ -6,6 +6,7 @@ import { EventEmitter } from '../../utils/utils.event-emitter.js'; import type { Services } from '../../utils/utils.services.js'; import { compareObjectKeys } from '../../utils/utils.compare.js'; import { applyQueryFilter } from '../../utils/utils.query.js'; +import { base64ToMaybeBuffer } from '../../utils/utils.binary.js'; import { mapFromDocumentRow } from './documents.mapping.js'; import type { @@ -85,7 +86,6 @@ class DocumentsService extends EventEmitter { 'source', 'content', 'contentType', - 'searchText', 'type', 'typeVersion', 'metadata', @@ -100,6 +100,7 @@ class DocumentsService extends EventEmitter { await trx(tableNames.documents) .update({ ...document, + content: base64ToMaybeBuffer(document.content), id, updatedAt: now, }) @@ -107,6 +108,7 @@ class DocumentsService extends EventEmitter { const resultDocument: Document = mapFromDocumentRow({ ...current, ...document, + content: base64ToMaybeBuffer(document.content ?? current.content) || null, id, }); this.emit('updated', resultDocument); @@ -121,21 +123,22 @@ class DocumentsService extends EventEmitter { metadata: {}, type: 'raw', ...document, + content: base64ToMaybeBuffer(document.content), id, createdAt: now, updatedAt: now, }); const resultDocument: Document = mapFromDocumentRow({ type: 'raw', + text: null, owner: null, contentType: null, - content: null, source: null, sourceId: null, typeVersion: null, - searchText: null, metadata: {}, ...document, + content: base64ToMaybeBuffer(document.content) || null, deletedAt: null, id, createdAt: now, diff --git a/packages/runtime/src/services/splitter/splitters/splitters.text.ts b/packages/runtime/src/services/splitter/splitters/splitters.text.ts index b1841f9..23422d2 100644 --- a/packages/runtime/src/services/splitter/splitters/splitters.text.ts +++ b/packages/runtime/src/services/splitter/splitters/splitters.text.ts @@ -3,13 +3,13 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; import type { Splitter } from '../splitter.types.js'; const textSplitter: Splitter = { - match: (document) => !!document.content, + match: (document) => !!document.text, chunk: async (document) => { - if (!document.content) { + if (!document.text) { return []; } const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 }); - const texts = await splitter.splitText(document.content); + const texts = await splitter.splitText(document.text); return texts; }, }; diff --git a/packages/runtime/src/utils/utils.binary.ts b/packages/runtime/src/utils/utils.binary.ts new file mode 100644 index 0000000..d7b1d03 --- /dev/null +++ b/packages/runtime/src/utils/utils.binary.ts @@ -0,0 +1,14 @@ +const base64ToMaybeBuffer = (input?: string | null | Buffer) => { + if (input === null) { + return input; + } + if (!input) { + return undefined; + } + if (typeof input === 'object') { + return input; + } + return Buffer.from(input, 'base64'); +}; + +export { base64ToMaybeBuffer }; diff --git a/packages/runtime/src/utils/utils.compare.ts b/packages/runtime/src/utils/utils.compare.ts index a2e095f..61c0752 100644 --- a/packages/runtime/src/utils/utils.compare.ts +++ b/packages/runtime/src/utils/utils.compare.ts @@ -1,9 +1,13 @@ import deepEqual from 'deep-equal'; -const compareObjectKeys = >(a: T, b: T, keys: (keyof T)[]) => { +const compareObjectKeys = , B extends Record>( + a: A, + b: B, + keys: (keyof (A & B))[], +) => { for (const key of keys) { - const avalue = a[key]; - const bvalue = b[key]; + const avalue = a[key as keyof A]; + const bvalue = b[key as keyof B]; if (!deepEqual(avalue, bvalue)) { return false; } diff --git a/packages/server/src/dev.ts b/packages/server/src/dev.ts index 00b57f1..87ab23e 100644 --- a/packages/server/src/dev.ts +++ b/packages/server/src/dev.ts @@ -11,21 +11,21 @@ const documents: DocumentUpsert[] = [ foo: 'bar', }, type: 'demo', - content: 'the cat is yellow', + text: 'the cat is yellow', }, { metadata: { foo: 'bar', }, type: 'demo', - content: 'the dog is blue', + text: 'the dog is blue', }, { metadata: { foo: 'baz', }, source: 'test', - content: 'the pig says hi', + text: 'the pig says hi', type: 'demo', }, ];