change document to text and binary content
Some checks failed
Build and release / Build (push) Failing after 1m12s
Build and release / update-release-draft (push) Has been skipped
Build and release / Release (push) Has been skipped

This commit is contained in:
Morten Olsen
2025-12-10 22:06:15 +01:00
parent 25f614a730
commit 1255639058
7 changed files with 40 additions and 20 deletions

View File

@@ -22,12 +22,12 @@ const init: Migration = {
table.datetime('createdAt').notNullable(); table.datetime('createdAt').notNullable();
table.datetime('deletedAt').nullable(); table.datetime('deletedAt').nullable();
table.string('contentType').nullable(); table.string('contentType').nullable();
table.text('content').nullable(); table.binary('content').nullable();
table.text('text').nullable();
table.string('source').nullable(); table.string('source').nullable();
table.string('sourceId').nullable(); table.string('sourceId').nullable();
table.string('type').notNullable(); table.string('type').notNullable();
table.integer('typeVersion').nullable(); table.integer('typeVersion').nullable();
table.text('searchText').nullable();
table.jsonb('metadata').nullable(); table.jsonb('metadata').nullable();
table.index(['source', 'sourceId']); table.index(['source', 'sourceId']);
@@ -74,12 +74,12 @@ type DocumentRow = {
createdAt: Date; createdAt: Date;
deletedAt: Date | null; deletedAt: Date | null;
contentType: string | null; contentType: string | null;
content: string | null; content: Buffer | null;
text: string | null;
source: string | null; source: string | null;
sourceId: string | null; sourceId: string | null;
type: string; type: string;
typeVersion: number | null; typeVersion: number | null;
searchText: string | null;
metadata: unknown; metadata: unknown;
}; };

View File

@@ -11,12 +11,11 @@ const documentSchema = z
updatedAt: z.iso.datetime(), updatedAt: z.iso.datetime(),
deletedAt: z.iso.datetime().nullable(), deletedAt: z.iso.datetime().nullable(),
contentType: z.string().nullable(), contentType: z.string().nullable(),
content: z.string().nullable(), text: z.string().nullable(),
source: z.string().nullable(), source: z.string().nullable(),
sourceId: z.string().nullable(), sourceId: z.string().nullable(),
type: z.string(), type: z.string(),
typeVersion: z.int().nullable(), typeVersion: z.int().nullable(),
searchText: z.string().nullable(),
metadata: z.unknown(), metadata: z.unknown(),
}) })
.meta({ id: 'Document' }); .meta({ id: 'Document' });
@@ -29,17 +28,17 @@ const documentUpsertSchema = z
owner: z.string().nullish(), owner: z.string().nullish(),
contentType: z.string().nullish(), contentType: z.string().nullish(),
content: z.string().nullish(), content: z.string().nullish(),
text: z.string().nullish(),
source: z.string().nullish(), source: z.string().nullish(),
sourceId: z.string().nullish(), sourceId: z.string().nullish(),
type: z.string().optional(), type: z.string().optional(),
typeVersion: z.int().nullish(), typeVersion: z.int().nullish(),
searchText: z.string().nullish(),
metadata: z.unknown().nullish(), metadata: z.unknown().nullish(),
}) })
.meta({ .meta({
id: 'DocumentUpsert', id: 'DocumentUpsert',
example: { example: {
content: 'the cat is yellow', text: 'the cat is yellow',
contentType: 'text/plain', contentType: 'text/plain',
source: 'test', source: 'test',
sourceId: 'test', sourceId: 'test',

View File

@@ -6,6 +6,7 @@ import { EventEmitter } from '../../utils/utils.event-emitter.js';
import type { Services } from '../../utils/utils.services.js'; import type { Services } from '../../utils/utils.services.js';
import { compareObjectKeys } from '../../utils/utils.compare.js'; import { compareObjectKeys } from '../../utils/utils.compare.js';
import { applyQueryFilter } from '../../utils/utils.query.js'; import { applyQueryFilter } from '../../utils/utils.query.js';
import { base64ToMaybeBuffer } from '../../utils/utils.binary.js';
import { mapFromDocumentRow } from './documents.mapping.js'; import { mapFromDocumentRow } from './documents.mapping.js';
import type { import type {
@@ -85,7 +86,6 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
'source', 'source',
'content', 'content',
'contentType', 'contentType',
'searchText',
'type', 'type',
'typeVersion', 'typeVersion',
'metadata', 'metadata',
@@ -100,6 +100,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
await trx<TableRows['documents']>(tableNames.documents) await trx<TableRows['documents']>(tableNames.documents)
.update({ .update({
...document, ...document,
content: base64ToMaybeBuffer(document.content),
id, id,
updatedAt: now, updatedAt: now,
}) })
@@ -107,6 +108,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
const resultDocument: Document = mapFromDocumentRow({ const resultDocument: Document = mapFromDocumentRow({
...current, ...current,
...document, ...document,
content: base64ToMaybeBuffer(document.content ?? current.content) || null,
id, id,
}); });
this.emit('updated', resultDocument); this.emit('updated', resultDocument);
@@ -121,21 +123,22 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
metadata: {}, metadata: {},
type: 'raw', type: 'raw',
...document, ...document,
content: base64ToMaybeBuffer(document.content),
id, id,
createdAt: now, createdAt: now,
updatedAt: now, updatedAt: now,
}); });
const resultDocument: Document = mapFromDocumentRow({ const resultDocument: Document = mapFromDocumentRow({
type: 'raw', type: 'raw',
text: null,
owner: null, owner: null,
contentType: null, contentType: null,
content: null,
source: null, source: null,
sourceId: null, sourceId: null,
typeVersion: null, typeVersion: null,
searchText: null,
metadata: {}, metadata: {},
...document, ...document,
content: base64ToMaybeBuffer(document.content) || null,
deletedAt: null, deletedAt: null,
id, id,
createdAt: now, createdAt: now,

View File

@@ -3,13 +3,13 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import type { Splitter } from '../splitter.types.js'; import type { Splitter } from '../splitter.types.js';
const textSplitter: Splitter = { const textSplitter: Splitter = {
match: (document) => !!document.content, match: (document) => !!document.text,
chunk: async (document) => { chunk: async (document) => {
if (!document.content) { if (!document.text) {
return []; return [];
} }
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 }); const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
const texts = await splitter.splitText(document.content); const texts = await splitter.splitText(document.text);
return texts; return texts;
}, },
}; };

View File

@@ -0,0 +1,14 @@
/**
 * Normalises an optional content value into a `Buffer`.
 *
 * - `null` is passed through as `null`.
 * - `undefined` (or an omitted argument) yields `undefined`.
 * - An empty string is treated the same as a missing value and yields `undefined`.
 * - A non-empty string is decoded as base64 into a new `Buffer`.
 * - An existing `Buffer` is returned as-is (same instance, no copy).
 *
 * @param input - A base64 string, an existing `Buffer`, `null`, or `undefined`.
 * @returns The decoded or passed-through `Buffer`, `null`, or `undefined` as described above.
 */
const base64ToMaybeBuffer = (input?: string | null | Buffer) => {
  if (typeof input === 'string') {
    // The empty string is falsy and is deliberately reported as "no value".
    return input.length === 0 ? undefined : Buffer.from(input, 'base64');
  }
  // Only `null`, `undefined`, or a Buffer can reach this point; each one is
  // handed back unchanged.
  return input;
};
export { base64ToMaybeBuffer };

View File

@@ -1,9 +1,13 @@
import deepEqual from 'deep-equal'; import deepEqual from 'deep-equal';
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) => { const compareObjectKeys = <A extends Record<string, unknown>, B extends Record<string, unknown>>(
a: A,
b: B,
keys: (keyof (A & B))[],
) => {
for (const key of keys) { for (const key of keys) {
const avalue = a[key]; const avalue = a[key as keyof A];
const bvalue = b[key]; const bvalue = b[key as keyof B];
if (!deepEqual(avalue, bvalue)) { if (!deepEqual(avalue, bvalue)) {
return false; return false;
} }

View File

@@ -11,21 +11,21 @@ const documents: DocumentUpsert[] = [
foo: 'bar', foo: 'bar',
}, },
type: 'demo', type: 'demo',
content: 'the cat is yellow', text: 'the cat is yellow',
}, },
{ {
metadata: { metadata: {
foo: 'bar', foo: 'bar',
}, },
type: 'demo', type: 'demo',
content: 'the dog is blue', text: 'the dog is blue',
}, },
{ {
metadata: { metadata: {
foo: 'baz', foo: 'baz',
}, },
source: 'test', source: 'test',
content: 'the pig says hi', text: 'the pig says hi',
type: 'demo', type: 'demo',
}, },
]; ];