change document to text and binary content
This commit is contained in:
@@ -22,12 +22,12 @@ const init: Migration = {
|
||||
table.datetime('createdAt').notNullable();
|
||||
table.datetime('deletedAt').nullable();
|
||||
table.string('contentType').nullable();
|
||||
table.text('content').nullable();
|
||||
table.binary('content').nullable();
|
||||
table.text('text').nullable();
|
||||
table.string('source').nullable();
|
||||
table.string('sourceId').nullable();
|
||||
table.string('type').notNullable();
|
||||
table.integer('typeVersion').nullable();
|
||||
table.text('searchText').nullable();
|
||||
table.jsonb('metadata').nullable();
|
||||
|
||||
table.index(['source', 'sourceId']);
|
||||
@@ -74,12 +74,12 @@ type DocumentRow = {
|
||||
createdAt: Date;
|
||||
deletedAt: Date | null;
|
||||
contentType: string | null;
|
||||
content: string | null;
|
||||
content: Buffer | null;
|
||||
text: string | null;
|
||||
source: string | null;
|
||||
sourceId: string | null;
|
||||
type: string;
|
||||
typeVersion: number | null;
|
||||
searchText: string | null;
|
||||
metadata: unknown;
|
||||
};
|
||||
|
||||
|
||||
@@ -11,12 +11,11 @@ const documentSchema = z
|
||||
updatedAt: z.iso.datetime(),
|
||||
deletedAt: z.iso.datetime().nullable(),
|
||||
contentType: z.string().nullable(),
|
||||
content: z.string().nullable(),
|
||||
text: z.string().nullable(),
|
||||
source: z.string().nullable(),
|
||||
sourceId: z.string().nullable(),
|
||||
type: z.string(),
|
||||
typeVersion: z.int().nullable(),
|
||||
searchText: z.string().nullable(),
|
||||
metadata: z.unknown(),
|
||||
})
|
||||
.meta({ id: 'Document' });
|
||||
@@ -29,17 +28,17 @@ const documentUpsertSchema = z
|
||||
owner: z.string().nullish(),
|
||||
contentType: z.string().nullish(),
|
||||
content: z.string().nullish(),
|
||||
text: z.string().nullish(),
|
||||
source: z.string().nullish(),
|
||||
sourceId: z.string().nullish(),
|
||||
type: z.string().optional(),
|
||||
typeVersion: z.int().nullish(),
|
||||
searchText: z.string().nullish(),
|
||||
metadata: z.unknown().nullish(),
|
||||
})
|
||||
.meta({
|
||||
id: 'DocumentUpsert',
|
||||
example: {
|
||||
content: 'the cat is yellow',
|
||||
text: 'the cat is yellow',
|
||||
contentType: 'text/plain',
|
||||
source: 'test',
|
||||
sourceId: 'test',
|
||||
|
||||
@@ -6,6 +6,7 @@ import { EventEmitter } from '../../utils/utils.event-emitter.js';
|
||||
import type { Services } from '../../utils/utils.services.js';
|
||||
import { compareObjectKeys } from '../../utils/utils.compare.js';
|
||||
import { applyQueryFilter } from '../../utils/utils.query.js';
|
||||
import { base64ToMaybeBuffer } from '../../utils/utils.binary.js';
|
||||
|
||||
import { mapFromDocumentRow } from './documents.mapping.js';
|
||||
import type {
|
||||
@@ -85,7 +86,6 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
||||
'source',
|
||||
'content',
|
||||
'contentType',
|
||||
'searchText',
|
||||
'type',
|
||||
'typeVersion',
|
||||
'metadata',
|
||||
@@ -100,6 +100,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
||||
await trx<TableRows['documents']>(tableNames.documents)
|
||||
.update({
|
||||
...document,
|
||||
content: base64ToMaybeBuffer(document.content),
|
||||
id,
|
||||
updatedAt: now,
|
||||
})
|
||||
@@ -107,6 +108,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
||||
const resultDocument: Document = mapFromDocumentRow({
|
||||
...current,
|
||||
...document,
|
||||
content: base64ToMaybeBuffer(document.content ?? current.content) || null,
|
||||
id,
|
||||
});
|
||||
this.emit('updated', resultDocument);
|
||||
@@ -121,21 +123,22 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
||||
metadata: {},
|
||||
type: 'raw',
|
||||
...document,
|
||||
content: base64ToMaybeBuffer(document.content),
|
||||
id,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
});
|
||||
const resultDocument: Document = mapFromDocumentRow({
|
||||
type: 'raw',
|
||||
text: null,
|
||||
owner: null,
|
||||
contentType: null,
|
||||
content: null,
|
||||
source: null,
|
||||
sourceId: null,
|
||||
typeVersion: null,
|
||||
searchText: null,
|
||||
metadata: {},
|
||||
...document,
|
||||
content: base64ToMaybeBuffer(document.content) || null,
|
||||
deletedAt: null,
|
||||
id,
|
||||
createdAt: now,
|
||||
|
||||
@@ -3,13 +3,13 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
import type { Splitter } from '../splitter.types.js';
|
||||
|
||||
const textSplitter: Splitter = {
|
||||
match: (document) => !!document.content,
|
||||
match: (document) => !!document.text,
|
||||
chunk: async (document) => {
|
||||
if (!document.content) {
|
||||
if (!document.text) {
|
||||
return [];
|
||||
}
|
||||
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
|
||||
const texts = await splitter.splitText(document.content);
|
||||
const texts = await splitter.splitText(document.text);
|
||||
return texts;
|
||||
},
|
||||
};
|
||||
|
||||
14
packages/runtime/src/utils/utils.binary.ts
Normal file
14
packages/runtime/src/utils/utils.binary.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
/**
 * Normalises a document `content` value into something that can be stored as binary.
 *
 * - `null` is passed through unchanged.
 * - Any other falsy input (`undefined` or an empty string) yields `undefined`.
 * - An object input (a `Buffer`, per the parameter type) is returned as-is.
 * - Any remaining string is decoded as base64 into a new `Buffer`.
 *
 * @param input - A base64 string, an existing `Buffer`, `null`, or nothing.
 * @returns The decoded or original `Buffer`, `null`, or `undefined` as described above.
 */
const base64ToMaybeBuffer = (input?: string | null | Buffer) => {
  // Keep an explicit null distinct from "no value supplied".
  if (input === null) {
    return null;
  }
  if (!input) {
    return undefined;
  }
  // Already binary: hand it back untouched; otherwise decode the base64 text.
  return typeof input === 'object' ? input : Buffer.from(input, 'base64');
};
|
||||
|
||||
export { base64ToMaybeBuffer };
|
||||
@@ -1,9 +1,13 @@
|
||||
import deepEqual from 'deep-equal';
|
||||
|
||||
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) => {
|
||||
const compareObjectKeys = <A extends Record<string, unknown>, B extends Record<string, unknown>>(
|
||||
a: A,
|
||||
b: B,
|
||||
keys: (keyof (A & B))[],
|
||||
) => {
|
||||
for (const key of keys) {
|
||||
const avalue = a[key];
|
||||
const bvalue = b[key];
|
||||
const avalue = a[key as keyof A];
|
||||
const bvalue = b[key as keyof B];
|
||||
if (!deepEqual(avalue, bvalue)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user