change document to text and binary content
This commit is contained in:
@@ -22,12 +22,12 @@ const init: Migration = {
|
|||||||
table.datetime('createdAt').notNullable();
|
table.datetime('createdAt').notNullable();
|
||||||
table.datetime('deletedAt').nullable();
|
table.datetime('deletedAt').nullable();
|
||||||
table.string('contentType').nullable();
|
table.string('contentType').nullable();
|
||||||
table.text('content').nullable();
|
table.binary('content').nullable();
|
||||||
|
table.text('text').nullable();
|
||||||
table.string('source').nullable();
|
table.string('source').nullable();
|
||||||
table.string('sourceId').nullable();
|
table.string('sourceId').nullable();
|
||||||
table.string('type').notNullable();
|
table.string('type').notNullable();
|
||||||
table.integer('typeVersion').nullable();
|
table.integer('typeVersion').nullable();
|
||||||
table.text('searchText').nullable();
|
|
||||||
table.jsonb('metadata').nullable();
|
table.jsonb('metadata').nullable();
|
||||||
|
|
||||||
table.index(['source', 'sourceId']);
|
table.index(['source', 'sourceId']);
|
||||||
@@ -74,12 +74,12 @@ type DocumentRow = {
|
|||||||
createdAt: Date;
|
createdAt: Date;
|
||||||
deletedAt: Date | null;
|
deletedAt: Date | null;
|
||||||
contentType: string | null;
|
contentType: string | null;
|
||||||
content: string | null;
|
content: Buffer | null;
|
||||||
|
text: string | null;
|
||||||
source: string | null;
|
source: string | null;
|
||||||
sourceId: string | null;
|
sourceId: string | null;
|
||||||
type: string;
|
type: string;
|
||||||
typeVersion: number | null;
|
typeVersion: number | null;
|
||||||
searchText: string | null;
|
|
||||||
metadata: unknown;
|
metadata: unknown;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -11,12 +11,11 @@ const documentSchema = z
|
|||||||
updatedAt: z.iso.datetime(),
|
updatedAt: z.iso.datetime(),
|
||||||
deletedAt: z.iso.datetime().nullable(),
|
deletedAt: z.iso.datetime().nullable(),
|
||||||
contentType: z.string().nullable(),
|
contentType: z.string().nullable(),
|
||||||
content: z.string().nullable(),
|
text: z.string().nullable(),
|
||||||
source: z.string().nullable(),
|
source: z.string().nullable(),
|
||||||
sourceId: z.string().nullable(),
|
sourceId: z.string().nullable(),
|
||||||
type: z.string(),
|
type: z.string(),
|
||||||
typeVersion: z.int().nullable(),
|
typeVersion: z.int().nullable(),
|
||||||
searchText: z.string().nullable(),
|
|
||||||
metadata: z.unknown(),
|
metadata: z.unknown(),
|
||||||
})
|
})
|
||||||
.meta({ id: 'Document' });
|
.meta({ id: 'Document' });
|
||||||
@@ -29,17 +28,17 @@ const documentUpsertSchema = z
|
|||||||
owner: z.string().nullish(),
|
owner: z.string().nullish(),
|
||||||
contentType: z.string().nullish(),
|
contentType: z.string().nullish(),
|
||||||
content: z.string().nullish(),
|
content: z.string().nullish(),
|
||||||
|
text: z.string().nullish(),
|
||||||
source: z.string().nullish(),
|
source: z.string().nullish(),
|
||||||
sourceId: z.string().nullish(),
|
sourceId: z.string().nullish(),
|
||||||
type: z.string().optional(),
|
type: z.string().optional(),
|
||||||
typeVersion: z.int().nullish(),
|
typeVersion: z.int().nullish(),
|
||||||
searchText: z.string().nullish(),
|
|
||||||
metadata: z.unknown().nullish(),
|
metadata: z.unknown().nullish(),
|
||||||
})
|
})
|
||||||
.meta({
|
.meta({
|
||||||
id: 'DocumentUpsert',
|
id: 'DocumentUpsert',
|
||||||
example: {
|
example: {
|
||||||
content: 'the cat is yellow',
|
text: 'the cat is yellow',
|
||||||
contentType: 'text/plain',
|
contentType: 'text/plain',
|
||||||
source: 'test',
|
source: 'test',
|
||||||
sourceId: 'test',
|
sourceId: 'test',
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { EventEmitter } from '../../utils/utils.event-emitter.js';
|
|||||||
import type { Services } from '../../utils/utils.services.js';
|
import type { Services } from '../../utils/utils.services.js';
|
||||||
import { compareObjectKeys } from '../../utils/utils.compare.js';
|
import { compareObjectKeys } from '../../utils/utils.compare.js';
|
||||||
import { applyQueryFilter } from '../../utils/utils.query.js';
|
import { applyQueryFilter } from '../../utils/utils.query.js';
|
||||||
|
import { base64ToMaybeBuffer } from '../../utils/utils.binary.js';
|
||||||
|
|
||||||
import { mapFromDocumentRow } from './documents.mapping.js';
|
import { mapFromDocumentRow } from './documents.mapping.js';
|
||||||
import type {
|
import type {
|
||||||
@@ -85,7 +86,6 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
|||||||
'source',
|
'source',
|
||||||
'content',
|
'content',
|
||||||
'contentType',
|
'contentType',
|
||||||
'searchText',
|
|
||||||
'type',
|
'type',
|
||||||
'typeVersion',
|
'typeVersion',
|
||||||
'metadata',
|
'metadata',
|
||||||
@@ -100,6 +100,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
|||||||
await trx<TableRows['documents']>(tableNames.documents)
|
await trx<TableRows['documents']>(tableNames.documents)
|
||||||
.update({
|
.update({
|
||||||
...document,
|
...document,
|
||||||
|
content: base64ToMaybeBuffer(document.content),
|
||||||
id,
|
id,
|
||||||
updatedAt: now,
|
updatedAt: now,
|
||||||
})
|
})
|
||||||
@@ -107,6 +108,7 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
|||||||
const resultDocument: Document = mapFromDocumentRow({
|
const resultDocument: Document = mapFromDocumentRow({
|
||||||
...current,
|
...current,
|
||||||
...document,
|
...document,
|
||||||
|
content: base64ToMaybeBuffer(document.content ?? current.content) || null,
|
||||||
id,
|
id,
|
||||||
});
|
});
|
||||||
this.emit('updated', resultDocument);
|
this.emit('updated', resultDocument);
|
||||||
@@ -121,21 +123,22 @@ class DocumentsService extends EventEmitter<DocumentsServiceEvents> {
|
|||||||
metadata: {},
|
metadata: {},
|
||||||
type: 'raw',
|
type: 'raw',
|
||||||
...document,
|
...document,
|
||||||
|
content: base64ToMaybeBuffer(document.content),
|
||||||
id,
|
id,
|
||||||
createdAt: now,
|
createdAt: now,
|
||||||
updatedAt: now,
|
updatedAt: now,
|
||||||
});
|
});
|
||||||
const resultDocument: Document = mapFromDocumentRow({
|
const resultDocument: Document = mapFromDocumentRow({
|
||||||
type: 'raw',
|
type: 'raw',
|
||||||
|
text: null,
|
||||||
owner: null,
|
owner: null,
|
||||||
contentType: null,
|
contentType: null,
|
||||||
content: null,
|
|
||||||
source: null,
|
source: null,
|
||||||
sourceId: null,
|
sourceId: null,
|
||||||
typeVersion: null,
|
typeVersion: null,
|
||||||
searchText: null,
|
|
||||||
metadata: {},
|
metadata: {},
|
||||||
...document,
|
...document,
|
||||||
|
content: base64ToMaybeBuffer(document.content) || null,
|
||||||
deletedAt: null,
|
deletedAt: null,
|
||||||
id,
|
id,
|
||||||
createdAt: now,
|
createdAt: now,
|
||||||
|
|||||||
@@ -3,13 +3,13 @@ import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|||||||
import type { Splitter } from '../splitter.types.js';
|
import type { Splitter } from '../splitter.types.js';
|
||||||
|
|
||||||
const textSplitter: Splitter = {
|
const textSplitter: Splitter = {
|
||||||
match: (document) => !!document.content,
|
match: (document) => !!document.text,
|
||||||
chunk: async (document) => {
|
chunk: async (document) => {
|
||||||
if (!document.content) {
|
if (!document.text) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
|
const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 100, chunkOverlap: 0 });
|
||||||
const texts = await splitter.splitText(document.content);
|
const texts = await splitter.splitText(document.text);
|
||||||
return texts;
|
return texts;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|||||||
14
packages/runtime/src/utils/utils.binary.ts
Normal file
14
packages/runtime/src/utils/utils.binary.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/**
 * Converts an optional base64 payload into a `Buffer`, preserving the
 * distinction between "explicitly null" and "not provided".
 *
 * - `null` is passed through unchanged.
 * - `undefined` and the empty string both yield `undefined`.
 * - A value that is already a `Buffer` is returned as-is (same instance, no copy).
 * - Any other string is decoded as base64 into a new `Buffer`.
 *
 * @param input - Base64-encoded string, an existing buffer, or a nullish value.
 * @returns The decoded or passed-through buffer, `null`, or `undefined`.
 */
const base64ToMaybeBuffer = (input?: string | null | Buffer): Buffer | null | undefined => {
  // Keep an explicit null distinguishable from "nothing supplied".
  if (input === null) {
    return null;
  }

  // An empty string carries no payload and is treated like a missing value.
  if (input === undefined || input === '') {
    return undefined;
  }

  // Anything left that is not a string is already a Buffer; hand it back untouched.
  if (typeof input !== 'string') {
    return input;
  }

  return Buffer.from(input, 'base64');
};
|
||||||
|
|
||||||
|
export { base64ToMaybeBuffer };
|
||||||
@@ -1,9 +1,13 @@
|
|||||||
import deepEqual from 'deep-equal';
|
import deepEqual from 'deep-equal';
|
||||||
|
|
||||||
const compareObjectKeys = <T extends Record<string, unknown>>(a: T, b: T, keys: (keyof T)[]) => {
|
const compareObjectKeys = <A extends Record<string, unknown>, B extends Record<string, unknown>>(
|
||||||
|
a: A,
|
||||||
|
b: B,
|
||||||
|
keys: (keyof (A & B))[],
|
||||||
|
) => {
|
||||||
for (const key of keys) {
|
for (const key of keys) {
|
||||||
const avalue = a[key];
|
const avalue = a[key as keyof A];
|
||||||
const bvalue = b[key];
|
const bvalue = b[key as keyof B];
|
||||||
if (!deepEqual(avalue, bvalue)) {
|
if (!deepEqual(avalue, bvalue)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,21 +11,21 @@ const documents: DocumentUpsert[] = [
|
|||||||
foo: 'bar',
|
foo: 'bar',
|
||||||
},
|
},
|
||||||
type: 'demo',
|
type: 'demo',
|
||||||
content: 'the cat is yellow',
|
text: 'the cat is yellow',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
metadata: {
|
metadata: {
|
||||||
foo: 'bar',
|
foo: 'bar',
|
||||||
},
|
},
|
||||||
type: 'demo',
|
type: 'demo',
|
||||||
content: 'the dog is blue',
|
text: 'the dog is blue',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
metadata: {
|
metadata: {
|
||||||
foo: 'baz',
|
foo: 'baz',
|
||||||
},
|
},
|
||||||
source: 'test',
|
source: 'test',
|
||||||
content: 'the pig says hi',
|
text: 'the pig says hi',
|
||||||
type: 'demo',
|
type: 'demo',
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|||||||
Reference in New Issue
Block a user