diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..31994d8 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +node_modules/ +packages/*/dist/ diff --git a/.github/release-drafter-config.yml b/.github/release-drafter-config.yml new file mode 100644 index 0000000..748f0f5 --- /dev/null +++ b/.github/release-drafter-config.yml @@ -0,0 +1,48 @@ +name-template: "$RESOLVED_VERSION 🌈" +tag-template: "$RESOLVED_VERSION" +categories: + - title: "🚀 Features" + labels: + - "feature" + - "enhancement" + - title: "🐛 Bug Fixes" + labels: + - "fix" + - "bugfix" + - "bug" + - title: "🧰 Maintenance" + label: "chore" +change-template: "- $TITLE @$AUTHOR (#$NUMBER)" +change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. +version-resolver: + major: + labels: + - "major" + minor: + labels: + - "minor" + patch: + labels: + - "patch" + default: patch +autolabeler: + - label: "chore" + files: + - "*.md" + branch: + - '/docs{0,1}\/.+/' + - label: "bug" + branch: + - '/fix\/.+/' + title: + - "/fix/i" + - label: "enhancement" + branch: + - '/feature\/.+/' + - '/feat\/.+/' + title: + - "/feat:.+/" +template: | + ## Changes + + $CHANGES diff --git "a/.github/workflows/auto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml" "b/.github/workflows/auto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml" new file mode 100644 index 0000000..f134cf8 --- /dev/null +++ "b/.github/workflows/auto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml\nauto-labeler.yaml" @@ -0,0 +1,21 @@ +name: Auto Labeler +on: + pull_request: + types: [opened, reopened, synchronize] + +permissions: + contents: read + +jobs: + auto-labeler: + permissions: + contents: write + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: release-drafter/release-drafter@v6 + with: + config-name: release-drafter-config.yml + disable-releaser: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/job-build.yaml b/.github/workflows/job-build.yaml new file mode 100644 index 0000000..fea48a8 --- /dev/null +++ b/.github/workflows/job-build.yaml @@ -0,0 +1,55 @@ +name: Build +on: + workflow_call: +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: "${{ env.NODE_VERSION }}" + registry-url: "${{ env.NODE_REGISTRY }}" + + - uses: pnpm/action-setup@v4 + name: Install pnpm + with: + version: ${{ env.PNPM_VERSION }} + run_install: false + + - name: Get pnpm store directory + shell: bash + run: | + echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV + + - uses: actions/cache@v4 + name: Setup pnpm cache + with: + path: ${{ env.STORE_PATH }} + key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-pnpm-store- + + - name: Install dependencies + run: pnpm install + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + - name: Build + run: pnpm build + + - name: Run tests + run: pnpm test + + - uses: actions/upload-artifact@v4 + with: + name: lib + retention-days: 5 + path: | + packages/*/dist + extensions/*/dist + server/*/dist + package.json + README.md diff --git a/.github/workflows/job-draft-release.yaml b/.github/workflows/job-draft-release.yaml new file mode 100644 index 0000000..852935d --- /dev/null +++ b/.github/workflows/job-draft-release.yaml @@ -0,0 +1,18 @@ +name: Draft release +on: + workflow_call: +jobs: + draft-release: + name: Update release drafter + permissions: + contents: write + pull-requests: write + environment: release + runs-on: ubuntu-latest + steps: + - uses: release-drafter/release-drafter@v6 + with: + config-name: release-drafter-config.yml + publish: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/pipeline-default.yaml b/.github/workflows/pipeline-default.yaml new file mode 100644 index 0000000..52e10fc --- /dev/null +++ b/.github/workflows/pipeline-default.yaml @@ -0,0 +1,115 @@ +name: Build and release + +on: + push: + branches: + - main + pull_request: + types: + - opened + - synchronize + +env: + environment: test + release_channel: latest + DO_NOT_TRACK: "1" + NODE_VERSION: "23.x" + NODE_REGISTRY: "https://registry.npmjs.org" + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + DOCKER_REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + PNPM_VERSION: 10.6.0 + +permissions: + contents: write + packages: read + pull-requests: write + id-token: write + actions: read + security-events: write +jobs: + build: + uses: ./.github/workflows/job-build.yaml + name: Build + + update-release-draft: + needs: build + if: github.ref == 'refs/heads/main' + uses: ./.github/workflows/job-draft-release.yaml + + release: + permissions: + contents: read + packages: write + attestations: write + id-token: write + pages: write + name: Release + if: github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + needs: update-release-draft + environment: release + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # - uses: actions/setup-node@v4 + # with: + # node-version: '${{ env.NODE_VERSION }}' + # registry-url: '${{ env.NODE_REGISTRY }}' + # + # - uses: pnpm/action-setup@v4 + # name: Install pnpm + # with: + # version: ${{ env.PNPM_VERSION }} + # run_install: false + # + # - name: Install dependencies + # run: pnpm install + # env: + # NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + # + # - uses: actions/download-artifact@v4 + # with: + # name: lib + # path: ./ + # + # - name: Publish to npm + # run: | + # git config user.name "Github Actions Bot" + # git config user.email "<>" + # node ./scripts/set-version.mjs $(git describe --tag --abbrev=0) + # pnpm publish -r --no-git-checks --access public + # env: + # NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + + - name: Log in to the Container registry + uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 + with: + registry: ${{ env.DOCKER_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + id: push + uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 + with: + context: . + file: ./packages/server/Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + # - name: Generate artifact attestation + # uses: actions/attest-build-provenance@v2 + # with: + # subject-name: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME}} + # subject-digest: ${{ steps.push.outputs.digest }} + # push-to-registry: true diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..9f39a32 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,9 @@ +name: stash +services: + app: + build: + context: . + dockerfile: ./packages/server/Dockerfile + read_only: true + ports: + - 3400:3400 diff --git a/packages/client/package.json b/packages/client/package.json index b3b1aa3..1f993c7 100644 --- a/packages/client/package.json +++ b/packages/client/package.json @@ -25,9 +25,6 @@ }, "name": "@morten-olsen/stash-client", "version": "1.0.0", - "imports": { - "#root/*": "./src/*" - }, "dependencies": { "openapi-fetch": "^0.15.0" } diff --git a/packages/query-dsl/package.json b/packages/query-dsl/package.json index f16e715..2bbf035 100644 --- a/packages/query-dsl/package.json +++ b/packages/query-dsl/package.json @@ -23,9 +23,6 @@ }, "name": "@morten-olsen/stash-query-dsl", "version": "1.0.0", - "imports": { - "#root/*": "./src/*" - }, "dependencies": { "chevrotain": "^11.0.3", "zod": "4.1.13" diff --git a/packages/query-dsl/src/query-parser.parser.ts b/packages/query-dsl/src/query-parser.parser.ts index 51300b2..438d15a 100644 --- a/packages/query-dsl/src/query-parser.parser.ts +++ b/packages/query-dsl/src/query-parser.parser.ts @@ -1,5 +1,4 @@ import { createToken, Lexer, EmbeddedActionsParser } from 'chevrotain'; -import type { ZodType } from 'zod'; import { type QueryFilter, type QueryCondition, queryFilterSchema } from './query-parser.schemas.js'; diff --git a/packages/runtime/package.json b/packages/runtime/package.json index 13f8668..468056e 100644 --- a/packages/runtime/package.json +++ b/packages/runtime/package.json @@ -24,9 +24,6 @@ }, "name": "@morten-olsen/stash-runtime", "version": "1.0.0", - "imports": { - "#root/*": "./src/*" - }, "dependencies": { "@electric-sql/pglite": "^0.3.14", "@huggingface/transformers": "^3.8.1", diff --git a/packages/runtime/src/exports.ts b/packages/runtime/src/exports.ts index 00680f5..a1d03b4 100644 --- a/packages/runtime/src/exports.ts +++ b/packages/runtime/src/exports.ts @@ -1,4 +1,17 @@ +import { env, pipeline } from '@huggingface/transformers'; + +import { EMBEDDING_MODEL } from './utils/utils.consts.js'; + +const setModelLocation = (location: string) => { + env.cacheDir = location; +}; + +const preloadModel = async () => { + await pipeline('feature-extraction', EMBEDDING_MODEL); +}; + export { Services } from './utils/utils.services.js'; export { StashRuntime } from './runtime.js'; export * from './services/documents/documents.js'; export * from './services/document-chunks/document-chunks.js'; +export { setModelLocation, preloadModel }; diff --git a/packages/runtime/src/services/database/database.ts b/packages/runtime/src/services/database/database.ts index 5865f3a..da18ddc 100644 --- a/packages/runtime/src/services/database/database.ts +++ b/packages/runtime/src/services/database/database.ts @@ -3,9 +3,9 @@ import ClientPgLite from 'knex-pglite'; import { PGlite } from '@electric-sql/pglite'; import { vector } from '@electric-sql/pglite/vector'; -import { migrationSource } from './migrations/migrations.js'; +import { destroy, type Services } from '../../utils/utils.services.js'; -import { destroy, Services } from '#root/utils/utils.services.js'; +import { migrationSource } from './migrations/migrations.js'; class DatabaseService { #services: Services; diff --git a/packages/runtime/src/services/database/migrations/migrations.001-init.ts b/packages/runtime/src/services/database/migrations/migrations.001-init.ts index 08da637..46c83f1 100644 --- a/packages/runtime/src/services/database/migrations/migrations.001-init.ts +++ b/packages/runtime/src/services/database/migrations/migrations.001-init.ts @@ -1,7 +1,7 @@ -import type { Migration } from './migrations.types.js'; +import { EMBEDDING_MODEL } from '../../../utils/utils.consts.js'; +import { EmbeddingsService } from '../../embeddings/embeddings.js'; -import { EmbeddingsService } from '#root/services/embeddings/embeddings.js'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; +import type { Migration } from './migrations.types.js'; const tableNames = { documents: 'documents', diff --git a/packages/runtime/src/services/database/migrations/migrations.ts b/packages/runtime/src/services/database/migrations/migrations.ts index e83094f..c25b194 100644 --- a/packages/runtime/src/services/database/migrations/migrations.ts +++ b/packages/runtime/src/services/database/migrations/migrations.ts @@ -1,10 +1,10 @@ import type { Knex } from 'knex'; +import type { Services } from '../../../utils/utils.services.js'; + import type { Migration } from './migrations.types.js'; import { init } from './migrations.001-init.js'; -import type { Services } from '#root/utils/utils.services.js'; - const migrations = [init] satisfies Migration[]; type MigrationSourceOptions = { diff --git a/packages/runtime/src/services/database/migrations/migrations.types.ts b/packages/runtime/src/services/database/migrations/migrations.types.ts index 6897776..75898d3 100644 --- a/packages/runtime/src/services/database/migrations/migrations.types.ts +++ b/packages/runtime/src/services/database/migrations/migrations.types.ts @@ -1,6 +1,6 @@ import type { Knex } from 'knex'; -import type { Services } from '#root/utils/utils.services.js'; +import type { Services } from '../../../utils/utils.services.js'; type MigrationOptions = { knex: Knex; diff --git a/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts b/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts index cf4e356..1fe5932 100644 --- a/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts +++ b/packages/runtime/src/services/document-chunks/document-chunks.schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; -import { createListResultSchema, queryDSLSchema } from '#root/utils/utils.schema.js'; +import { createListResultSchema, queryDSLSchema } from '../../utils/utils.schema.js'; const documentChunkSchema = z .object({ diff --git a/packages/runtime/src/services/document-chunks/document-chunks.ts b/packages/runtime/src/services/document-chunks/document-chunks.ts index fac2845..3715e06 100644 --- a/packages/runtime/src/services/document-chunks/document-chunks.ts +++ b/packages/runtime/src/services/document-chunks/document-chunks.ts @@ -2,15 +2,14 @@ import { QueryParser } from '@morten-olsen/stash-query-dsl'; import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; import { EmbeddingsService } from '../embeddings/embeddings.js'; +import type { Services } from '../../utils/utils.services.js'; +import { EMBEDDING_MODEL } from '../../utils/utils.consts.js'; +import type { ExplicitAny } from '../../global.js'; +import { applyQueryFilter } from '../../utils/utils.query.js'; import type { DocumentChunkFilter, DocumentChunksFindResult } from './document-chunks.schemas.js'; import { mapFromDocumentChunkRow } from './document-chunks.mappings.js'; -import type { Services } from '#root/utils/utils.services.js'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; -import type { ExplicitAny } from '#root/global.js'; -import { applyQueryFilter } from '#root/utils/utils.query.js'; - const baseFields = [ `${tableNames.documentChunks}.*`, `${tableNames.documents}.metadata`, diff --git a/packages/runtime/src/services/documents/documents.schemas.ts b/packages/runtime/src/services/documents/documents.schemas.ts index 902d4fb..c12001c 100644 --- a/packages/runtime/src/services/documents/documents.schemas.ts +++ b/packages/runtime/src/services/documents/documents.schemas.ts @@ -1,7 +1,7 @@ import { z } from 'zod'; import { queryFilterSchema } from '@morten-olsen/stash-query-dsl'; -import { createListResultSchema, queryDSLSchema } from '#root/utils/utils.schema.js'; +import { createListResultSchema, queryDSLSchema } from '../../utils/utils.schema.js'; const documentSchema = z .object({ diff --git a/packages/runtime/src/services/documents/documents.ts b/packages/runtime/src/services/documents/documents.ts index ea5b6f8..ef0332b 100644 --- a/packages/runtime/src/services/documents/documents.ts +++ b/packages/runtime/src/services/documents/documents.ts @@ -2,7 +2,12 @@ import { QueryParser } from '@morten-olsen/stash-query-dsl'; import { DatabaseService, tableNames, type TableRows } from '../database/database.js'; import { SplittingService } from '../splitter/splitter.js'; +import { EventEmitter } from '../../utils/utils.event-emitter.js'; +import type { Services } from '../../utils/utils.services.js'; +import { compareObjectKeys } from '../../utils/utils.compare.js'; +import { applyQueryFilter } from '../../utils/utils.query.js'; +import { mapFromDocumentRow } from './documents.mapping.js'; import type { Document, DocumentFilter, @@ -10,12 +15,6 @@ import type { DocumentUpsert, DocumentUpsertResult, } from './documents.schemas.ts'; -import { mapFromDocumentRow } from './documents.mapping.js'; - -import { EventEmitter } from '#root/utils/utils.event-emitter.js'; -import type { Services } from '#root/utils/utils.services.js'; -import { compareObjectKeys } from '#root/utils/utils.compare.js'; -import { applyQueryFilter } from '#root/utils/utils.query.js'; type DocumentsServiceEvents = { upserted: (document: Document) => void; diff --git a/packages/runtime/src/services/embeddings/embeddings.ts b/packages/runtime/src/services/embeddings/embeddings.ts index debdc41..c86a574 100644 --- a/packages/runtime/src/services/embeddings/embeddings.ts +++ b/packages/runtime/src/services/embeddings/embeddings.ts @@ -1,8 +1,8 @@ import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers'; -import { Vector } from './embeddings.vector.js'; +import type { ExplicitAny } from '../../global.js'; -import type { ExplicitAny } from '#root/global.js'; +import { Vector } from './embeddings.vector.js'; type ExtractOptions = { input: string[]; diff --git a/packages/runtime/src/services/splitter/splitter.ts b/packages/runtime/src/services/splitter/splitter.ts index 9a42242..a5aacc4 100644 --- a/packages/runtime/src/services/splitter/splitter.ts +++ b/packages/runtime/src/services/splitter/splitter.ts @@ -1,12 +1,11 @@ import { EmbeddingsService } from '../embeddings/embeddings.js'; import type { Document } from '../documents/documents.schemas.js'; +import type { Services } from '../../utils/utils.services.js'; +import { EMBEDDING_MODEL } from '../../utils/utils.consts.js'; import type { Chunk, Splitter } from './splitter.types.js'; import { textSplitter } from './splitters/splitters.text.js'; -import type { Services } from '#root/utils/utils.services.js'; -import { EMBEDDING_MODEL } from '#root/utils/utils.consts.js'; - class SplittingService { #services: Services; #chunkers: Set; diff --git a/packages/runtime/src/services/warmup/warmup.ts b/packages/runtime/src/services/warmup/warmup.ts index 4c5af93..6473ecb 100644 --- a/packages/runtime/src/services/warmup/warmup.ts +++ b/packages/runtime/src/services/warmup/warmup.ts @@ -1,6 +1,5 @@ import { DatabaseService } from '../database/database.js'; - -import { Services } from '#root/utils/utils.services.js'; +import { Services } from '../../utils/utils.services.js'; class WarmupService { #services: Services; diff --git a/packages/runtime/src/utils/utils.event-emitter.ts b/packages/runtime/src/utils/utils.event-emitter.ts index 65af8dc..3986995 100644 --- a/packages/runtime/src/utils/utils.event-emitter.ts +++ b/packages/runtime/src/utils/utils.event-emitter.ts @@ -1,4 +1,4 @@ -import type { ExplicitAny } from '#root/global.js'; +import type { ExplicitAny } from '../global.js'; type EventListener = (...args: T) => void | Promise; diff --git a/packages/server/Dockerfile b/packages/server/Dockerfile new file mode 100644 index 0000000..5a0ca03 --- /dev/null +++ b/packages/server/Dockerfile @@ -0,0 +1,32 @@ +FROM node:23-slim AS base +ENV \ + MODEL_DIR=/models +RUN corepack enable +WORKDIR /app + +FROM base AS builder +RUN npm i -g turbo +COPY . . +RUN turbo prune @morten-olsen/stash-server --docker + +FROM base AS installer +COPY --from=builder /app/out/json/ . +RUN pnpm install --frozen-lockfile +COPY --from=builder /app/out/full/ . +RUN \ + pnpm build \ + && node /app/packages/server/dist/preload-data.js + +FROM base AS runner +ENV \ + SERVER_HOST=0.0.0.0 +RUN \ + addgroup --system --gid 1001 nodejs \ + && adduser --system --uid 1001 nodejs \ + && mkdir /data \ + && chown nodejs:nodejs /data +COPY --from=installer /models /models +COPY --from=installer /app /app +USER nodejs + +CMD ["node", "/app/packages/server/dist/start.js"] diff --git a/packages/server/package.json b/packages/server/package.json index 36590cb..f6a3c61 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -27,9 +27,6 @@ }, "name": "@morten-olsen/stash-server", "version": "1.0.0", - "imports": { - "#root/*": "./src/*" - }, "dependencies": { "@fastify/cors": "11.1.0", "@fastify/swagger": "9.6.1", diff --git a/packages/server/src/preload-data.ts b/packages/server/src/preload-data.ts new file mode 100644 index 0000000..6369828 --- /dev/null +++ b/packages/server/src/preload-data.ts @@ -0,0 +1,11 @@ +import { mkdir } from 'fs/promises'; +import { resolve } from 'path'; + +import { preloadModel, setModelLocation } from '@morten-olsen/stash-runtime'; + +if (process.env.MODEL_DIR) { + const modelDir = resolve(process.env.MODEL_DIR); + await mkdir(modelDir, { recursive: true }); + setModelLocation(modelDir); + await preloadModel(); +} diff --git a/packages/server/src/start.ts b/packages/server/src/start.ts index 81526b9..1ce77e7 100644 --- a/packages/server/src/start.ts +++ b/packages/server/src/start.ts @@ -1,7 +1,19 @@ +import { resolve } from 'path'; +import { mkdir } from 'fs/promises'; + +import { setModelLocation } from '@morten-olsen/stash-runtime'; + import { createApi } from './api.js'; -const server = await createApi(); +if (process.env.MODEL_DIR) { + const modelDir = resolve(process.env.MODEL_DIR); + await mkdir(modelDir, { recursive: true }); + setModelLocation(modelDir); +} +const server = await createApi(); await server.listen({ port: 3400, + host: process.env.SERVER_HOST, }); +console.log('Server started');