mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-08-11 19:22:31 +08:00
create embedding services
This commit is contained in:
parent
9f84a84f96
commit
b248a7a2b5
@ -11,7 +11,7 @@ export abstract class BaseAIService implements AIService {
|
|||||||
abstract generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse>;
|
abstract generateChatCompletion(messages: Message[], options?: ChatCompletionOptions): Promise<ChatResponse>;
|
||||||
|
|
||||||
isAvailable(): boolean {
|
isAvailable(): boolean {
|
||||||
return options.getOption('aiEnabled') === 'true'; // Base check if AI is enabled globally
|
return options.getOptionBool('aiEnabled'); // Base check if AI is enabled globally
|
||||||
}
|
}
|
||||||
|
|
||||||
getName(): string {
|
getName(): string {
|
||||||
|
299
src/services/llm/embeddings/providers.ts
Normal file
299
src/services/llm/embeddings/providers.ts
Normal file
@ -0,0 +1,299 @@
|
|||||||
|
import options from "../../options.js";
|
||||||
|
import log from "../../log.js";
|
||||||
|
import sql from "../../sql.js";
|
||||||
|
import dateUtils from "../../date_utils.js";
|
||||||
|
import { randomString } from "../../utils.js";
|
||||||
|
import type { EmbeddingProvider, EmbeddingConfig } from "./embeddings_interface.js";
|
||||||
|
import { OpenAIEmbeddingProvider } from "./providers/openai.js";
|
||||||
|
import { OllamaEmbeddingProvider } from "./providers/ollama.js";
|
||||||
|
import { AnthropicEmbeddingProvider } from "./providers/anthropic.js";
|
||||||
|
import { LocalEmbeddingProvider } from "./providers/local.js";
|
||||||
|
|
||||||
|
const providers = new Map<string, EmbeddingProvider>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register a new embedding provider
|
||||||
|
*/
|
||||||
|
export function registerEmbeddingProvider(provider: EmbeddingProvider) {
|
||||||
|
providers.set(provider.name, provider);
|
||||||
|
log.info(`Registered embedding provider: ${provider.name}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all registered embedding providers
|
||||||
|
*/
|
||||||
|
export function getEmbeddingProviders(): EmbeddingProvider[] {
|
||||||
|
return Array.from(providers.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a specific embedding provider by name
|
||||||
|
*/
|
||||||
|
export function getEmbeddingProvider(name: string): EmbeddingProvider | undefined {
|
||||||
|
return providers.get(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all enabled embedding providers
|
||||||
|
*/
|
||||||
|
export async function getEnabledEmbeddingProviders(): Promise<EmbeddingProvider[]> {
|
||||||
|
if (!(await options.getOptionBool('aiEnabled'))) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get enabled providers from database
|
||||||
|
const enabledProviders = await sql.getRows(`
|
||||||
|
SELECT providerId, name, config
|
||||||
|
FROM embedding_providers
|
||||||
|
WHERE isEnabled = 1
|
||||||
|
ORDER BY priority DESC`
|
||||||
|
);
|
||||||
|
|
||||||
|
const result: EmbeddingProvider[] = [];
|
||||||
|
|
||||||
|
for (const row of enabledProviders) {
|
||||||
|
const rowData = row as any;
|
||||||
|
const provider = providers.get(rowData.name);
|
||||||
|
|
||||||
|
if (provider) {
|
||||||
|
result.push(provider);
|
||||||
|
} else {
|
||||||
|
// Use error instead of warn if warn is not available
|
||||||
|
log.error(`Enabled embedding provider ${rowData.name} not found in registered providers`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new embedding provider configuration in the database
|
||||||
|
*/
|
||||||
|
export async function createEmbeddingProviderConfig(
|
||||||
|
name: string,
|
||||||
|
config: EmbeddingConfig,
|
||||||
|
isEnabled = false,
|
||||||
|
priority = 0
|
||||||
|
): Promise<string> {
|
||||||
|
const providerId = randomString(16);
|
||||||
|
const now = dateUtils.localNowDateTime();
|
||||||
|
const utcNow = dateUtils.utcNowDateTime();
|
||||||
|
|
||||||
|
await sql.execute(`
|
||||||
|
INSERT INTO embedding_providers
|
||||||
|
(providerId, name, isEnabled, priority, config,
|
||||||
|
dateCreated, utcDateCreated, dateModified, utcDateModified)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
[providerId, name, isEnabled ? 1 : 0, priority, JSON.stringify(config),
|
||||||
|
now, utcNow, now, utcNow]
|
||||||
|
);
|
||||||
|
|
||||||
|
return providerId;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update an existing embedding provider configuration
|
||||||
|
*/
|
||||||
|
export async function updateEmbeddingProviderConfig(
|
||||||
|
providerId: string,
|
||||||
|
isEnabled?: boolean,
|
||||||
|
priority?: number,
|
||||||
|
config?: EmbeddingConfig
|
||||||
|
): Promise<boolean> {
|
||||||
|
const now = dateUtils.localNowDateTime();
|
||||||
|
const utcNow = dateUtils.utcNowDateTime();
|
||||||
|
|
||||||
|
// Get existing provider
|
||||||
|
const provider = await sql.getRow(
|
||||||
|
"SELECT * FROM embedding_providers WHERE providerId = ?",
|
||||||
|
[providerId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!provider) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build update query parts
|
||||||
|
const updates = [];
|
||||||
|
const params: any[] = [];
|
||||||
|
|
||||||
|
if (isEnabled !== undefined) {
|
||||||
|
updates.push("isEnabled = ?");
|
||||||
|
params.push(isEnabled ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (priority !== undefined) {
|
||||||
|
updates.push("priority = ?");
|
||||||
|
params.push(priority);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config) {
|
||||||
|
updates.push("config = ?");
|
||||||
|
params.push(JSON.stringify(config));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (updates.length === 0) {
|
||||||
|
return true; // Nothing to update
|
||||||
|
}
|
||||||
|
|
||||||
|
updates.push("dateModified = ?");
|
||||||
|
updates.push("utcDateModified = ?");
|
||||||
|
params.push(now, utcNow);
|
||||||
|
|
||||||
|
params.push(providerId);
|
||||||
|
|
||||||
|
// Execute update
|
||||||
|
await sql.execute(
|
||||||
|
`UPDATE embedding_providers SET ${updates.join(", ")} WHERE providerId = ?`,
|
||||||
|
params
|
||||||
|
);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete an embedding provider configuration
|
||||||
|
*/
|
||||||
|
export async function deleteEmbeddingProviderConfig(providerId: string): Promise<boolean> {
|
||||||
|
const result = await sql.execute(
|
||||||
|
"DELETE FROM embedding_providers WHERE providerId = ?",
|
||||||
|
[providerId]
|
||||||
|
);
|
||||||
|
|
||||||
|
return result.changes > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all embedding provider configurations from the database
|
||||||
|
*/
|
||||||
|
export async function getEmbeddingProviderConfigs() {
|
||||||
|
return await sql.getRows("SELECT * FROM embedding_providers ORDER BY priority DESC");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the default embedding providers
|
||||||
|
*/
|
||||||
|
export async function initializeDefaultProviders() {
|
||||||
|
// Register built-in providers
|
||||||
|
try {
|
||||||
|
// Register OpenAI provider if API key is configured
|
||||||
|
const openaiApiKey = await options.getOption('openaiApiKey');
|
||||||
|
if (openaiApiKey) {
|
||||||
|
const openaiModel = await options.getOption('openaiDefaultModel') || 'text-embedding-3-small';
|
||||||
|
const openaiBaseUrl = await options.getOption('openaiBaseUrl') || 'https://api.openai.com/v1';
|
||||||
|
|
||||||
|
registerEmbeddingProvider(new OpenAIEmbeddingProvider({
|
||||||
|
model: openaiModel,
|
||||||
|
dimension: 1536, // OpenAI's typical dimension
|
||||||
|
type: 'float32',
|
||||||
|
apiKey: openaiApiKey,
|
||||||
|
baseUrl: openaiBaseUrl
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Create OpenAI provider config if it doesn't exist
|
||||||
|
const existingOpenAI = await sql.getRow(
|
||||||
|
"SELECT * FROM embedding_providers WHERE name = ?",
|
||||||
|
['openai']
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!existingOpenAI) {
|
||||||
|
await createEmbeddingProviderConfig('openai', {
|
||||||
|
model: openaiModel,
|
||||||
|
dimension: 1536,
|
||||||
|
type: 'float32'
|
||||||
|
}, true, 100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register Anthropic provider if API key is configured
|
||||||
|
const anthropicApiKey = await options.getOption('anthropicApiKey');
|
||||||
|
if (anthropicApiKey) {
|
||||||
|
const anthropicModel = await options.getOption('anthropicDefaultModel') || 'claude-3-haiku-20240307';
|
||||||
|
const anthropicBaseUrl = await options.getOption('anthropicBaseUrl') || 'https://api.anthropic.com/v1';
|
||||||
|
|
||||||
|
registerEmbeddingProvider(new AnthropicEmbeddingProvider({
|
||||||
|
model: anthropicModel,
|
||||||
|
dimension: 1024, // Anthropic's embedding dimension
|
||||||
|
type: 'float32',
|
||||||
|
apiKey: anthropicApiKey,
|
||||||
|
baseUrl: anthropicBaseUrl
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Create Anthropic provider config if it doesn't exist
|
||||||
|
const existingAnthropic = await sql.getRow(
|
||||||
|
"SELECT * FROM embedding_providers WHERE name = ?",
|
||||||
|
['anthropic']
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!existingAnthropic) {
|
||||||
|
await createEmbeddingProviderConfig('anthropic', {
|
||||||
|
model: anthropicModel,
|
||||||
|
dimension: 1024,
|
||||||
|
type: 'float32'
|
||||||
|
}, true, 75);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register Ollama provider if enabled
|
||||||
|
if (await options.getOptionBool('ollamaEnabled')) {
|
||||||
|
const ollamaModel = await options.getOption('ollamaDefaultModel') || 'llama3';
|
||||||
|
const ollamaBaseUrl = await options.getOption('ollamaBaseUrl') || 'http://localhost:11434';
|
||||||
|
|
||||||
|
registerEmbeddingProvider(new OllamaEmbeddingProvider({
|
||||||
|
model: ollamaModel,
|
||||||
|
dimension: 4096, // Typical for Ollama models
|
||||||
|
type: 'float32',
|
||||||
|
baseUrl: ollamaBaseUrl
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Create Ollama provider config if it doesn't exist
|
||||||
|
const existingOllama = await sql.getRow(
|
||||||
|
"SELECT * FROM embedding_providers WHERE name = ?",
|
||||||
|
['ollama']
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!existingOllama) {
|
||||||
|
await createEmbeddingProviderConfig('ollama', {
|
||||||
|
model: ollamaModel,
|
||||||
|
dimension: 4096,
|
||||||
|
type: 'float32'
|
||||||
|
}, true, 50);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always register local provider as fallback
|
||||||
|
registerEmbeddingProvider(new LocalEmbeddingProvider({
|
||||||
|
model: 'local',
|
||||||
|
dimension: 384,
|
||||||
|
type: 'float32'
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Create local provider config if it doesn't exist
|
||||||
|
const existingLocal = await sql.getRow(
|
||||||
|
"SELECT * FROM embedding_providers WHERE name = ?",
|
||||||
|
['local']
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!existingLocal) {
|
||||||
|
await createEmbeddingProviderConfig('local', {
|
||||||
|
model: 'local',
|
||||||
|
dimension: 384,
|
||||||
|
type: 'float32'
|
||||||
|
}, true, 10);
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
log.error(`Error initializing default embedding providers: ${error.message || 'Unknown error'}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export default {
|
||||||
|
registerEmbeddingProvider,
|
||||||
|
getEmbeddingProviders,
|
||||||
|
getEmbeddingProvider,
|
||||||
|
getEnabledEmbeddingProviders,
|
||||||
|
createEmbeddingProviderConfig,
|
||||||
|
updateEmbeddingProviderConfig,
|
||||||
|
deleteEmbeddingProviderConfig,
|
||||||
|
getEmbeddingProviderConfigs,
|
||||||
|
initializeDefaultProviders
|
||||||
|
};
|
78
src/services/llm/embeddings/providers/anthropic.ts
Normal file
78
src/services/llm/embeddings/providers/anthropic.ts
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
import { BaseEmbeddingProvider } from "../base_embeddings.js";
|
||||||
|
import type { EmbeddingConfig } from "../embeddings_interface.js";
|
||||||
|
import axios from "axios";
|
||||||
|
import log from "../../../log.js";
|
||||||
|
|
||||||
|
interface AnthropicEmbeddingConfig extends EmbeddingConfig {
|
||||||
|
apiKey: string;
|
||||||
|
baseUrl: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Anthropic (Claude) embedding provider implementation
|
||||||
|
*/
|
||||||
|
export class AnthropicEmbeddingProvider extends BaseEmbeddingProvider {
|
||||||
|
name = "anthropic";
|
||||||
|
private apiKey: string;
|
||||||
|
private baseUrl: string;
|
||||||
|
|
||||||
|
constructor(config: AnthropicEmbeddingConfig) {
|
||||||
|
super(config);
|
||||||
|
this.apiKey = config.apiKey;
|
||||||
|
this.baseUrl = config.baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for a single text
|
||||||
|
*/
|
||||||
|
async generateEmbeddings(text: string): Promise<Float32Array> {
|
||||||
|
try {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/embeddings`,
|
||||||
|
{
|
||||||
|
model: this.config.model || "claude-3-haiku-20240307",
|
||||||
|
input: text,
|
||||||
|
encoding_format: "float"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"x-api-key": this.apiKey,
|
||||||
|
"anthropic-version": "2023-06-01"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data && response.data.embedding) {
|
||||||
|
const embedding = response.data.embedding;
|
||||||
|
return new Float32Array(embedding);
|
||||||
|
} else {
|
||||||
|
throw new Error("Unexpected response structure from Anthropic API");
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
|
||||||
|
log.error(`Anthropic embedding error: ${errorMessage}`);
|
||||||
|
throw new Error(`Anthropic embedding error: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for multiple texts in a single batch
|
||||||
|
*
|
||||||
|
* Note: Anthropic doesn't currently support batch embedding, so we process each text individually
|
||||||
|
*/
|
||||||
|
async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
|
||||||
|
if (texts.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const results: Float32Array[] = [];
|
||||||
|
|
||||||
|
for (const text of texts) {
|
||||||
|
const embedding = await this.generateEmbeddings(text);
|
||||||
|
results.push(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
73
src/services/llm/embeddings/providers/local.ts
Normal file
73
src/services/llm/embeddings/providers/local.ts
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
import { BaseEmbeddingProvider } from "../base_embeddings.js";
|
||||||
|
import type { EmbeddingConfig } from "../embeddings_interface.js";
|
||||||
|
import crypto from "crypto";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Local embedding provider implementation
|
||||||
|
*
|
||||||
|
* This is a fallback provider that generates simple deterministic embeddings
|
||||||
|
* using cryptographic hashing. These are not semantic vectors but can be used
|
||||||
|
* for exact matches when no other providers are available.
|
||||||
|
*/
|
||||||
|
export class LocalEmbeddingProvider extends BaseEmbeddingProvider {
|
||||||
|
name = "local";
|
||||||
|
|
||||||
|
constructor(config: EmbeddingConfig) {
|
||||||
|
super(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a simple embedding by hashing the text
|
||||||
|
*/
|
||||||
|
async generateEmbeddings(text: string): Promise<Float32Array> {
|
||||||
|
const dimension = this.config.dimension || 384;
|
||||||
|
const result = new Float32Array(dimension);
|
||||||
|
|
||||||
|
// Generate a hash of the input text
|
||||||
|
const hash = crypto.createHash('sha256').update(text).digest();
|
||||||
|
|
||||||
|
// Use the hash to seed a deterministic PRNG
|
||||||
|
let seed = 0;
|
||||||
|
for (let i = 0; i < hash.length; i += 4) {
|
||||||
|
seed = (seed * 65536 + hash.readUInt32LE(i % (hash.length - 3))) >>> 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate pseudo-random but deterministic values for the embedding
|
||||||
|
for (let i = 0; i < dimension; i++) {
|
||||||
|
// Generate next pseudo-random number
|
||||||
|
seed = (seed * 1664525 + 1013904223) >>> 0;
|
||||||
|
|
||||||
|
// Convert to a float between -1 and 1
|
||||||
|
result[i] = (seed / 2147483648) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize the vector
|
||||||
|
let magnitude = 0;
|
||||||
|
for (let i = 0; i < dimension; i++) {
|
||||||
|
magnitude += result[i] * result[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
magnitude = Math.sqrt(magnitude);
|
||||||
|
if (magnitude > 0) {
|
||||||
|
for (let i = 0; i < dimension; i++) {
|
||||||
|
result[i] /= magnitude;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for multiple texts
|
||||||
|
*/
|
||||||
|
async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
|
||||||
|
const results: Float32Array[] = [];
|
||||||
|
|
||||||
|
for (const text of texts) {
|
||||||
|
const embedding = await this.generateEmbeddings(text);
|
||||||
|
results.push(embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
77
src/services/llm/embeddings/providers/ollama.ts
Normal file
77
src/services/llm/embeddings/providers/ollama.ts
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
import { BaseEmbeddingProvider } from "../base_embeddings.js";
|
||||||
|
import type { EmbeddingConfig } from "../embeddings_interface.js";
|
||||||
|
import axios from "axios";
|
||||||
|
import log from "../../../log.js";
|
||||||
|
|
||||||
|
interface OllamaEmbeddingConfig extends EmbeddingConfig {
|
||||||
|
baseUrl: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ollama embedding provider implementation
|
||||||
|
*/
|
||||||
|
export class OllamaEmbeddingProvider extends BaseEmbeddingProvider {
|
||||||
|
name = "ollama";
|
||||||
|
private baseUrl: string;
|
||||||
|
|
||||||
|
constructor(config: OllamaEmbeddingConfig) {
|
||||||
|
super(config);
|
||||||
|
this.baseUrl = config.baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for a single text
|
||||||
|
*/
|
||||||
|
async generateEmbeddings(text: string): Promise<Float32Array> {
|
||||||
|
try {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/api/embeddings`,
|
||||||
|
{
|
||||||
|
model: this.config.model || "llama3",
|
||||||
|
prompt: text
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data && Array.isArray(response.data.embedding)) {
|
||||||
|
return new Float32Array(response.data.embedding);
|
||||||
|
} else {
|
||||||
|
throw new Error("Unexpected response structure from Ollama API");
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
|
||||||
|
log.error(`Ollama embedding error: ${errorMessage}`);
|
||||||
|
throw new Error(`Ollama embedding error: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for multiple texts
|
||||||
|
*
|
||||||
|
* Note: Ollama API doesn't support batch embedding, so we process them sequentially
|
||||||
|
*/
|
||||||
|
async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
|
||||||
|
if (texts.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const results: Float32Array[] = [];
|
||||||
|
|
||||||
|
for (const text of texts) {
|
||||||
|
try {
|
||||||
|
const embedding = await this.generateEmbeddings(text);
|
||||||
|
results.push(embedding);
|
||||||
|
} catch (error: any) {
|
||||||
|
const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
|
||||||
|
log.error(`Ollama batch embedding error: ${errorMessage}`);
|
||||||
|
throw new Error(`Ollama batch embedding error: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
107
src/services/llm/embeddings/providers/openai.ts
Normal file
107
src/services/llm/embeddings/providers/openai.ts
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
import { BaseEmbeddingProvider } from "../base_embeddings.js";
|
||||||
|
import type { EmbeddingConfig } from "../embeddings_interface.js";
|
||||||
|
import axios from "axios";
|
||||||
|
import log from "../../../log.js";
|
||||||
|
|
||||||
|
interface OpenAIEmbeddingConfig extends EmbeddingConfig {
|
||||||
|
apiKey: string;
|
||||||
|
baseUrl: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* OpenAI embedding provider implementation
|
||||||
|
*/
|
||||||
|
export class OpenAIEmbeddingProvider extends BaseEmbeddingProvider {
|
||||||
|
name = "openai";
|
||||||
|
private apiKey: string;
|
||||||
|
private baseUrl: string;
|
||||||
|
|
||||||
|
constructor(config: OpenAIEmbeddingConfig) {
|
||||||
|
super(config);
|
||||||
|
this.apiKey = config.apiKey;
|
||||||
|
this.baseUrl = config.baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for a single text
|
||||||
|
*/
|
||||||
|
async generateEmbeddings(text: string): Promise<Float32Array> {
|
||||||
|
try {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/embeddings`,
|
||||||
|
{
|
||||||
|
input: text,
|
||||||
|
model: this.config.model || "text-embedding-3-small",
|
||||||
|
encoding_format: "float"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": `Bearer ${this.apiKey}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data && response.data.data && response.data.data[0] && response.data.data[0].embedding) {
|
||||||
|
const embedding = response.data.data[0].embedding;
|
||||||
|
return new Float32Array(embedding);
|
||||||
|
} else {
|
||||||
|
throw new Error("Unexpected response structure from OpenAI API");
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
|
||||||
|
log.error(`OpenAI embedding error: ${errorMessage}`);
|
||||||
|
throw new Error(`OpenAI embedding error: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate embeddings for multiple texts in a single batch
|
||||||
|
*/
|
||||||
|
async generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]> {
|
||||||
|
if (texts.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const batchSize = this.config.batchSize || 10;
|
||||||
|
const results: Float32Array[] = [];
|
||||||
|
|
||||||
|
// Process in batches to avoid API limits
|
||||||
|
for (let i = 0; i < texts.length; i += batchSize) {
|
||||||
|
const batch = texts.slice(i, i + batchSize);
|
||||||
|
try {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/embeddings`,
|
||||||
|
{
|
||||||
|
input: batch,
|
||||||
|
model: this.config.model || "text-embedding-3-small",
|
||||||
|
encoding_format: "float"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": `Bearer ${this.apiKey}`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data && response.data.data) {
|
||||||
|
// Sort the embeddings by index to ensure they match the input order
|
||||||
|
const sortedEmbeddings = response.data.data
|
||||||
|
.sort((a: any, b: any) => a.index - b.index)
|
||||||
|
.map((item: any) => new Float32Array(item.embedding));
|
||||||
|
|
||||||
|
results.push(...sortedEmbeddings);
|
||||||
|
} else {
|
||||||
|
throw new Error("Unexpected response structure from OpenAI API");
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
const errorMessage = error.response?.data?.error?.message || error.message || "Unknown error";
|
||||||
|
log.error(`OpenAI batch embedding error: ${errorMessage}`);
|
||||||
|
throw new Error(`OpenAI batch embedding error: ${errorMessage}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
483
src/services/llm/embeddings/vector_store.ts
Normal file
483
src/services/llm/embeddings/vector_store.ts
Normal file
@ -0,0 +1,483 @@
|
|||||||
|
import sql from "../../sql.js";
|
||||||
|
import { randomString } from "../../utils.js";
|
||||||
|
import options from "../../options.js";
|
||||||
|
import dateUtils from "../../date_utils.js";
|
||||||
|
import log from "../../log.js";
|
||||||
|
import becca from "../../../becca/becca.js";
|
||||||
|
import type { NoteEmbeddingContext } from "./embeddings_interface.js";
|
||||||
|
import { getEmbeddingProviders, getEnabledEmbeddingProviders } from "./providers.js";
|
||||||
|
|
||||||
|
// Type definition for embedding result
|
||||||
|
interface EmbeddingResult {
|
||||||
|
embedId: string;
|
||||||
|
noteId: string;
|
||||||
|
providerId: string;
|
||||||
|
modelId: string;
|
||||||
|
dimension: number;
|
||||||
|
embedding: Float32Array;
|
||||||
|
version: number;
|
||||||
|
dateCreated: string;
|
||||||
|
utcDateCreated: string;
|
||||||
|
dateModified: string;
|
||||||
|
utcDateModified: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type for queue item
|
||||||
|
interface QueueItem {
|
||||||
|
noteId: string;
|
||||||
|
operation: string;
|
||||||
|
attempts: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the cosine similarity between two vectors
|
||||||
|
*/
|
||||||
|
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||||
|
if (a.length !== b.length) {
|
||||||
|
throw new Error(`Vector dimensions don't match: ${a.length} vs ${b.length}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
let dotProduct = 0;
|
||||||
|
let aMagnitude = 0;
|
||||||
|
let bMagnitude = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < a.length; i++) {
|
||||||
|
dotProduct += a[i] * b[i];
|
||||||
|
aMagnitude += a[i] * a[i];
|
||||||
|
bMagnitude += b[i] * b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
aMagnitude = Math.sqrt(aMagnitude);
|
||||||
|
bMagnitude = Math.sqrt(bMagnitude);
|
||||||
|
|
||||||
|
if (aMagnitude === 0 || bMagnitude === 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return dotProduct / (aMagnitude * bMagnitude);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts embedding Float32Array to Buffer for storage in SQLite
|
||||||
|
*/
|
||||||
|
export function embeddingToBuffer(embedding: Float32Array): Buffer {
|
||||||
|
return Buffer.from(embedding.buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts Buffer from SQLite back to Float32Array
|
||||||
|
*/
|
||||||
|
export function bufferToEmbedding(buffer: Buffer, dimension: number): Float32Array {
|
||||||
|
return new Float32Array(buffer.buffer, buffer.byteOffset, dimension);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates or updates an embedding for a note
|
||||||
|
*/
|
||||||
|
export async function storeNoteEmbedding(
|
||||||
|
noteId: string,
|
||||||
|
providerId: string,
|
||||||
|
modelId: string,
|
||||||
|
embedding: Float32Array
|
||||||
|
): Promise<string> {
|
||||||
|
const dimension = embedding.length;
|
||||||
|
const embeddingBlob = embeddingToBuffer(embedding);
|
||||||
|
const now = dateUtils.localNowDateTime();
|
||||||
|
const utcNow = dateUtils.utcNowDateTime();
|
||||||
|
|
||||||
|
// Check if an embedding already exists for this note and provider/model
|
||||||
|
const existingEmbed = await getEmbeddingForNote(noteId, providerId, modelId);
|
||||||
|
|
||||||
|
if (existingEmbed) {
|
||||||
|
// Update existing embedding
|
||||||
|
await sql.execute(`
|
||||||
|
UPDATE note_embeddings
|
||||||
|
SET embedding = ?, dimension = ?, version = version + 1,
|
||||||
|
dateModified = ?, utcDateModified = ?
|
||||||
|
WHERE embedId = ?`,
|
||||||
|
[embeddingBlob, dimension, now, utcNow, existingEmbed.embedId]
|
||||||
|
);
|
||||||
|
return existingEmbed.embedId;
|
||||||
|
} else {
|
||||||
|
// Create new embedding
|
||||||
|
const embedId = randomString(16);
|
||||||
|
await sql.execute(`
|
||||||
|
INSERT INTO note_embeddings
|
||||||
|
(embedId, noteId, providerId, modelId, dimension, embedding,
|
||||||
|
dateCreated, utcDateCreated, dateModified, utcDateModified)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
[embedId, noteId, providerId, modelId, dimension, embeddingBlob,
|
||||||
|
now, utcNow, now, utcNow]
|
||||||
|
);
|
||||||
|
return embedId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves embedding for a specific note
|
||||||
|
*/
|
||||||
|
export async function getEmbeddingForNote(noteId: string, providerId: string, modelId: string): Promise<EmbeddingResult | null> {
|
||||||
|
const row = await sql.getRow(`
|
||||||
|
SELECT embedId, noteId, providerId, modelId, dimension, embedding, version,
|
||||||
|
dateCreated, utcDateCreated, dateModified, utcDateModified
|
||||||
|
FROM note_embeddings
|
||||||
|
WHERE noteId = ? AND providerId = ? AND modelId = ?`,
|
||||||
|
[noteId, providerId, modelId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!row) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Need to cast row to any as it doesn't have type information
|
||||||
|
const rowData = row as any;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...rowData,
|
||||||
|
embedding: bufferToEmbedding(rowData.embedding, rowData.dimension)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds similar notes based on vector similarity
|
||||||
|
*/
|
||||||
|
export async function findSimilarNotes(
|
||||||
|
embedding: Float32Array,
|
||||||
|
providerId: string,
|
||||||
|
modelId: string,
|
||||||
|
limit = 10,
|
||||||
|
threshold = 0.7
|
||||||
|
): Promise<{noteId: string, similarity: number}[]> {
|
||||||
|
// Get all embeddings for the given provider and model
|
||||||
|
const rows = await sql.getRows(`
|
||||||
|
SELECT embedId, noteId, providerId, modelId, dimension, embedding
|
||||||
|
FROM note_embeddings
|
||||||
|
WHERE providerId = ? AND modelId = ?`,
|
||||||
|
[providerId, modelId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!rows.length) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate similarity for each embedding
|
||||||
|
const similarities = rows.map(row => {
|
||||||
|
const rowData = row as any;
|
||||||
|
const rowEmbedding = bufferToEmbedding(rowData.embedding, rowData.dimension);
|
||||||
|
return {
|
||||||
|
noteId: rowData.noteId,
|
||||||
|
similarity: cosineSimilarity(embedding, rowEmbedding)
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// Filter by threshold and sort by similarity (descending)
|
||||||
|
return similarities
|
||||||
|
.filter(item => item.similarity >= threshold)
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets context for a note to be embedded
|
||||||
|
*/
|
||||||
|
export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbeddingContext> {
|
||||||
|
const note = becca.getNote(noteId);
|
||||||
|
|
||||||
|
if (!note) {
|
||||||
|
throw new Error(`Note ${noteId} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get parent note titles
|
||||||
|
const parentNotes = note.getParentNotes();
|
||||||
|
const parentTitles = parentNotes.map(note => note.title);
|
||||||
|
|
||||||
|
// Get child note titles
|
||||||
|
const childNotes = note.getChildNotes();
|
||||||
|
const childTitles = childNotes.map(note => note.title);
|
||||||
|
|
||||||
|
// Get attributes
|
||||||
|
const attributes = note.getOwnedAttributes().map(attr => ({
|
||||||
|
type: attr.type,
|
||||||
|
name: attr.name,
|
||||||
|
value: attr.value
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Get attachments
|
||||||
|
const attachments = note.getAttachments().map(att => ({
|
||||||
|
title: att.title,
|
||||||
|
mime: att.mime
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Get content
|
||||||
|
let content = "";
|
||||||
|
if (note.type === 'text') {
|
||||||
|
content = String(await note.getContent());
|
||||||
|
} else if (note.type === 'code') {
|
||||||
|
content = String(await note.getContent());
|
||||||
|
} else if (note.type === 'image' || note.type === 'file') {
|
||||||
|
content = `[${note.type} attachment: ${note.mime}]`;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
noteId: note.noteId,
|
||||||
|
title: note.title,
|
||||||
|
content: content,
|
||||||
|
type: note.type,
|
||||||
|
mime: note.mime,
|
||||||
|
dateCreated: note.dateCreated || "",
|
||||||
|
dateModified: note.dateModified || "",
|
||||||
|
attributes,
|
||||||
|
parentTitles,
|
||||||
|
childTitles,
|
||||||
|
attachments
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queues a note for embedding update
|
||||||
|
*/
|
||||||
|
export async function queueNoteForEmbedding(noteId: string, operation = 'UPDATE') {
|
||||||
|
const now = dateUtils.localNowDateTime();
|
||||||
|
const utcNow = dateUtils.utcNowDateTime();
|
||||||
|
|
||||||
|
// Check if note is already in queue
|
||||||
|
const existing = await sql.getValue(
|
||||||
|
"SELECT 1 FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (existing) {
|
||||||
|
// Update existing queue entry
|
||||||
|
await sql.execute(`
|
||||||
|
UPDATE embedding_queue
|
||||||
|
SET operation = ?, dateQueued = ?, utcDateQueued = ?, attempts = 0, error = NULL
|
||||||
|
WHERE noteId = ?`,
|
||||||
|
[operation, now, utcNow, noteId]
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// Add new queue entry
|
||||||
|
await sql.execute(`
|
||||||
|
INSERT INTO embedding_queue
|
||||||
|
(noteId, operation, dateQueued, utcDateQueued)
|
||||||
|
VALUES (?, ?, ?, ?)`,
|
||||||
|
[noteId, operation, now, utcNow]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes all embeddings for a note
|
||||||
|
*/
|
||||||
|
export async function deleteNoteEmbeddings(noteId: string) {
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM note_embeddings WHERE noteId = ?",
|
||||||
|
[noteId]
|
||||||
|
);
|
||||||
|
|
||||||
|
// Remove from queue if present
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteId]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Process the embedding queue
|
||||||
|
*/
|
||||||
|
export async function processEmbeddingQueue() {
|
||||||
|
if (!(await options.getOptionBool('aiEnabled'))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const batchSize = parseInt(await options.getOption('embeddingBatchSize') || '10', 10);
|
||||||
|
const enabledProviders = await getEnabledEmbeddingProviders();
|
||||||
|
|
||||||
|
if (enabledProviders.length === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get notes from queue
|
||||||
|
const notes = await sql.getRows(`
|
||||||
|
SELECT noteId, operation, attempts
|
||||||
|
FROM embedding_queue
|
||||||
|
ORDER BY priority DESC, utcDateQueued ASC
|
||||||
|
LIMIT ?`,
|
||||||
|
[batchSize]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (notes.length === 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const note of notes) {
|
||||||
|
try {
|
||||||
|
const noteData = note as unknown as QueueItem;
|
||||||
|
|
||||||
|
// Skip if note no longer exists
|
||||||
|
if (!becca.getNote(noteData.noteId)) {
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteData.noteId]
|
||||||
|
);
|
||||||
|
await deleteNoteEmbeddings(noteData.noteId);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noteData.operation === 'DELETE') {
|
||||||
|
await deleteNoteEmbeddings(noteData.noteId);
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteData.noteId]
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get note context for embedding
|
||||||
|
const context = await getNoteEmbeddingContext(noteData.noteId);
|
||||||
|
|
||||||
|
// Process with each enabled provider
|
||||||
|
for (const provider of enabledProviders) {
|
||||||
|
try {
|
||||||
|
// Generate embedding
|
||||||
|
const embedding = await provider.generateNoteEmbeddings(context);
|
||||||
|
|
||||||
|
// Store embedding
|
||||||
|
const config = provider.getConfig();
|
||||||
|
await storeNoteEmbedding(noteData.noteId, provider.name, config.model, embedding);
|
||||||
|
} catch (providerError: any) {
|
||||||
|
log.error(`Error generating embedding with provider ${provider.name} for note ${noteData.noteId}: ${providerError.message || 'Unknown error'}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove from queue on success
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteData.noteId]
|
||||||
|
);
|
||||||
|
} catch (error: any) {
|
||||||
|
const noteData = note as unknown as QueueItem;
|
||||||
|
|
||||||
|
// Update attempt count and log error
|
||||||
|
await sql.execute(`
|
||||||
|
UPDATE embedding_queue
|
||||||
|
SET attempts = attempts + 1,
|
||||||
|
lastAttempt = ?,
|
||||||
|
error = ?
|
||||||
|
WHERE noteId = ?`,
|
||||||
|
[dateUtils.utcNowDateTime(), error.message || 'Unknown error', noteData.noteId]
|
||||||
|
);
|
||||||
|
|
||||||
|
log.error(`Error processing embedding for note ${noteData.noteId}: ${error.message || 'Unknown error'}`);
|
||||||
|
|
||||||
|
// Remove from queue if too many attempts
|
||||||
|
if (noteData.attempts + 1 >= 3) {
|
||||||
|
await sql.execute(
|
||||||
|
"DELETE FROM embedding_queue WHERE noteId = ?",
|
||||||
|
[noteData.noteId]
|
||||||
|
);
|
||||||
|
log.error(`Removed note ${noteData.noteId} from embedding queue after multiple failures`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setup note event listeners to keep embeddings up to date
|
||||||
|
*/
|
||||||
|
export function setupEmbeddingEventListeners() {
|
||||||
|
require("../../../becca/entity_events.js").subscribe({
|
||||||
|
entityName: "notes",
|
||||||
|
eventType: "created",
|
||||||
|
handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'CREATE')
|
||||||
|
});
|
||||||
|
|
||||||
|
require("../../../becca/entity_events.js").subscribe({
|
||||||
|
entityName: "notes",
|
||||||
|
eventType: "updated",
|
||||||
|
handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
|
||||||
|
});
|
||||||
|
|
||||||
|
require("../../../becca/entity_events.js").subscribe({
|
||||||
|
entityName: "notes",
|
||||||
|
eventType: "deleted",
|
||||||
|
handler: (note: { noteId: string }) => queueNoteForEmbedding(note.noteId, 'DELETE')
|
||||||
|
});
|
||||||
|
|
||||||
|
require("../../../becca/entity_events.js").subscribe({
|
||||||
|
entityName: "attributes",
|
||||||
|
eventType: ["created", "updated", "deleted"],
|
||||||
|
handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
|
||||||
|
});
|
||||||
|
|
||||||
|
require("../../../becca/entity_events.js").subscribe({
|
||||||
|
entityName: "branches",
|
||||||
|
eventType: ["created", "updated", "deleted"],
|
||||||
|
handler: ({entity}: { entity: { noteId: string } }) => queueNoteForEmbedding(entity.noteId, 'UPDATE')
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setup background processing of the embedding queue
|
||||||
|
*/
|
||||||
|
export async function setupEmbeddingBackgroundProcessing() {
|
||||||
|
const interval = parseInt(await options.getOption('embeddingUpdateInterval') || '5000', 10);
|
||||||
|
|
||||||
|
setInterval(async () => {
|
||||||
|
try {
|
||||||
|
await processEmbeddingQueue();
|
||||||
|
} catch (error: any) {
|
||||||
|
log.error(`Error in background embedding processing: ${error.message || 'Unknown error'}`);
|
||||||
|
}
|
||||||
|
}, interval);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize embeddings system
|
||||||
|
*/
|
||||||
|
export async function initEmbeddings() {
|
||||||
|
if (await options.getOptionBool('aiEnabled')) {
|
||||||
|
setupEmbeddingEventListeners();
|
||||||
|
await setupEmbeddingBackgroundProcessing();
|
||||||
|
log.info("Embeddings system initialized");
|
||||||
|
} else {
|
||||||
|
log.info("Embeddings system disabled");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reprocess all notes to update embeddings
|
||||||
|
*/
|
||||||
|
export async function reprocessAllNotes() {
|
||||||
|
if (!(await options.getOptionBool('aiEnabled'))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Queueing all notes for embedding updates");
|
||||||
|
|
||||||
|
const noteIds = await sql.getColumn(
|
||||||
|
"SELECT noteId FROM notes WHERE isDeleted = 0"
|
||||||
|
);
|
||||||
|
|
||||||
|
log.info(`Adding ${noteIds.length} notes to embedding queue`);
|
||||||
|
|
||||||
|
for (const noteId of noteIds) {
|
||||||
|
await queueNoteForEmbedding(noteId as string, 'UPDATE');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export default {
|
||||||
|
cosineSimilarity,
|
||||||
|
embeddingToBuffer,
|
||||||
|
bufferToEmbedding,
|
||||||
|
storeNoteEmbedding,
|
||||||
|
getEmbeddingForNote,
|
||||||
|
findSimilarNotes,
|
||||||
|
getNoteEmbeddingContext,
|
||||||
|
queueNoteForEmbedding,
|
||||||
|
deleteNoteEmbeddings,
|
||||||
|
processEmbeddingQueue,
|
||||||
|
setupEmbeddingEventListeners,
|
||||||
|
setupEmbeddingBackgroundProcessing,
|
||||||
|
initEmbeddings,
|
||||||
|
reprocessAllNotes
|
||||||
|
};
|
Loading…
x
Reference in New Issue
Block a user