Mirror of https://github.com/TriliumNext/Notes.git (synced 2025-08-10 10:22:29 +08:00)

Commit 71b3b04c53 (parent 0985cec8d6): break up the huge context_extractor into smaller files
@@ -4,7 +4,7 @@ import { OpenAIService } from './providers/openai_service.js';
 import { AnthropicService } from './providers/anthropic_service.js';
 import { OllamaService } from './providers/ollama_service.js';
 import log from '../log.js';
-import contextExtractor from './context_extractor.js';
+import { ContextExtractor } from './context/index.js';
 import semanticContextService from './semantic_context_service.js';
 
 type ServiceProviders = 'openai' | 'anthropic' | 'ollama';
@@ -216,3 +216,6 @@ export default {
         return getInstance().getSemanticContextService();
     }
 };
+
+// Create an instance of ContextExtractor for backward compatibility
+const contextExtractor = new ContextExtractor();
@@ -1,7 +1,10 @@
 import type { Message, ChatCompletionOptions } from './ai_interface.js';
 import aiServiceManager from './ai_service_manager.js';
 import chatStorageService from './chat_storage_service.js';
-import contextExtractor from './context_extractor.js';
+import { ContextExtractor } from './context/index.js';
+
+// Create an instance of ContextExtractor for backward compatibility
+const contextExtractor = new ContextExtractor();
 
 export interface ChatSession {
     id: string;
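The pattern in both hunks is the same: the monolithic default export of './context_extractor.js' is replaced by the ContextExtractor class from './context/index.js', and a module-level instance is constructed under the old name so existing call sites keep working. A minimal sketch of a consumer that is unaffected by the refactor (the note ID is illustrative):

import { ContextExtractor } from './context/index.js';

// Same shape as the old default export, so downstream code is unchanged.
const contextExtractor = new ContextExtractor();

async function demo() {
    const content = await contextExtractor.getNoteContent('someNoteId');
    console.log(content ?? '(note not found)');
}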
src/services/llm/context/chunking.ts (new file, 288 lines)
@@ -0,0 +1,288 @@
/**
 * Contains functions for chunking content into smaller pieces for processing
 * These functions are used to properly prepare content for LLM context windows
 */

/**
 * Interface for chunked content
 */
export interface ContentChunk {
    content: string;
    prefix: string;
    noteId?: string;
    title?: string;
    path?: string;
    metadata?: Record<string, any>;
}

/**
 * Options for the chunking process
 */
export interface ChunkOptions {
    /**
     * Maximum size of each chunk in characters
     * Defaults to LLM context window size (typically around 2048)
     */
    maxChunkSize?: number;

    /**
     * How much chunks should overlap to maintain context
     */
    overlapSize?: number;

    /**
     * Whether to respect sentence and paragraph boundaries
     */
    respectBoundaries?: boolean;

    /**
     * Whether to add metadata to chunks
     */
    includeMetadata?: boolean;

    /**
     * Additional information to include in chunk metadata
     */
    metadata?: Record<string, any>;
}

/**
 * Default options for chunking
 */
const DEFAULT_CHUNK_OPTIONS: Required<ChunkOptions> = {
    maxChunkSize: 1500, // Characters per chunk
    overlapSize: 100,   // Overlap between chunks
    respectBoundaries: true,
    includeMetadata: true,
    metadata: {}
};

/**
 * Chunk content into smaller pieces
 * Used for processing large documents and preparing them for LLMs
 */
export function chunkContent(
    content: string,
    title: string = '',
    noteId: string = '',
    options: ChunkOptions = {}
): ContentChunk[] {
    // Merge provided options with defaults
    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };

    // If content is small enough, return as a single chunk
    if (content.length <= config.maxChunkSize) {
        return [{
            content,
            prefix: title,
            noteId,
            title,
            metadata: config.metadata
        }];
    }

    const chunks: ContentChunk[] = [];

    if (config.respectBoundaries) {
        // Try to split on paragraph boundaries first
        const paragraphs = content.split(/\n\s*\n/);

        let currentChunk = '';
        let currentPrefix = title ? title : '';

        for (const paragraph of paragraphs) {
            // If adding this paragraph would exceed max size, create a new chunk
            if (currentChunk.length + paragraph.length > config.maxChunkSize) {
                // If current chunk is not empty, add it to chunks
                if (currentChunk.length > 0) {
                    chunks.push({
                        content: currentChunk,
                        prefix: currentPrefix,
                        noteId,
                        title,
                        metadata: config.metadata
                    });
                }

                // Start a new chunk, use the overlap if possible
                if (config.overlapSize > 0 && currentChunk.length > 0) {
                    // For overlap, take the last N characters
                    const overlapText = currentChunk.slice(-config.overlapSize);
                    currentChunk = overlapText + paragraph;
                    currentPrefix = `${title} (continued)`;
                } else {
                    currentChunk = paragraph;
                    currentPrefix = `${title} (continued)`;
                }
            } else {
                // Add paragraph to current chunk
                if (currentChunk.length > 0) {
                    currentChunk += '\n\n';
                }
                currentChunk += paragraph;
            }
        }

        // Add the last chunk if it's not empty
        if (currentChunk.length > 0) {
            chunks.push({
                content: currentChunk,
                prefix: currentPrefix,
                noteId,
                title,
                metadata: config.metadata
            });
        }
    } else {
        // Simple chunking by character count
        let currentPosition = 0;

        while (currentPosition < content.length) {
            const chunkEnd = Math.min(currentPosition + config.maxChunkSize, content.length);

            const chunk = content.substring(currentPosition, chunkEnd);
            const prefix = currentPosition === 0 ? title : `${title} (continued)`;

            chunks.push({
                content: chunk,
                prefix,
                noteId,
                title,
                metadata: config.metadata
            });

            // Move position, considering overlap
            currentPosition = chunkEnd - (config.overlapSize || 0);

            // Prevent infinite loop if overlap is too large
            if (currentPosition <= 0 || currentPosition >= content.length) {
                break;
            }
        }
    }

    return chunks;
}

/**
 * Smarter chunking that tries to respect semantic boundaries like headers and sections
 */
export function semanticChunking(
    content: string,
    title: string = '',
    noteId: string = '',
    options: ChunkOptions = {}
): ContentChunk[] {
    // Merge provided options with defaults
    const config: Required<ChunkOptions> = { ...DEFAULT_CHUNK_OPTIONS, ...options };

    // If content is small enough, return as a single chunk
    if (content.length <= config.maxChunkSize) {
        return [{
            content,
            prefix: title,
            noteId,
            title,
            metadata: config.metadata
        }];
    }

    const chunks: ContentChunk[] = [];

    // Try to split on headers first
    const headerPattern = /#{1,6}\s+.+|<h[1-6][^>]*>.*?<\/h[1-6]>/g;
    const sections = [];

    let lastIndex = 0;
    let match;

    // First, find all headers and split content into sections
    while ((match = headerPattern.exec(content)) !== null) {
        if (match.index > lastIndex) {
            // Add the content before this header
            sections.push(content.substring(lastIndex, match.index));
        }

        // Start a new section with this header
        lastIndex = match.index;
    }

    // Add the last section
    if (lastIndex < content.length) {
        sections.push(content.substring(lastIndex));
    }

    // If no headers were found, fall back to regular chunking
    if (sections.length <= 1) {
        return chunkContent(content, title, noteId, options);
    }

    // Process each section
    let currentChunk = '';
    let currentPrefix = title;

    for (const section of sections) {
        // If adding this section would exceed max size, create a new chunk
        if (currentChunk.length + section.length > config.maxChunkSize) {
            // If this single section is too big, it needs to be chunked further
            if (section.length > config.maxChunkSize) {
                // First add the current chunk if not empty
                if (currentChunk.length > 0) {
                    chunks.push({
                        content: currentChunk,
                        prefix: currentPrefix,
                        noteId,
                        title,
                        metadata: config.metadata
                    });
                }

                // Chunk this section separately
                const sectionChunks = chunkContent(
                    section,
                    title,
                    noteId,
                    options
                );

                chunks.push(...sectionChunks);

                // Reset current chunk
                currentChunk = '';
                currentPrefix = `${title} (continued)`;
            } else {
                // Add current chunk to chunks
                chunks.push({
                    content: currentChunk,
                    prefix: currentPrefix,
                    noteId,
                    title,
                    metadata: config.metadata
                });

                // Start a new chunk with this section
                currentChunk = section;
                currentPrefix = `${title} (continued)`;
            }
        } else {
            // Add section to current chunk
            if (currentChunk.length > 0 && !currentChunk.endsWith('\n')) {
                currentChunk += '\n\n';
            }
            currentChunk += section;
        }
    }

    // Add the last chunk if it's not empty
    if (currentChunk.length > 0) {
        chunks.push({
            content: currentChunk,
            prefix: currentPrefix,
            noteId,
            title,
            metadata: config.metadata
        });
    }

    return chunks;
}
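A minimal usage sketch of the module above (values and the note ID are illustrative, not defaults mandated by the module): semanticChunking splits on markdown/HTML headers where it can and falls back to chunkContent otherwise.

import { semanticChunking, type ContentChunk } from './chunking.js';

// Hypothetical markdown note: headers become chunk boundaries where possible.
const markdown = '# Intro\n\nSome text...\n\n# Details\n\nMore text...';

const chunks: ContentChunk[] = semanticChunking(markdown, 'My Note', 'note123', {
    maxChunkSize: 500, // smaller than the 1500-char default, for illustration
    overlapSize: 50
});

for (const chunk of chunks) {
    // prefix is the title, or "<title> (continued)" for follow-up chunks
    console.log(chunk.prefix, chunk.content.length);
}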
src/services/llm/context/code_handlers.ts (new file, 433 lines)
@@ -0,0 +1,433 @@
/**
 * Helper functions for processing code notes, including language detection and structure extraction
 */

/**
 * Attempt to detect the programming language from code content or note attributes
 */
export function detectLanguage(content: string, mime: string): string {
    // First check MIME type for hints
    if (mime) {
        const mimeLower = mime.toLowerCase();

        // Map of mime types to language names
        const mimeMap: {[key: string]: string} = {
            'text/javascript': 'javascript',
            'application/javascript': 'javascript',
            'text/typescript': 'typescript',
            'application/typescript': 'typescript',
            'text/x-python': 'python',
            'text/x-java': 'java',
            'text/x-c': 'c',
            'text/x-c++': 'cpp',
            'text/x-csharp': 'csharp',
            'text/x-go': 'go',
            'text/x-ruby': 'ruby',
            'text/x-php': 'php',
            'text/x-rust': 'rust',
            'text/x-swift': 'swift',
            'text/x-kotlin': 'kotlin',
            'text/x-scala': 'scala',
            'text/x-perl': 'perl',
            'text/x-lua': 'lua',
            'text/x-r': 'r',
            'text/x-dart': 'dart',
            'text/html': 'html',
            'text/css': 'css',
            'application/json': 'json',
            'application/xml': 'xml',
            'text/markdown': 'markdown',
            'text/yaml': 'yaml',
            'text/x-sql': 'sql'
        };

        if (mimeMap[mimeLower]) {
            return mimeMap[mimeLower];
        }
    }

    // Check for common language patterns in the first few lines
    const firstLines = content.split('\n').slice(0, 10).join('\n');

    // Simple heuristics for common languages
    if (firstLines.includes('<?php')) return 'php';
    if (firstLines.includes('#!/usr/bin/python') || (firstLines.includes('import ') && firstLines.includes('def '))) return 'python';
    if (firstLines.includes('#!/bin/bash') || firstLines.includes('#!/usr/bin/bash')) return 'bash';
    if (firstLines.includes('#!/usr/bin/perl')) return 'perl';
    if (firstLines.includes('#!/usr/bin/ruby')) return 'ruby';
    if (firstLines.includes('package ') && firstLines.includes('import ') && firstLines.includes('public class ')) return 'java';
    if (firstLines.includes('using System;') && firstLines.includes('namespace ')) return 'csharp';
    if (firstLines.includes('package main') && firstLines.includes('import (') && firstLines.includes('func ')) return 'go';
    if (firstLines.includes('#include <') && (firstLines.includes('int main(') || firstLines.includes('void main('))) {
        if (firstLines.includes('std::')) return 'cpp';
        return 'c';
    }
    if (firstLines.includes('fn main()') && firstLines.includes('let ') && firstLines.includes('impl ')) return 'rust';
    if (firstLines.includes('<!DOCTYPE html>') || firstLines.includes('<html>')) return 'html';
    if (firstLines.includes('function ') && firstLines.includes('var ') && firstLines.includes('const ')) return 'javascript';
    if (firstLines.includes('interface ') && firstLines.includes('export class ')) return 'typescript';
    if (firstLines.includes('@Component') || firstLines.includes('import { Component }')) return 'typescript';

    // Default to 'text' if language can't be determined
    return 'text';
}

/**
 * Extract structure from code to create a summary
 */
export function extractCodeStructure(content: string, language: string): string {
    // Avoid processing very large code files
    if (content.length > 100000) {
        return "Code content too large for structure extraction";
    }

    let structure = "";

    try {
        switch (language.toLowerCase()) {
            case 'javascript':
            case 'typescript':
                structure = extractJsStructure(content);
                break;

            case 'python':
                structure = extractPythonStructure(content);
                break;

            case 'java':
            case 'csharp':
            case 'cpp':
                structure = extractClassBasedStructure(content);
                break;

            case 'go':
                structure = extractGoStructure(content);
                break;

            case 'rust':
                structure = extractRustStructure(content);
                break;

            case 'html':
                structure = extractHtmlStructure(content);
                break;

            default:
                // For other languages, just return a summary of the file size and a few lines
                const lines = content.split('\n');
                structure = `Code file with ${lines.length} lines.\n`;

                // Add first few non-empty lines that aren't comments
                const firstCodeLines = lines.filter(line =>
                    line.trim() !== '' &&
                    !line.trim().startsWith('//') &&
                    !line.trim().startsWith('#') &&
                    !line.trim().startsWith('*') &&
                    !line.trim().startsWith('<!--')
                ).slice(0, 5);

                if (firstCodeLines.length > 0) {
                    structure += "First few code lines:\n" + firstCodeLines.join('\n');
                }
        }
    } catch (e: any) {
        return `Error extracting code structure: ${e.message}`;
    }

    return structure;
}

/**
 * Extract structure from JavaScript/TypeScript code
 */
function extractJsStructure(content: string): string {
    const lines = content.split('\n');
    let structure = "";

    // Look for imports/requires
    const imports = lines.filter(line =>
        line.trim().startsWith('import ') ||
        line.includes('require(')
    ).slice(0, 10);

    if (imports.length > 0) {
        structure += "Imports:\n" + imports.join('\n') + '\n\n';
    }

    // Look for class declarations
    const classes = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('class ') || line.includes(' class ')) {
            classes.push(line);
        }
    }

    if (classes.length > 0) {
        structure += "Classes:\n" + classes.join('\n') + '\n\n';
    }

    // Look for function declarations
    const functions = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('function ') ||
            line.match(/^(const|let|var)\s+\w+\s*=\s*function/) ||
            line.match(/^(const|let|var)\s+\w+\s*=\s*\(/)) {
            functions.push(line);
        }
    }

    if (functions.length > 0) {
        structure += "Functions:\n" + functions.slice(0, 15).join('\n');
        if (functions.length > 15) {
            structure += `\n... and ${functions.length - 15} more functions`;
        }
        structure += '\n\n';
    }

    return structure;
}

/**
 * Extract structure from Python code
 */
function extractPythonStructure(content: string): string {
    const lines = content.split('\n');
    let structure = "";

    // Look for imports
    const imports = lines.filter(line =>
        line.trim().startsWith('import ') ||
        line.trim().startsWith('from ')
    ).slice(0, 10);

    if (imports.length > 0) {
        structure += "Imports:\n" + imports.join('\n') + '\n\n';
    }

    // Look for class declarations
    const classes = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('class ')) {
            classes.push(line);
        }
    }

    if (classes.length > 0) {
        structure += "Classes:\n" + classes.join('\n') + '\n\n';
    }

    // Look for function declarations
    const functions = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('def ')) {
            functions.push(line);
        }
    }

    if (functions.length > 0) {
        structure += "Functions:\n" + functions.slice(0, 15).join('\n');
        if (functions.length > 15) {
            structure += `\n... and ${functions.length - 15} more functions`;
        }
        structure += '\n\n';
    }

    return structure;
}

/**
 * Extract structure from class-based languages like Java, C#, C++
 */
function extractClassBasedStructure(content: string): string {
    const lines = content.split('\n');
    let structure = "";

    // Look for package/namespace declarations
    const packageLines = lines.filter(line =>
        line.trim().startsWith('package ') ||
        line.trim().startsWith('namespace ') ||
        line.trim().startsWith('using ')
    ).slice(0, 5);

    if (packageLines.length > 0) {
        structure += "Package/Imports:\n" + packageLines.join('\n') + '\n\n';
    }

    // Look for class declarations
    const classes = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.match(/^(public|private|protected)?\s*(class|interface|enum)\s+\w+/)) {
            classes.push(line);
        }
    }

    if (classes.length > 0) {
        structure += "Classes/Interfaces:\n" + classes.join('\n') + '\n\n';
    }

    // Look for method declarations
    const methods = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.match(/^(public|private|protected)?\s*(static)?\s*[\w<>[\]]+\s+\w+\s*\(/)) {
            methods.push(line);
        }
    }

    if (methods.length > 0) {
        structure += "Methods:\n" + methods.slice(0, 15).join('\n');
        if (methods.length > 15) {
            structure += `\n... and ${methods.length - 15} more methods`;
        }
        structure += '\n\n';
    }

    return structure;
}

/**
 * Extract structure from Go code
 */
function extractGoStructure(content: string): string {
    const lines = content.split('\n');
    let structure = "";

    // Look for package declarations
    const packageLines = lines.filter(line => line.trim().startsWith('package ')).slice(0, 1);

    if (packageLines.length > 0) {
        structure += "Package:\n" + packageLines.join('\n') + '\n\n';
    }

    // Look for imports
    const importStart = lines.findIndex(line => line.trim() === 'import (');
    if (importStart !== -1) {
        let importEnd = lines.findIndex((line, i) => i > importStart && line.trim() === ')');
        if (importEnd !== -1) {
            structure += "Imports:\n" + lines.slice(importStart, importEnd + 1).join('\n') + '\n\n';
        }
    }

    // Look for type declarations (structs, interfaces)
    const types = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('type ') && (line.includes(' struct ') || line.includes(' interface '))) {
            types.push(line);
        }
    }

    if (types.length > 0) {
        structure += "Types:\n" + types.join('\n') + '\n\n';
    }

    // Look for function declarations
    const functions = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('func ')) {
            functions.push(line);
        }
    }

    if (functions.length > 0) {
        structure += "Functions:\n" + functions.slice(0, 15).join('\n');
        if (functions.length > 15) {
            structure += `\n... and ${functions.length - 15} more functions`;
        }
        structure += '\n\n';
    }

    return structure;
}

/**
 * Extract structure from Rust code
 */
function extractRustStructure(content: string): string {
    const lines = content.split('\n');
    let structure = "";

    // Look for module declarations
    const moduleLines = lines.filter(line => line.trim().startsWith('mod ') || line.trim().startsWith('use ')).slice(0, 10);

    if (moduleLines.length > 0) {
        structure += "Modules/Imports:\n" + moduleLines.join('\n') + '\n\n';
    }

    // Look for struct/enum/trait declarations
    const types = [];
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('struct ') || line.startsWith('enum ') || line.startsWith('trait ')) {
            types.push(line);
        }
    }

    if (types.length > 0) {
        structure += "Types:\n" + types.join('\n') + '\n\n';
    }

    // Look for function/impl declarations
    const functions = [];
    const impls = [];

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        if (line.startsWith('fn ')) {
            functions.push(line);
        }
        if (line.startsWith('impl ')) {
            impls.push(line);
        }
    }

    if (impls.length > 0) {
        structure += "Implementations:\n" + impls.join('\n') + '\n\n';
    }

    if (functions.length > 0) {
        structure += "Functions:\n" + functions.slice(0, 15).join('\n');
        if (functions.length > 15) {
            structure += `\n... and ${functions.length - 15} more functions`;
        }
        structure += '\n\n';
    }

    return structure;
}

/**
 * Extract structure from HTML
 */
function extractHtmlStructure(content: string): string {
    const lines = content.split('\n');

    // Extract title
    const titleMatch = content.match(/<title>(.*?)<\/title>/i);
    const title = titleMatch ? titleMatch[1] : "No title";

    // Count main elements
    const headings = content.match(/<h[1-6].*?>.*?<\/h[1-6]>/gi) || [];
    const divs = content.match(/<div.*?>/gi) || [];
    const scripts = content.match(/<script.*?>.*?<\/script>/gis) || [];
    const links = content.match(/<a.*?>.*?<\/a>/gi) || [];
    const images = content.match(/<img.*?>/gi) || [];

    // Extract some key elements
    const structure = `HTML Document: "${title}"
Document structure:
- Contains ${headings.length} headings
- Contains ${divs.length} div elements
- Contains ${scripts.length} script blocks
- Contains ${links.length} links
- Contains ${images.length} images
`;

    return structure;
}
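A quick usage sketch of the handlers above (the source string is illustrative): MIME wins when it is in the map; otherwise heuristics over the first ten lines decide, falling back to 'text'.

import { detectLanguage, extractCodeStructure } from './code_handlers.js';

const source = 'import os\n\ndef main():\n    pass\n';

const language = detectLanguage(source, 'text/x-python'); // 'python' via the MIME map
const summary = extractCodeStructure(source, language);   // imports + def overview

console.log(summary);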
src/services/llm/context/hierarchy.ts (new file, 243 lines)
@@ -0,0 +1,243 @@
import becca from '../../../becca/becca.js';
import { sanitizeHtmlContent } from './note_content.js';

/**
 * Get a list of parent notes for a given note
 */
export async function getParentNotes(noteId: string, maxParents: number = 5): Promise<{id: string, title: string}[]> {
    const note = becca.getNote(noteId);

    if (!note) {
        return [];
    }

    try {
        // Use Becca API to get parent branches and notes
        const parentBranches = note.getParentBranches();

        if (!parentBranches || parentBranches.length === 0) {
            return [];
        }

        // Map to get parent notes, limiting to maxParents
        const parentNotes = parentBranches
            .slice(0, maxParents)
            .map(branch => {
                if (!branch.parentNote) {
                    return null;
                }

                return {
                    id: branch.parentNote.noteId,
                    title: branch.parentNote.title
                };
            })
            .filter(note => note !== null) as {id: string, title: string}[];

        return parentNotes;
    } catch (error) {
        console.error(`Error getting parent notes for ${noteId}:`, error);
        return [];
    }
}

/**
 * Get hierarchical context of parent notes
 * This function builds a representation of the note hierarchy to provide context
 */
export async function getParentContext(
    noteId: string,
    maxDepth: number = 3,
    maxParents: number = 3,
    includeCurrentNote: boolean = true
): Promise<string> {
    // Note: getParentNotes has been updated to use Becca API
    const note = becca.getNote(noteId);

    if (!note) {
        return "";
    }

    const visited = new Set<string>();
    let context = "";

    // Helper function to build the hierarchical context recursively
    async function buildHierarchy(currentNoteId: string, depth: number, prefix: string = ""): Promise<void> {
        if (depth > maxDepth || visited.has(currentNoteId)) {
            return;
        }

        visited.add(currentNoteId);
        const parentNotes = await getParentNotes(currentNoteId, maxParents);

        for (const parent of parentNotes) {
            // Add parent with proper indentation
            context += `${prefix}- ${parent.title}\n`;

            // Recursively add parents of this parent with increased indentation
            await buildHierarchy(parent.id, depth + 1, prefix + "  ");
        }
    }

    // Build the hierarchy starting from the current note
    await buildHierarchy(noteId, 1);

    // Add the current note at the end with appropriate indentation
    if (includeCurrentNote) {
        // Determine the indentation level based on hierarchy depth
        let indentation = "";
        if (context) {
            // If we have parent context, add the current note with proper indentation
            indentation = "  ".repeat(1); // One level deeper than parents
            context += `${indentation}> ${note.title} (current note)\n`;
        } else {
            // If no parents, just add the current note
            context += `> ${note.title} (current note)\n`;
        }
    }

    if (!context) {
        return "No parent context available.";
    }

    return context;
}

/**
 * Get context from child notes
 */
export async function getChildContext(
    noteId: string,
    maxChildren: number = 10,
    includeContent: boolean = false
): Promise<string> {
    const note = becca.getNote(noteId);

    if (!note) {
        return "";
    }

    try {
        // Get child notes using Becca API
        const childNotes = note.getChildNotes();

        if (!childNotes || childNotes.length === 0) {
            return "No child notes.";
        }

        let context = `Child notes (${childNotes.length} total):\n`;

        // Limit the number of children included in context
        const limitedChildren = childNotes.slice(0, maxChildren);

        for (const childNote of limitedChildren) {
            context += `- ${childNote.title}\n`;

            // Optionally include a snippet of content
            if (includeContent) {
                try {
                    const content = String(await childNote.getContent() || "");

                    // Truncate and sanitize content
                    const truncatedContent = sanitizeHtmlContent(content)
                        .substring(0, 100)
                        .trim()
                        .replace(/\n/g, ' ');

                    if (truncatedContent) {
                        context += `  Summary: ${truncatedContent}${truncatedContent.length >= 100 ? '...' : ''}\n`;
                    }
                } catch (e) {
                    // Silently skip content errors
                }
            }
        }

        // Add note about truncation if needed
        if (childNotes.length > maxChildren) {
            context += `... and ${childNotes.length - maxChildren} more child notes not shown\n`;
        }

        return context;
    } catch (error) {
        console.error(`Error getting child context for ${noteId}:`, error);
        return "Error retrieving child notes.";
    }
}

/**
 * Get context from linked notes (relations)
 */
export async function getLinkedNotesContext(
    noteId: string,
    maxRelations: number = 10
): Promise<string> {
    const note = becca.getNote(noteId);

    if (!note) {
        return "";
    }

    try {
        // Get all relations using Becca API
        const relations = note.getRelations();

        if (!relations || relations.length === 0) {
            return "No linked notes.";
        }

        // Get incoming relations as well
        const incomingRelations = note.getTargetRelations();

        let context = "";

        // Handle outgoing relations
        if (relations.length > 0) {
            context += `Outgoing relations (${relations.length} total):\n`;

            // Limit the number of relations included in context
            const limitedRelations = relations.slice(0, maxRelations);

            for (const relation of limitedRelations) {
                const targetNote = becca.getNote(relation.value || "");
                if (targetNote) {
                    const relationName = relation.name || 'relates to';
                    context += `- ${relationName} → ${targetNote.title}\n`;
                }
            }

            // Add note about truncation if needed
            if (relations.length > maxRelations) {
                context += `... and ${relations.length - maxRelations} more outgoing relations not shown\n`;
            }
        }

        // Handle incoming relations
        if (incomingRelations && incomingRelations.length > 0) {
            if (context) context += "\n";

            context += `Incoming relations (${incomingRelations.length} total):\n`;

            // Limit the number of relations included in context
            const limitedIncoming = incomingRelations.slice(0, maxRelations);

            for (const relation of limitedIncoming) {
                const sourceNote = becca.getNote(relation.value || "");
                if (sourceNote) {
                    const relationName = relation.name || 'relates to';
                    context += `- ${sourceNote.title} → ${relationName}\n`;
                }
            }

            // Add note about truncation if needed
            if (incomingRelations.length > maxRelations) {
                context += `... and ${incomingRelations.length - maxRelations} more incoming relations not shown\n`;
            }
        }

        return context || "No linked notes.";
    } catch (error) {
        console.error(`Error getting linked notes context for ${noteId}:`, error);
        return "Error retrieving linked notes.";
    }
}
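A usage sketch for the hierarchy helpers (requires a live Becca cache, so it only runs inside the Trilium server; the note ID and limits are illustrative):

import { getParentContext, getChildContext, getLinkedNotesContext } from './hierarchy.js';

async function describeNeighborhood(noteId: string): Promise<string> {
    const parents = await getParentContext(noteId, 2 /* maxDepth */, 3 /* maxParents */);
    const children = await getChildContext(noteId, 5 /* maxChildren */, true /* include snippets */);
    const links = await getLinkedNotesContext(noteId, 5 /* maxRelations */);

    return [parents, children, links].join('\n');
}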
src/services/llm/context/index.ts (new file, 616 lines)
@@ -0,0 +1,616 @@
/**
 * Context extraction module for LLM features
 * Provides methods to extract relevant context from notes for LLM processing
 */

import becca from '../../../becca/becca.js';
import { getNoteContent, formatNoteContent, sanitizeHtmlContent } from './note_content.js';
import { detectLanguage, extractCodeStructure } from './code_handlers.js';
import { chunkContent, semanticChunking } from './chunking.js';
import type { ContentChunk, ChunkOptions } from './chunking.js';
import { summarizeContent, extractKeyPoints } from './summarization.js';
import { getParentNotes, getParentContext, getChildContext, getLinkedNotesContext } from './hierarchy.js';
import { getSemanticContext } from './semantic_context.js';

/**
 * Options for context extraction
 */
export interface ContextOptions {
    /**
     * Include parent context
     */
    includeParents?: boolean;

    /**
     * Include child notes in context
     */
    includeChildren?: boolean;

    /**
     * Include linked notes in context
     */
    includeLinks?: boolean;

    /**
     * Include semantically similar notes
     */
    includeSimilar?: boolean;

    /**
     * Include note content in context
     */
    includeContent?: boolean;

    /**
     * Maximum depth for parent hierarchy
     */
    maxParentDepth?: number;

    /**
     * Maximum number of children to include
     */
    maxChildren?: number;

    /**
     * Maximum number of linked notes to include
     */
    maxLinks?: number;

    /**
     * Maximum number of similar notes to include
     */
    maxSimilarNotes?: number;

    /**
     * Maximum content length
     */
    maxContentLength?: number;
}

/**
 * Default options for context extraction
 */
const DEFAULT_CONTEXT_OPTIONS: Required<ContextOptions> = {
    includeParents: true,
    includeChildren: true,
    includeLinks: true,
    includeSimilar: false,
    includeContent: true,
    maxParentDepth: 3,
    maxChildren: 10,
    maxLinks: 10,
    maxSimilarNotes: 5,
    maxContentLength: 2000
};

/**
 * Context Extractor class
 * Handles extraction of context from notes for LLM processing
 */
export class ContextExtractor {
    /**
     * Get content of a note
     */
    static async getNoteContent(noteId: string): Promise<string | null> {
        return getNoteContent(noteId);
    }

    /**
     * Get content of a note - instance method
     */
    async getNoteContent(noteId: string): Promise<string | null> {
        return ContextExtractor.getNoteContent(noteId);
    }

    /**
     * Format note content based on its type
     */
    static formatNoteContent(content: string, type: string, mime: string, title: string): string {
        return formatNoteContent(content, type, mime, title);
    }

    /**
     * Format note content based on its type - instance method
     */
    formatNoteContent(content: string, type: string, mime: string, title: string): string {
        return ContextExtractor.formatNoteContent(content, type, mime, title);
    }

    /**
     * Sanitize HTML content to plain text
     */
    static sanitizeHtmlContent(html: string): string {
        return sanitizeHtmlContent(html);
    }

    /**
     * Sanitize HTML content to plain text - instance method
     */
    sanitizeHtmlContent(html: string): string {
        return ContextExtractor.sanitizeHtmlContent(html);
    }

    /**
     * Detect programming language from content
     */
    static detectLanguage(content: string, mime: string): string {
        return detectLanguage(content, mime);
    }

    /**
     * Detect programming language from content - instance method
     */
    detectLanguage(content: string, mime: string): string {
        return ContextExtractor.detectLanguage(content, mime);
    }

    /**
     * Extract structure from code
     */
    static extractCodeStructure(content: string, language: string): string {
        return extractCodeStructure(content, language);
    }

    /**
     * Extract structure from code - instance method
     */
    extractCodeStructure(content: string, language: string): string {
        return ContextExtractor.extractCodeStructure(content, language);
    }

    /**
     * Chunk content into smaller pieces
     */
    static chunkContent(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): ContentChunk[] {
        return chunkContent(content, title, noteId, options);
    }

    /**
     * Chunk content into smaller pieces - instance method
     */
    chunkContent(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): ContentChunk[] {
        return ContextExtractor.chunkContent(content, title, noteId, options);
    }

    /**
     * Smarter chunking that respects semantic boundaries
     */
    static semanticChunking(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): ContentChunk[] {
        return semanticChunking(content, title, noteId, options);
    }

    /**
     * Smarter chunking that respects semantic boundaries - instance method
     */
    semanticChunking(
        content: string,
        title: string = '',
        noteId: string = '',
        options: ChunkOptions = {}
    ): ContentChunk[] {
        return ContextExtractor.semanticChunking(content, title, noteId, options);
    }

    /**
     * Summarize content
     */
    static summarizeContent(
        content: string,
        title: string = ''
    ): string {
        return summarizeContent(content, title);
    }

    /**
     * Summarize content - instance method
     */
    summarizeContent(
        content: string,
        title: string = ''
    ): string {
        return ContextExtractor.summarizeContent(content, title);
    }

    /**
     * Extract key points from content
     */
    static extractKeyPoints(
        content: string,
        maxPoints: number = 5
    ): string[] {
        return extractKeyPoints(content, maxPoints);
    }

    /**
     * Extract key points from content - instance method
     */
    extractKeyPoints(
        content: string,
        maxPoints: number = 5
    ): string[] {
        return ContextExtractor.extractKeyPoints(content, maxPoints);
    }

    /**
     * Get parent notes
     */
    static async getParentNotes(
        noteId: string,
        maxParents: number = 5
    ): Promise<{id: string, title: string}[]> {
        return getParentNotes(noteId, maxParents);
    }

    /**
     * Get parent notes - instance method
     */
    async getParentNotes(
        noteId: string,
        maxParents: number = 5
    ): Promise<{id: string, title: string}[]> {
        return ContextExtractor.getParentNotes(noteId, maxParents);
    }

    /**
     * Get hierarchical parent context
     */
    static async getParentContext(
        noteId: string,
        maxDepth: number = 3,
        maxParents: number = 3
    ): Promise<string> {
        return getParentContext(noteId, maxDepth, maxParents);
    }

    /**
     * Get hierarchical parent context - instance method
     */
    async getParentContext(
        noteId: string,
        maxDepth: number = 3,
        maxParents: number = 3
    ): Promise<string> {
        return ContextExtractor.getParentContext(noteId, maxDepth, maxParents);
    }

    /**
     * Get child context
     */
    static async getChildContext(
        noteId: string,
        maxChildren: number = 10,
        includeContent: boolean = false
    ): Promise<string> {
        return getChildContext(noteId, maxChildren, includeContent);
    }

    /**
     * Get child context - instance method
     */
    async getChildContext(
        noteId: string,
        maxChildren: number = 10,
        includeContent: boolean = false
    ): Promise<string> {
        return ContextExtractor.getChildContext(noteId, maxChildren, includeContent);
    }

    /**
     * Get linked notes context
     */
    static async getLinkedNotesContext(
        noteId: string,
        maxRelations: number = 10
    ): Promise<string> {
        return getLinkedNotesContext(noteId, maxRelations);
    }

    /**
     * Get linked notes context - instance method
     */
    async getLinkedNotesContext(
        noteId: string,
        maxRelations: number = 10
    ): Promise<string> {
        return ContextExtractor.getLinkedNotesContext(noteId, maxRelations);
    }

    /**
     * Get semantic context
     */
    static async getSemanticContext(
        noteId: string,
        maxSimilarNotesOrQuery: number | string = 5
    ): Promise<string> {
        // Handle both the new (number) and old (string query) parameter types
        if (typeof maxSimilarNotesOrQuery === 'string') {
            // Old API: The second parameter was a query string
            // For backward compatibility, we'll still accept this
            return getSemanticContext(noteId, { maxSimilarNotes: 5 });
        } else {
            // New API: The second parameter is maxSimilarNotes
            return getSemanticContext(noteId, { maxSimilarNotes: maxSimilarNotesOrQuery });
        }
    }

    /**
     * Get semantic context - instance method
     */
    async getSemanticContext(
        noteId: string,
        maxSimilarNotesOrQuery: number | string = 5
    ): Promise<string> {
        return ContextExtractor.getSemanticContext(noteId, maxSimilarNotesOrQuery);
    }

    /**
     * Extract full context for a note
     * This combines various context sources based on provided options
     */
    static async extractContext(
        noteId: string,
        options: ContextOptions = {}
    ): Promise<string> {
        const config: Required<ContextOptions> = { ...DEFAULT_CONTEXT_OPTIONS, ...options };
        const note = becca.getNote(noteId);

        if (!note) {
            return "Note not found.";
        }

        let context = `# Context for note: ${note.title}\n\n`;

        // Include parent context
        if (config.includeParents) {
            const parentContext = await ContextExtractor.getParentContext(
                noteId,
                config.maxParentDepth,
                3 // Default to 3 parents per level
            );

            if (parentContext) {
                context += `## Parent Hierarchy\n${parentContext}\n\n`;
            }
        }

        // Include note content
        if (config.includeContent) {
            const content = await ContextExtractor.getNoteContent(noteId);

            if (content) {
                // If content is too large, summarize it
                let contentSection = '';

                if (content.length > config.maxContentLength) {
                    contentSection = ContextExtractor.summarizeContent(content, note.title);
                    contentSection += "\n\n[Content summarized due to length]";
                } else {
                    contentSection = content;
                }

                context += `## Note Content\n${contentSection}\n\n`;
            }
        }

        // Include child context
        if (config.includeChildren) {
            const childContext = await ContextExtractor.getChildContext(
                noteId,
                config.maxChildren,
                false // Don't include child content by default
            );

            if (childContext && childContext !== "No child notes.") {
                context += `## Child Notes\n${childContext}\n\n`;
            }
        }

        // Include linked notes
        if (config.includeLinks) {
            const linkedContext = await ContextExtractor.getLinkedNotesContext(
                noteId,
                config.maxLinks
            );

            if (linkedContext && linkedContext !== "No linked notes.") {
                context += `## Linked Notes\n${linkedContext}\n\n`;
            }
        }

        // Include semantically similar notes
        if (config.includeSimilar) {
            const semanticContext = await ContextExtractor.getSemanticContext(
                noteId,
                config.maxSimilarNotes
            );

            if (semanticContext && !semanticContext.includes("No semantically similar notes found.")) {
                context += `## Similar Notes\n${semanticContext}\n\n`;
            }
        }

        return context;
    }

    /**
     * Extract full context for a note - instance method
     */
    async extractContext(
        noteId: string,
        options: ContextOptions = {}
    ): Promise<string> {
        return ContextExtractor.extractContext(noteId, options);
    }

    /**
     * Get progressively loaded context based on depth level
     * This provides different levels of context detail depending on the depth parameter
     *
     * @param noteId - The ID of the note to get context for
     * @param depth - Depth level (1-4) determining how much context to include
     * @returns Context appropriate for the requested depth
     */
    static async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        try {
            // This requires the semantic context service to be available
            // We're using a dynamic import to avoid circular dependencies
            const { default: aiServiceManager } = await import('../ai_service_manager.js');
            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();

            if (!semanticContext) {
                return ContextExtractor.extractContext(noteId);
            }

            return await semanticContext.getProgressiveContext(noteId, depth);
        } catch (error) {
            // Fall back to regular context if progressive loading fails
            console.error('Error in progressive context loading:', error);
            return ContextExtractor.extractContext(noteId);
        }
    }

    /**
     * Get progressively loaded context based on depth level - instance method
     */
    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        return ContextExtractor.getProgressiveContext(noteId, depth);
    }

    /**
     * Get smart context based on the query complexity
     * This automatically selects the appropriate context depth and relevance
     *
     * @param noteId - The ID of the note to get context for
     * @param query - The user's query for semantic relevance matching
     * @returns The optimal context for answering the query
     */
    static async getSmartContext(noteId: string, query: string): Promise<string> {
        try {
            // This requires the semantic context service to be available
            // We're using a dynamic import to avoid circular dependencies
            const { default: aiServiceManager } = await import('../ai_service_manager.js');
            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();

            if (!semanticContext) {
                return ContextExtractor.extractContext(noteId);
            }

            return await semanticContext.getSmartContext(noteId, query);
        } catch (error) {
            // Fall back to regular context if smart context fails
            console.error('Error in smart context selection:', error);
            return ContextExtractor.extractContext(noteId);
        }
    }

    /**
     * Get smart context based on the query complexity - instance method
     */
    async getSmartContext(noteId: string, query: string): Promise<string> {
        return ContextExtractor.getSmartContext(noteId, query);
    }

    /**
     * Get the full context for a note, including parent hierarchy, content, and children
     * Legacy method for backwards compatibility
     */
    static async getFullContext(noteId: string): Promise<string> {
        // Use extractContext with default options
        return ContextExtractor.extractContext(noteId);
    }

    /**
     * Get the full context for a note - instance method
     */
    async getFullContext(noteId: string): Promise<string> {
        return ContextExtractor.getFullContext(noteId);
    }

    /**
     * Get note summary - for backward compatibility
     */
    static async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
        const note = becca.getNote(noteId);
        if (!note) return '';

        const content = await getNoteContent(noteId);
        if (!content || content.length < maxLength) return content || '';

        // For larger content, generate a summary
        return summarizeContent(content, note.title);
    }

    /**
     * Get note summary - instance method
     */
    async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
        return ContextExtractor.getNoteSummary(noteId, maxLength);
    }

    /**
     * Split a large note into smaller, semantically meaningful chunks
     * This is useful for handling large notes that exceed the context window of LLMs
     * For backward compatibility
     */
    static async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
        const content = await getNoteContent(noteId);
        if (!content) return [];

        // Use the new chunking functionality
        const chunks = chunkContent(
            content,
            '',
            noteId,
            { maxChunkSize, respectBoundaries: true }
        );

        // Convert to the old API format which was an array of strings
        return chunks.map(chunk => chunk.content);
    }

    /**
     * Split a large note into smaller chunks - instance method
     */
    async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
        return ContextExtractor.getChunkedNoteContent(noteId, maxChunkSize);
    }
}

// Export all modules
export {
    getNoteContent,
    formatNoteContent,
    sanitizeHtmlContent,
    detectLanguage,
    extractCodeStructure,
    chunkContent,
    semanticChunking,
    summarizeContent,
    extractKeyPoints,
    getParentNotes,
    getParentContext,
    getChildContext,
    getLinkedNotesContext,
    getSemanticContext
};

// Export types
export type {
    ContentChunk,
    ChunkOptions
};
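A usage sketch of the facade (option values are illustrative overrides of the defaults above; every method exists both statically and on instances, so either calling style works):

import { ContextExtractor } from './context/index.js';

const extractor = new ContextExtractor();

async function buildContext(noteId: string): Promise<string> {
    // Markdown-sectioned context string: parent hierarchy, content, children, links.
    return extractor.extractContext(noteId, {
        includeSimilar: true, // off by default
        maxChildren: 5,
        maxContentLength: 1000
    });
}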
src/services/llm/context/note_content.ts (new file, 223 lines)
@@ -0,0 +1,223 @@
|
||||
import sanitizeHtml from 'sanitize-html';
|
||||
import becca from '../../../becca/becca.js';
|
||||
|
||||
/**
|
||||
* Get the content of a note
|
||||
*/
|
||||
export async function getNoteContent(noteId: string): Promise<string | null> {
|
||||
// Use Becca API to get note data
|
||||
const note = becca.getNote(noteId);
|
||||
|
||||
if (!note) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
// Get content using Becca API
|
||||
const content = String(await note.getContent() || "");
|
||||
|
||||
return formatNoteContent(
|
||||
content,
|
||||
note.type,
|
||||
note.mime,
|
||||
note.title
|
||||
);
|
||||
} catch (error) {
|
||||
console.error(`Error getting content for note ${noteId}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Format the content of a note based on its type
|
||||
* Enhanced with better handling for large and specialized content types
|
||||
*/
|
||||
export function formatNoteContent(content: string, type: string, mime: string, title: string): string {
|
||||
let formattedContent = `# ${title}\n\n`;
|
||||
|
||||
switch (type) {
|
||||
case 'text':
|
||||
// Remove HTML formatting for text notes
|
||||
formattedContent += sanitizeHtml(content);
|
||||
break;
|
||||
|
||||
case 'code':
|
||||
// For code, we'll handle this in code_handlers.ts
|
||||
// Just use basic formatting here
|
||||
formattedContent += '```\n' + content + '\n```';
|
||||
break;
|
||||
|
||||
case 'canvas':
|
||||
if (mime === 'application/json') {
|
||||
try {
|
||||
// Parse JSON content
|
||||
const jsonContent = JSON.parse(content);
|
||||
                    // Extract text elements from canvas
                    if (jsonContent.elements && Array.isArray(jsonContent.elements)) {
                        const texts = jsonContent.elements
                            .filter((element: any) => element.type === 'text' && element.text)
                            .map((element: any) => element.text);

                        formattedContent += 'Canvas content:\n' + texts.join('\n');
                    } else {
                        formattedContent += '[Empty canvas]';
                    }
                }
                catch (e: any) {
                    formattedContent += `[Error parsing canvas content: ${e.message}]`;
                }
            } else {
                formattedContent += '[Canvas content]';
            }
            break;

        case 'mindMap':
            if (mime === 'application/json') {
                try {
                    // Parse JSON content
                    const jsonContent = JSON.parse(content);

                    // Extract node text from mind map
                    const extractMindMapNodes = (node: any): string[] => {
                        let texts: string[] = [];
                        if (node.text) {
                            texts.push(node.text);
                        }
                        if (node.children && Array.isArray(node.children)) {
                            for (const child of node.children) {
                                texts = texts.concat(extractMindMapNodes(child));
                            }
                        }
                        return texts;
                    };

                    if (jsonContent.root) {
                        formattedContent += 'Mind map content:\n' + extractMindMapNodes(jsonContent.root).join('\n');
                    } else {
                        formattedContent += '[Empty mind map]';
                    }
                }
                catch (e: any) {
                    formattedContent += `[Error parsing mind map content: ${e.message}]`;
                }
            } else {
                formattedContent += '[Mind map content]';
            }
            break;

        case 'relationMap':
            if (mime === 'application/json') {
                try {
                    // Parse JSON content
                    const jsonContent = JSON.parse(content);

                    // Extract relation map entities and connections
                    let result = 'Relation map content:\n';

                    if (jsonContent.notes && Array.isArray(jsonContent.notes)) {
                        result += 'Notes: ' + jsonContent.notes
                            .map((note: any) => note.title || note.name)
                            .filter(Boolean)
                            .join(', ') + '\n';
                    }

                    if (jsonContent.relations && Array.isArray(jsonContent.relations)) {
                        result += 'Relations: ' + jsonContent.relations
                            .map((rel: any) => {
                                const sourceNote = jsonContent.notes.find((n: any) => n.noteId === rel.sourceNoteId);
                                const targetNote = jsonContent.notes.find((n: any) => n.noteId === rel.targetNoteId);
                                const source = sourceNote ? (sourceNote.title || sourceNote.name) : 'unknown';
                                const target = targetNote ? (targetNote.title || targetNote.name) : 'unknown';
                                return `${source} → ${rel.name || ''} → ${target}`;
                            })
                            .join('; ');
                    }

                    formattedContent += result;
                }
                catch (e: any) {
                    formattedContent += `[Error parsing relation map content: ${e.message}]`;
                }
            } else {
                formattedContent += '[Relation map content]';
            }
            break;

        case 'geoMap':
            if (mime === 'application/json') {
                try {
                    // Parse JSON content
                    const jsonContent = JSON.parse(content);

                    let result = 'Geographic map content:\n';

                    if (jsonContent.markers && Array.isArray(jsonContent.markers)) {
                        if (jsonContent.markers.length > 0) {
                            result += jsonContent.markers
                                .map((marker: any) => {
                                    return `Location: ${marker.title || ''} (${marker.lat}, ${marker.lng})${marker.description ? ' - ' + marker.description : ''}`;
                                })
                                .join('\n');
                        } else {
                            result += 'Empty geographic map';
                        }
                    } else {
                        result += 'Empty geographic map';
                    }

                    formattedContent += result;
                }
                catch (e: any) {
                    formattedContent += `[Error parsing geographic map content: ${e.message}]`;
                }
            } else {
                formattedContent += '[Geographic map content]';
            }
            break;

        case 'mermaid':
            // Format mermaid diagrams as code blocks
            formattedContent += '```mermaid\n' + content + '\n```';
            break;

        case 'image':
        case 'file':
            formattedContent += `[${type} attachment]`;
            break;

        default:
            // For other notes, just use the content as is
            formattedContent += sanitizeHtml(content);
    }

    return formattedContent;
}

/**
 * Sanitize HTML content to plain text
 */
export function sanitizeHtmlContent(html: string): string {
    if (!html) return '';

    // Use sanitizeHtml to remove all HTML tags
    let content = sanitizeHtml(html, {
        allowedTags: [],
        allowedAttributes: {},
        textFilter: (text) => {
            // Replace multiple newlines with a single one
            return text.replace(/\n\s*\n/g, '\n\n');
        }
    });

    // Additional cleanup for any remaining HTML entities
    content = content
        .replace(/&nbsp;/g, ' ')
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&amp;/g, '&')
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'");

    return content;
}
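
// Usage sketch (hypothetical input, not taken from this commit's callers):
//   sanitizeHtmlContent('<p>Tags &amp; entities</p>')
//   // => "Tags & entities"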

225
src/services/llm/context/semantic_context.ts
Normal file
@ -0,0 +1,225 @@
/**
 * Contains functions for semantic context extraction
 * Uses more intelligent methods to determine relevant context
 */

import { sanitizeHtmlContent } from './note_content.js';
import becca from '../../../becca/becca.js';
import { getNoteContent } from './note_content.js';

/**
 * Options for semantic context extraction
 */
export interface SemanticContextOptions {
    /**
     * Maximum number of similar notes to include
     */
    maxSimilarNotes?: number;

    /**
     * Whether to include note content snippets
     */
    includeContent?: boolean;

    /**
     * Maximum length of content snippets
     */
    snippetLength?: number;

    /**
     * Minimum similarity score (0-1) to include a note
     */
    minSimilarity?: number;
}

/**
 * Default options for semantic context extraction
 */
const DEFAULT_SEMANTIC_CONTEXT_OPTIONS: Required<SemanticContextOptions> = {
    maxSimilarNotes: 5,
    includeContent: true,
    snippetLength: 200,
    minSimilarity: 0.7
};

/**
 * Retrieve semantically similar notes to provide context
 * This is a simplified version without vector store integration
 * Use vector_store for actual semantic search
 */
export async function getSemanticContext(
    noteId: string,
    options: SemanticContextOptions = {}
): Promise<string> {
    // Merge provided options with defaults
    const config: Required<SemanticContextOptions> = {
        ...DEFAULT_SEMANTIC_CONTEXT_OPTIONS,
        ...options
    };

    try {
        // Get the current note
        const note = becca.getNote(noteId);

        if (!note) {
            return "Note not found.";
        }

        // Get note content for comparison
        const noteContent = await getNoteContent(noteId);

        if (!noteContent) {
            return "No content available for similarity comparison.";
        }

        // Get potential related notes (simplified method)
        // In real implementation, this would use vector_store.similarity methods
        const relatedNotes = await findRelatedNotes(noteId, noteContent, config);

        // Format the semantic context result
        let context = `Semantically related notes to "${note.title}":\n\n`;

        if (relatedNotes.length === 0) {
            context += "No semantically similar notes found.";
            return context;
        }

        // Add each related note to the context
        for (const relatedNote of relatedNotes) {
            context += `## ${relatedNote.title}\n`;

            if (config.includeContent && relatedNote.snippet) {
                context += `${relatedNote.snippet}\n\n`;
            }
        }

        return context;
    } catch (error) {
        console.error(`Error getting semantic context for ${noteId}:`, error);
        return "Error retrieving semantic context.";
    }
}
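
// Example call (sketch; 'abc123' is a hypothetical note ID):
//   const context = await getSemanticContext('abc123', { maxSimilarNotes: 3, minSimilarity: 0.5 });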

/**
 * Find related notes based on simple heuristics
 * This is a placeholder for semantic search that would normally use vector embeddings
 */
async function findRelatedNotes(
    noteId: string,
    noteContent: string,
    options: Required<SemanticContextOptions>
): Promise<{ id: string, title: string, snippet: string | null, score: number }[]> {
    const results: { id: string, title: string, snippet: string | null, score: number }[] = [];
    const note = becca.getNote(noteId);

    if (!note) {
        return results;
    }

    // 1. Check siblings (notes with the same parent)
    const parentBranches = note.getParentBranches();
    const processedNotes = new Set<string>();
    processedNotes.add(noteId); // Don't include the current note

    // Process parent branches to find siblings
    for (const branch of parentBranches) {
        if (!branch.parentNote) {
            continue;
        }

        const parentNote = branch.parentNote;
        const siblingNotes = parentNote.getChildNotes().filter(n => n.noteId !== noteId);

        for (const siblingNote of siblingNotes) {
            if (processedNotes.has(siblingNote.noteId)) {
                continue;
            }

            processedNotes.add(siblingNote.noteId);

            const siblingContent = await getNoteContent(siblingNote.noteId);
            if (!siblingContent) {
                continue;
            }

            // Calculate a very simple similarity score
            const score = calculateSimpleTextSimilarity(noteContent, siblingContent);

            if (score >= options.minSimilarity) {
                results.push({
                    id: siblingNote.noteId,
                    title: siblingNote.title,
                    snippet: siblingContent.substring(0, options.snippetLength) + '...',
                    score
                });
            }
        }
    }

    // 2. Check notes connected by relations
    const relations = note.getRelations();
    for (const relation of relations) {
        const targetNoteId = relation.value;

        if (!targetNoteId || processedNotes.has(targetNoteId)) {
            continue;
        }

        processedNotes.add(targetNoteId);

        const targetNote = becca.getNote(targetNoteId);
        if (!targetNote) {
            continue;
        }

        const targetContent = await getNoteContent(targetNoteId);
        if (!targetContent) {
            continue;
        }

        // Relations are already semantically connected, so give them a boost
        const score = calculateSimpleTextSimilarity(noteContent, targetContent) + 0.2;

        results.push({
            id: targetNoteId,
            title: targetNote.title,
            snippet: targetContent.substring(0, options.snippetLength) + '...',
            score: Math.min(score, 1.0) // Cap at 1.0
        });
    }

    // Sort by similarity score (highest first) and limit
    return results
        .sort((a, b) => b.score - a.score)
        .slice(0, options.maxSimilarNotes);
}

/**
 * Calculate a simple text similarity based on shared words
 * This is a very basic implementation and should be replaced with actual embedding similarity
 */
function calculateSimpleTextSimilarity(text1: string, text2: string): number {
    // Clean and tokenize the texts
    const cleanText1 = sanitizeHtmlContent(text1).toLowerCase();
    const cleanText2 = sanitizeHtmlContent(text2).toLowerCase();

    // Get unique words (case insensitive)
    const words1 = new Set(cleanText1.split(/\W+/).filter(w => w.length > 3));
    const words2 = new Set(cleanText2.split(/\W+/).filter(w => w.length > 3));

    // No meaningful comparison possible if either text has no significant words
    if (words1.size === 0 || words2.size === 0) {
        return 0;
    }

    // Count shared words
    let sharedCount = 0;
    for (const word of words1) {
        if (words2.has(word)) {
            sharedCount++;
        }
    }

    // Jaccard similarity: intersection size / union size
    return sharedCount / (words1.size + words2.size - sharedCount);
}
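
// Worked example (hypothetical inputs): with words1 = {note, search, index}
// and words2 = {note, index, cache}, sharedCount is 2 and the union size is
// 3 + 3 - 2 = 4, so the similarity is 2 / 4 = 0.5.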

162
src/services/llm/context/summarization.ts
Normal file
@ -0,0 +1,162 @@
/**
 * Contains functions for generating summaries of note content
 * Used to provide concise context for LLM processing
 */

import { sanitizeHtmlContent } from './note_content.js';

/**
 * Options for summarization
 */
export interface SummarizationOptions {
    /**
     * Maximum length of the summary in characters
     */
    maxLength?: number;

    /**
     * Whether to include title in the summary
     */
    includeTitle?: boolean;

    /**
     * Minimum content length to trigger summarization
     */
    minContentLengthForSummarization?: number;
}

/**
 * Default summarization options
 */
const DEFAULT_SUMMARIZATION_OPTIONS: Required<SummarizationOptions> = {
    maxLength: 500,
    includeTitle: true,
    minContentLengthForSummarization: 1000
};

/**
 * Summarize note content
 * If the content is smaller than minContentLengthForSummarization, returns trimmed content
 * This is a local implementation that doesn't require API calls
 */
export function summarizeContent(
    content: string,
    title: string = '',
    options: SummarizationOptions = {}
): string {
    // Merge provided options with defaults
    const config: Required<SummarizationOptions> = {
        ...DEFAULT_SUMMARIZATION_OPTIONS,
        ...options
    };

    // Clean up the content
    const cleanedContent = sanitizeHtmlContent(content);

    // If content is small enough, no need to summarize
    if (cleanedContent.length < config.minContentLengthForSummarization) {
        // Just truncate if needed
        if (cleanedContent.length > config.maxLength) {
            return cleanedContent.substring(0, config.maxLength) + '...';
        }
        return cleanedContent;
    }

    // Use local summarization
    return generateLocalSummary(cleanedContent, config);
}

/**
 * Generate a simple summary locally without using LLM API
 */
function generateLocalSummary(content: string, options: Required<SummarizationOptions>): string {
    // Simple heuristic approach - extract first paragraph and some key sentences

    // First, try to get the first paragraph that has reasonable length
    const paragraphs = content.split(/\n\s*\n/);
    let summary = '';

    for (const paragraph of paragraphs) {
        if (paragraph.length > 30 && !paragraph.startsWith('#') && !paragraph.startsWith('!')) {
            summary = paragraph;
            break;
        }
    }

    // If no good paragraph found, use the first X characters
    if (!summary) {
        summary = content.substring(0, options.maxLength * 0.8);
    }

    // Truncate if too long
    if (summary.length > options.maxLength) {
        summary = summary.substring(0, options.maxLength) + '...';
    }

    return summary;
}

/**
 * Extract key points from content
 * Returns a bulleted list of key points
 * This is a local implementation that doesn't require API calls
 */
export function extractKeyPoints(
    content: string,
    maxPoints: number = 5
): string[] {
    // Clean up the content
    const cleanedContent = sanitizeHtmlContent(content);

    // Use local extraction
    return generateLocalKeyPoints(cleanedContent, maxPoints);
}

/**
 * Generate key points locally without using LLM API
 */
function generateLocalKeyPoints(content: string, maxPoints: number): string[] {
    // Simple approach - look for sentences that might contain key information
    const sentences = content
        .replace(/\n+/g, ' ')
        .split(/[.!?]/)
        .map(s => s.trim())
        .filter(s => s.length > 20);

    // Heuristics for important sentences - look for indicator phrases
    const importanceMarkers = [
        'important', 'key', 'significant', 'essential', 'critical',
        'main', 'primary', 'crucial', 'vital', 'fundamental',
        'in summary', 'to summarize', 'in conclusion', 'conclude',
        'therefore', 'thus', 'consequently', 'as a result'
    ];

    // Score sentences based on potential importance
    const scoredSentences = sentences.map(sentence => {
        let score = 0;

        // Sentences at the beginning or end are often important
        if (sentences.indexOf(sentence) < sentences.length * 0.1) score += 3;
        if (sentences.indexOf(sentence) > sentences.length * 0.9) score += 4;

        // Check for importance markers
        for (const marker of importanceMarkers) {
            if (sentence.toLowerCase().includes(marker)) {
                score += 2;
            }
        }

        // Prefer medium-length sentences
        if (sentence.length > 40 && sentence.length < 150) score += 2;

        return { sentence, score };
    });

    // Sort by score and take top N
    const topSentences = scoredSentences
        .sort((a, b) => b.score - a.score)
        .slice(0, maxPoints)
        .map(item => item.sentence + '.');

    return topSentences;
}
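
// Usage sketch (`longText` is a hypothetical string of note content):
//   const summary = summarizeContent(longText, 'Project plan', { maxLength: 300 });
//   const points = extractKeyPoints(longText, 3); // up to 3 high-scoring sentences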
@ -1,871 +0,0 @@
import sql from '../sql.js';
import sanitizeHtml from 'sanitize-html';
import becca from '../../becca/becca.js';

/**
 * Utility class for extracting context from notes to provide to AI models
 * Enhanced with advanced capabilities for handling large notes and specialized content
 */
export class ContextExtractor {
    /**
     * Get the content of a note
     */
    async getNoteContent(noteId: string): Promise<string | null> {
        // Use Becca API to get note data
        const note = becca.getNote(noteId);

        if (!note) {
            return null;
        }

        try {
            // Get content using Becca API
            const content = String(await note.getContent() || "");

            return this.formatNoteContent(
                content,
                note.type,
                note.mime,
                note.title
            );
        } catch (error) {
            console.error(`Error getting content for note ${noteId}:`, error);
            return null;
        }
    }

    /**
     * Split a large note into smaller, semantically meaningful chunks
     * This is useful for handling large notes that exceed the context window of LLMs
     *
     * @param noteId - The ID of the note to chunk
     * @param maxChunkSize - Maximum size of each chunk in characters
     * @returns Array of content chunks, or empty array if note not found
     */
    async getChunkedNoteContent(noteId: string, maxChunkSize = 2000): Promise<string[]> {
        const content = await this.getNoteContent(noteId);
        if (!content) return [];

        // Split into semantic chunks (paragraphs, sections, etc.)
        return this.splitContentIntoChunks(content, maxChunkSize);
    }

    /**
     * Split text content into semantically meaningful chunks based on natural boundaries
     * like paragraphs, headings, and code blocks
     *
     * @param content - The text content to split
     * @param maxChunkSize - Maximum size of each chunk in characters
     * @returns Array of content chunks
     */
    private splitContentIntoChunks(content: string, maxChunkSize: number): string[] {
        // Look for semantic boundaries (headings, blank lines, etc.)
        const headingPattern = /^(#+)\s+(.+)$/gm;
        const codeBlockPattern = /```[\s\S]+?```/gm;

        // Replace code blocks with placeholders to avoid splitting inside them
        const codeBlocks: string[] = [];
        let contentWithPlaceholders = content.replace(codeBlockPattern, (match) => {
            const placeholder = `__CODE_BLOCK_${codeBlocks.length}__`;
            codeBlocks.push(match);
            return placeholder;
        });

        // Split content at headings and paragraphs
        const sections: string[] = [];
        let currentSection = '';

        // First split by headings
        const lines = contentWithPlaceholders.split('\n');
        for (const line of lines) {
            const isHeading = headingPattern.test(line);
            headingPattern.lastIndex = 0; // Reset regex

            // If this is a heading and we already have content, start a new section
            if (isHeading && currentSection.trim().length > 0) {
                sections.push(currentSection.trim());
                currentSection = line;
            } else {
                currentSection += (currentSection ? '\n' : '') + line;
            }
        }

        // Add the last section if there's any content
        if (currentSection.trim().length > 0) {
            sections.push(currentSection.trim());
        }

        // Now combine smaller sections to respect maxChunkSize
        const chunks: string[] = [];
        let currentChunk = '';

        for (const section of sections) {
            // If adding this section exceeds maxChunkSize and we already have content,
            // finalize the current chunk and start a new one
            if ((currentChunk + section).length > maxChunkSize && currentChunk.length > 0) {
                chunks.push(currentChunk);
                currentChunk = section;
            } else {
                currentChunk += (currentChunk ? '\n\n' : '') + section;
            }
        }

        // Add the last chunk if there's any content
        if (currentChunk.length > 0) {
            chunks.push(currentChunk);
        }

        // Restore code blocks in all chunks
        return chunks.map(chunk => {
            return chunk.replace(/__CODE_BLOCK_(\d+)__/g, (_, index) => {
                return codeBlocks[parseInt(index)];
            });
        });
    }
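
    // Illustration (hypothetical input): a 5000-character markdown note with two
    // "## ..." headings and maxChunkSize = 2000 ends up as roughly three chunks,
    // each starting at a heading boundary, with fenced code blocks kept intact
    // via the placeholder round-trip above.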

    /**
     * Generate a summary of a note's content
     * Useful for providing a condensed version of very large notes
     *
     * @param noteId - The ID of the note to summarize
     * @param maxLength - Cut-off length to trigger summarization
     * @returns Summary of the note or the original content if small enough
     */
    async getNoteSummary(noteId: string, maxLength = 5000): Promise<string> {
        const content = await this.getNoteContent(noteId);
        if (!content || content.length < maxLength) return content || '';

        // For larger content, generate a summary
        return this.summarizeContent(content);
    }

    /**
     * Summarize content by extracting key information
     * This uses a heuristic approach to find important sentences and paragraphs
     *
     * @param content - The content to summarize
     * @returns A summarized version of the content
     */
    private summarizeContent(content: string): string {
        // Extract title/heading if present
        const titleMatch = content.match(/^# (.+)$/m);
        const title = titleMatch ? titleMatch[1] : 'Untitled Note';

        // Extract all headings for an outline
        const headings: string[] = [];
        const headingMatches = content.matchAll(/^(#+)\s+(.+)$/gm);
        for (const match of headingMatches) {
            const level = match[1].length;
            const text = match[2];
            headings.push(`${' '.repeat(level-1)}- ${text}`);
        }

        // Extract first sentence of each paragraph for a summary
        const paragraphs = content.split(/\n\s*\n/);
        const firstSentences = paragraphs
            .filter(p => p.trim().length > 0 && !p.trim().startsWith('#') && !p.trim().startsWith('```'))
            .map(p => {
                const sentenceMatch = p.match(/^[^.!?]+[.!?]/);
                return sentenceMatch ? sentenceMatch[0].trim() : p.substring(0, Math.min(150, p.length)).trim() + '...';
            })
            .slice(0, 5); // Limit to 5 sentences

        // Create the summary
        let summary = `# Summary of: ${title}\n\n`;

        if (headings.length > 0) {
            summary += `## Document Outline\n${headings.join('\n')}\n\n`;
        }

        if (firstSentences.length > 0) {
            summary += `## Key Points\n${firstSentences.map(s => `- ${s}`).join('\n')}\n\n`;
        }

        summary += `(Note: This is an automatically generated summary of a larger document with ${content.length} characters)`;

        return summary;
    }

    /**
     * Get a set of parent notes to provide hierarchical context
     */
    async getParentContext(noteId: string, maxDepth = 3): Promise<string> {
        // Note: getParentNotes has already been updated to use Becca
        const parents = await this.getParentNotes(noteId, maxDepth);
        if (!parents.length) return '';

        let context = 'Here is the hierarchical context for the current note:\n\n';

        // Create a hierarchical view of the parents using indentation
        // to show the proper parent-child relationship
        let indentLevel = 0;
        for (let i = 0; i < parents.length; i++) {
            const parent = parents[i];
            const indent = ' '.repeat(indentLevel);
            context += `${indent}- ${parent.title}\n`;
            indentLevel++;
        }

        // Now add the current note with proper indentation
        const note = becca.getNote(noteId);
        if (note) {
            const indent = ' '.repeat(indentLevel);
            context += `${indent}- ${note.title} (current note)\n`;
        }

        return context + '\n';
    }

    /**
     * Get child notes to provide additional context
     */
    async getChildContext(noteId: string, maxChildren = 5): Promise<string> {
        const note = becca.getNote(noteId);

        if (!note) {
            return '';
        }

        // Use Becca API to get child notes
        const childNotes = note.getChildNotes();

        if (!childNotes || childNotes.length === 0) {
            return '';
        }

        let context = 'The current note has these child notes:\n\n';

        // Limit to maxChildren
        const childrenToShow = childNotes.slice(0, maxChildren);

        for (const child of childrenToShow) {
            context += `- ${child.title}\n`;
        }

        // If there are more children than we're showing, indicate that
        if (childNotes.length > maxChildren) {
            context += `\n(+ ${childNotes.length - maxChildren} more child notes)\n`;
        }

        return context + '\n';
    }

    /**
     * Get notes linked to this note
     */
    async getLinkedNotesContext(noteId: string, maxLinks = 5): Promise<string> {
        const note = becca.getNote(noteId);

        if (!note) {
            return '';
        }

        // Use Becca API to get relations
        const relations = note.getRelations();

        if (!relations || relations.length === 0) {
            return '';
        }

        // Get the target notes from relations
        const linkedNotes = relations
            .map(relation => relation.targetNote)
            .filter(note => note !== null && note !== undefined);

        if (linkedNotes.length === 0) {
            return '';
        }

        let context = 'This note has relationships with these notes:\n\n';

        // Limit to maxLinks
        const notesToShow = linkedNotes.slice(0, maxLinks);

        for (const linked of notesToShow) {
            context += `- ${linked.title}\n`;
        }

        // If there are more linked notes than we're showing, indicate that
        if (linkedNotes.length > maxLinks) {
            context += `\n(+ ${linkedNotes.length - maxLinks} more linked notes)\n`;
        }

        return context + '\n';
    }

    /**
     * Format the content of a note based on its type
     * Enhanced with better handling for large and specialized content types
     */
    private formatNoteContent(content: string, type: string, mime: string, title: string): string {
        let formattedContent = `# ${title}\n\n`;

        switch (type) {
            case 'text':
                // Remove HTML formatting for text notes
                formattedContent += this.sanitizeHtml(content);
                break;

            case 'code':
                // Improved code handling with language detection
                const codeLanguage = this.detectCodeLanguage(content, mime);

                // For large code files, extract structure rather than full content
                if (content.length > 8000) {
                    formattedContent += this.extractCodeStructure(content, codeLanguage);
                } else {
                    formattedContent += `\`\`\`${codeLanguage}\n${content}\n\`\`\``;
                }
                break;

            case 'canvas':
                if (mime === 'application/json') {
                    try {
                        // Parse JSON content
                        const jsonContent = JSON.parse(content);

                        // Extract text elements from canvas
                        if (jsonContent.elements && Array.isArray(jsonContent.elements)) {
                            const texts = jsonContent.elements
                                .filter((element: any) => element.type === 'text' && element.text)
                                .map((element: any) => element.text);

                            formattedContent += 'Canvas content:\n' + texts.join('\n');
                        } else {
                            formattedContent += '[Empty canvas]';
                        }
                    }
                    catch (e: any) {
                        formattedContent += `[Error parsing canvas content: ${e.message}]`;
                    }
                } else {
                    formattedContent += '[Canvas content]';
                }
                break;

            case 'mindMap':
                if (mime === 'application/json') {
                    try {
                        // Parse JSON content
                        const jsonContent = JSON.parse(content);

                        // Extract node text from mind map
                        const extractMindMapNodes = (node: any): string[] => {
                            let texts: string[] = [];
                            if (node.text) {
                                texts.push(node.text);
                            }
                            if (node.children && Array.isArray(node.children)) {
                                for (const child of node.children) {
                                    texts = texts.concat(extractMindMapNodes(child));
                                }
                            }
                            return texts;
                        };

                        if (jsonContent.root) {
                            formattedContent += 'Mind map content:\n' + extractMindMapNodes(jsonContent.root).join('\n');
                        } else {
                            formattedContent += '[Empty mind map]';
                        }
                    }
                    catch (e: any) {
                        formattedContent += `[Error parsing mind map content: ${e.message}]`;
                    }
                } else {
                    formattedContent += '[Mind map content]';
                }
                break;

            case 'relationMap':
                if (mime === 'application/json') {
                    try {
                        // Parse JSON content
                        const jsonContent = JSON.parse(content);

                        // Extract relation map entities and connections
                        let result = 'Relation map content:\n';

                        if (jsonContent.notes && Array.isArray(jsonContent.notes)) {
                            result += 'Notes: ' + jsonContent.notes
                                .map((note: any) => note.title || note.name)
                                .filter(Boolean)
                                .join(', ') + '\n';
                        }

                        if (jsonContent.relations && Array.isArray(jsonContent.relations)) {
                            result += 'Relations: ' + jsonContent.relations
                                .map((rel: any) => {
                                    const sourceNote = jsonContent.notes.find((n: any) => n.noteId === rel.sourceNoteId);
                                    const targetNote = jsonContent.notes.find((n: any) => n.noteId === rel.targetNoteId);
                                    const source = sourceNote ? (sourceNote.title || sourceNote.name) : 'unknown';
                                    const target = targetNote ? (targetNote.title || targetNote.name) : 'unknown';
                                    return `${source} → ${rel.name || ''} → ${target}`;
                                })
                                .join('; ');
                        }

                        formattedContent += result;
                    }
                    catch (e: any) {
                        formattedContent += `[Error parsing relation map content: ${e.message}]`;
                    }
                } else {
                    formattedContent += '[Relation map content]';
                }
                break;

            case 'geoMap':
                if (mime === 'application/json') {
                    try {
                        // Parse JSON content
                        const jsonContent = JSON.parse(content);

                        let result = 'Geographic map content:\n';

                        if (jsonContent.markers && Array.isArray(jsonContent.markers)) {
                            if (jsonContent.markers.length > 0) {
                                result += jsonContent.markers
                                    .map((marker: any) => {
                                        return `Location: ${marker.title || ''} (${marker.lat}, ${marker.lng})${marker.description ? ' - ' + marker.description : ''}`;
                                    })
                                    .join('\n');
                            } else {
                                result += 'Empty geographic map';
                            }
                        } else {
                            result += 'Empty geographic map';
                        }

                        formattedContent += result;
                    }
                    catch (e: any) {
                        formattedContent += `[Error parsing geographic map content: ${e.message}]`;
                    }
                } else {
                    formattedContent += '[Geographic map content]';
                }
                break;

            case 'mermaid':
                // Format mermaid diagrams as code blocks
                formattedContent += '```mermaid\n' + content + '\n```';
                break;

            case 'image':
            case 'file':
                formattedContent += `[${type} attachment]`;
                break;

            default:
                // For other notes, just use the content as is
                formattedContent += this.sanitizeHtml(content);
        }

        return formattedContent;
    }

    /**
     * Detect the programming language of code content
     *
     * @param content - The code content to analyze
     * @param mime - MIME type (if available)
     * @returns The detected language or empty string
     */
    private detectCodeLanguage(content: string, mime: string): string {
        // First check if mime type provides a hint
        if (mime) {
            const mimeMap: Record<string, string> = {
                'text/x-python': 'python',
                'text/javascript': 'javascript',
                'application/javascript': 'javascript',
                'text/typescript': 'typescript',
                'application/typescript': 'typescript',
                'text/x-java': 'java',
                'text/html': 'html',
                'text/css': 'css',
                'text/x-c': 'c',
                'text/x-c++': 'cpp',
                'text/x-csharp': 'csharp',
                'text/x-go': 'go',
                'text/x-ruby': 'ruby',
                'text/x-php': 'php',
                'text/x-swift': 'swift',
                'text/x-rust': 'rust',
                'text/markdown': 'markdown',
                'text/x-sql': 'sql',
                'text/x-yaml': 'yaml',
                'application/json': 'json',
                'text/x-shell': 'bash'
            };

            for (const [mimePattern, language] of Object.entries(mimeMap)) {
                if (mime.includes(mimePattern)) {
                    return language;
                }
            }
        }

        // Check for common language patterns in the content
        const firstLines = content.split('\n', 20).join('\n');

        const languagePatterns: Record<string, RegExp> = {
            'python': /^(import\s+|from\s+\w+\s+import|def\s+\w+\s*\(|class\s+\w+\s*:)/m,
            'javascript': /^(const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=|function\s+\w+\s*\(|import\s+.*from\s+)/m,
            'typescript': /^(interface\s+\w+|type\s+\w+\s*=|class\s+\w+\s*{)/m,
            'html': /^<!DOCTYPE html>|<html>|<head>|<body>/m,
            'css': /^(\.\w+\s*{|\#\w+\s*{|@media|@import)/m,
            'java': /^(public\s+class|import\s+java|package\s+)/m,
            'cpp': /^(#include\s+<\w+>|namespace\s+\w+|void\s+\w+\s*\()/m,
            'csharp': /^(using\s+System|namespace\s+\w+|public\s+class)/m,
            'go': /^(package\s+\w+|import\s+\(|func\s+\w+\s*\()/m,
            'ruby': /^(require\s+|class\s+\w+\s*<|def\s+\w+)/m,
            'php': /^(<\?php|namespace\s+\w+|use\s+\w+)/m,
            'sql': /^(SELECT|INSERT|UPDATE|DELETE|CREATE TABLE|ALTER TABLE)/im,
            'bash': /^(#!\/bin\/sh|#!\/bin\/bash|function\s+\w+\s*\(\))/m,
            'markdown': /^(#\s+|##\s+|###\s+|\*\s+|-\s+|>\s+)/m,
            'json': /^({[\s\n]*"|[\s\n]*\[)/m,
            'yaml': /^(---|\w+:\s+)/m
        };

        for (const [language, pattern] of Object.entries(languagePatterns)) {
            if (pattern.test(firstLines)) {
                return language;
            }
        }

        // Default to empty string if we can't detect the language
        return '';
    }

    /**
     * Extract the structure of a code file rather than its full content
     * Useful for providing high-level understanding of large code files
     *
     * @param content - The full code content
     * @param language - The programming language
     * @returns A structured representation of the code
     */
    private extractCodeStructure(content: string, language: string): string {
        const lines = content.split('\n');
        const maxLines = 8000;

        // If it's not that much over the limit, just include the whole thing
        if (lines.length <= maxLines * 1.2) {
            return `\`\`\`${language}\n${content}\n\`\`\``;
        }

        // For large files, extract important structural elements based on language
        let extractedStructure = '';
        let importSection = '';
        let classDefinitions = [];
        let functionDefinitions = [];
        let otherImportantLines = [];

        // Extract imports/includes, class/function definitions based on language
        if (['javascript', 'typescript', 'python', 'java', 'csharp'].includes(language)) {
            // Find imports
            for (let i = 0; i < Math.min(100, lines.length); i++) {
                if (lines[i].match(/^(import|from|using|require|#include|package)\s+/)) {
                    importSection += lines[i] + '\n';
                }
            }

            // Find class definitions
            for (let i = 0; i < lines.length; i++) {
                if (lines[i].match(/^(class|interface|type)\s+\w+/)) {
                    const endBracketLine = this.findMatchingEnd(lines, i, language);
                    if (endBracketLine > i && endBracketLine <= i + 10) {
                        // Include small class definitions entirely
                        classDefinitions.push(lines.slice(i, endBracketLine + 1).join('\n'));
                        i = endBracketLine;
                    } else {
                        // For larger classes, just show the definition and methods
                        let className = lines[i];
                        classDefinitions.push(className);

                        // Look for methods in this class
                        for (let j = i + 1; j < Math.min(endBracketLine, lines.length); j++) {
                            if (lines[j].match(/^\s+(function|def|public|private|protected)\s+\w+/)) {
                                classDefinitions.push(' ' + lines[j].trim());
                            }
                        }

                        if (endBracketLine > 0 && endBracketLine < lines.length) {
                            i = endBracketLine;
                        }
                    }
                }
            }

            // Find function definitions not inside classes
            for (let i = 0; i < lines.length; i++) {
                if (lines[i].match(/^(function|def|const\s+\w+\s*=\s*\(|let\s+\w+\s*=\s*\(|var\s+\w+\s*=\s*\()/)) {
                    functionDefinitions.push(lines[i]);
                }
            }
        }

        // Build the extracted structure
        extractedStructure += `# Code Structure (${lines.length} lines total)\n\n`;

        if (importSection) {
            extractedStructure += "## Imports/Dependencies\n```" + language + "\n" + importSection + "```\n\n";
        }

        if (classDefinitions.length > 0) {
            extractedStructure += "## Classes/Interfaces\n```" + language + "\n" + classDefinitions.join('\n\n') + "\n```\n\n";
        }

        if (functionDefinitions.length > 0) {
            extractedStructure += "## Functions\n```" + language + "\n" + functionDefinitions.join('\n\n') + "\n```\n\n";
        }

        // Add beginning and end of the file for context
        extractedStructure += "## Beginning of File\n```" + language + "\n" +
            lines.slice(0, Math.min(50, lines.length)).join('\n') + "\n```\n\n";

        if (lines.length > 100) {
            extractedStructure += "## End of File\n```" + language + "\n" +
                lines.slice(Math.max(0, lines.length - 50)).join('\n') + "\n```\n\n";
        }

        return extractedStructure;
    }

    /**
     * Find the line number of the matching ending bracket/block
     *
     * @param lines - Array of code lines
     * @param startLine - Starting line number
     * @param language - Programming language
     * @returns The line number of the matching end, or -1 if not found
     */
    private findMatchingEnd(lines: string[], startLine: number, language: string): number {
        let depth = 0;
        let inClass = false;

        // Different languages have different ways to define blocks
        if (['javascript', 'typescript', 'java', 'csharp', 'cpp'].includes(language)) {
            // Curly brace languages
            for (let i = startLine; i < lines.length; i++) {
                const line = lines[i];
                // Count opening braces
                for (const char of line) {
                    if (char === '{') depth++;
                    if (char === '}') {
                        depth--;
                        if (depth === 0 && inClass) return i;
                    }
                }

                // Check if this line contains the class declaration
                if (i === startLine && line.includes('{')) {
                    inClass = true;
                } else if (i === startLine) {
                    // If the first line doesn't have an opening brace, look at the next few lines
                    if (i + 1 < lines.length && lines[i + 1].includes('{')) {
                        inClass = true;
                    }
                }
            }
        } else if (language === 'python') {
            // Indentation-based language
            const baseIndentation = lines[startLine].match(/^\s*/)?.[0].length || 0;

            for (let i = startLine + 1; i < lines.length; i++) {
                // Skip empty lines
                if (lines[i].trim() === '') continue;

                const currentIndentation = lines[i].match(/^\s*/)?.[0].length || 0;

                // If we're back to the same or lower indentation level, we've reached the end
                if (currentIndentation <= baseIndentation) {
                    return i - 1;
                }
            }
        }

        return -1;
    }

    /**
     * Sanitize HTML content to plain text
     */
    private sanitizeHtml(html: string): string {
        if (!html) return '';

        // Use sanitizeHtml to remove all HTML tags
        let content = sanitizeHtml(html, {
            allowedTags: [],
            allowedAttributes: {},
            textFilter: (text) => {
                // Replace multiple newlines with a single one
                return text.replace(/\n\s*\n/g, '\n\n');
            }
        });

        // Additional cleanup for any remaining HTML entities
        content = content
            .replace(/&nbsp;/g, ' ')
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&amp;/g, '&')
            .replace(/&quot;/g, '"')
            .replace(/&#39;/g, "'");

        return content;
    }

    /**
     * Get parent notes in the hierarchy
     */
    private async getParentNotes(noteId: string, maxDepth: number): Promise<{noteId: string, title: string}[]> {
        const parentNotes: {noteId: string, title: string}[] = [];
        const startNote = becca.getNote(noteId);

        if (!startNote) {
            return parentNotes;
        }

        // Use non-null assertion as we checked above
        let currentNote: any = startNote;

        for (let i = 0; i < maxDepth; i++) {
            // Get parent branches (should be just one in most cases)
            if (!currentNote) break;

            const parentBranches: any[] = currentNote.getParentBranches();

            if (!parentBranches || parentBranches.length === 0) {
                break;
            }

            // Use the first parent branch
            const branch: any = parentBranches[0];
            if (!branch) break;

            const parentNote: any = branch.getParentNote();

            if (!parentNote || parentNote.noteId === 'root') {
                break;
            }

            parentNotes.unshift({
                noteId: parentNote.noteId,
                title: parentNote.title
            });

            currentNote = parentNote;
        }

        return parentNotes;
    }

    /**
     * Get the full context for a note, including parent hierarchy, content, and children
     */
    async getFullContext(noteId: string): Promise<string> {
        const noteContent = await this.getNoteContent(noteId);
        if (!noteContent) {
            return 'Note not found';
        }

        const parentContext = await this.getParentContext(noteId);
        const childContext = await this.getChildContext(noteId);
        const linkedContext = await this.getLinkedNotesContext(noteId);

        return [
            parentContext,
            noteContent,
            childContext,
            linkedContext
        ].filter(Boolean).join('\n\n');
    }

    /**
     * Get semantically ranked context based on semantic similarity to a query
     * This method delegates to the semantic context service for the actual ranking
     *
     * @param noteId - The ID of the current note
     * @param query - The user's query to compare against
     * @param maxResults - Maximum number of related notes to include
     * @returns Context with the most semantically relevant related notes
     */
    async getSemanticContext(noteId: string, query: string, maxResults = 5): Promise<string> {
        try {
            // This requires the semantic context service to be available
            // We're using a dynamic import to avoid circular dependencies
            const { default: aiServiceManager } = await import('./ai_service_manager.js');
            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();

            if (!semanticContext) {
                return this.getFullContext(noteId);
            }

            return await semanticContext.getSemanticContext(noteId, query, maxResults);
        } catch (error) {
            // Fall back to regular context if semantic ranking fails
            console.error('Error in semantic context ranking:', error);
            return this.getFullContext(noteId);
        }
    }

    /**
     * Get progressively loaded context based on depth level
     * This provides different levels of context detail depending on the depth parameter
     *
     * @param noteId - The ID of the note to get context for
     * @param depth - Depth level (1-4) determining how much context to include
     * @returns Context appropriate for the requested depth
     */
    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        try {
            // This requires the semantic context service to be available
            // We're using a dynamic import to avoid circular dependencies
            const { default: aiServiceManager } = await import('./ai_service_manager.js');
            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();

            if (!semanticContext) {
                return this.getFullContext(noteId);
            }

            return await semanticContext.getProgressiveContext(noteId, depth);
        } catch (error) {
            // Fall back to regular context if progressive loading fails
            console.error('Error in progressive context loading:', error);
            return this.getFullContext(noteId);
        }
    }

    /**
     * Get smart context based on the query complexity
     * This automatically selects the appropriate context depth and relevance
     *
     * @param noteId - The ID of the note to get context for
     * @param query - The user's query for semantic relevance matching
     * @returns The optimal context for answering the query
     */
    async getSmartContext(noteId: string, query: string): Promise<string> {
        try {
            // This requires the semantic context service to be available
            // We're using a dynamic import to avoid circular dependencies
            const { default: aiServiceManager } = await import('./ai_service_manager.js');
            const semanticContext = aiServiceManager.getInstance().getSemanticContextService();

            if (!semanticContext) {
                return this.getFullContext(noteId);
            }

            return await semanticContext.getSmartContext(noteId, query);
        } catch (error) {
            // Fall back to regular context if smart context fails
            console.error('Error in smart context selection:', error);
            return this.getFullContext(noteId);
        }
    }
}

// Singleton instance
const contextExtractor = new ContextExtractor();
export default contextExtractor;

@ -412,7 +412,8 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbed
    try {
        // Use the enhanced context extractor for improved content extraction
        // We're using a dynamic import to avoid circular dependencies
        const { default: contextExtractor } = await import('../../llm/context_extractor.js');
        const { ContextExtractor } = await import('../../llm/context/index.js');
        const contextExtractor = new ContextExtractor();

        // Get the content using the enhanced formatNoteContent method in context extractor
        const noteContent = await contextExtractor.getNoteContent(noteId);
@ -836,7 +837,8 @@ async function processNoteWithChunking(
): Promise<void> {
    try {
        // Get the context extractor dynamically to avoid circular dependencies
        const { default: contextExtractor } = await import('../../llm/context_extractor.js');
        const { ContextExtractor } = await import('../../llm/context/index.js');
        const contextExtractor = new ContextExtractor();

        // Get chunks of the note content
        const chunks = await contextExtractor.getChunkedNoteContent(noteId);
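
// Sketch of the follow-on step inside processNoteWithChunking (hypothetical loop;
// `provider` stands for an embedding provider as used elsewhere in this service):
//   for (const chunk of chunks) {
//       const embedding = await provider.generateEmbeddings(chunk);
//   }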
@ -1,4 +1,4 @@
import contextExtractor from './context_extractor.js';
import { ContextExtractor } from './context/index.js';
import * as vectorStore from './embeddings/vector_store.js';
import sql from '../sql.js';
import { cosineSimilarity } from './embeddings/vector_store.js';
@ -58,6 +58,9 @@ import options from '../options.js';
 * knowledge bases when working with limited-context LLMs.
 */
class SemanticContextService {
    // Create an instance of ContextExtractor for backward compatibility
    private contextExtractor = new ContextExtractor();

    /**
     * Get the preferred embedding provider based on user settings
     * Tries to use the most appropriate provider in this order:
@ -156,7 +159,7 @@ class SemanticContextService {

        if (!noteEmbedding) {
            // If note doesn't have an embedding yet, get content and generate one
            const content = await contextExtractor.getNoteContent(note.noteId);
            const content = await this.contextExtractor.getNoteContent(note.noteId);
            if (content && provider) {
                try {
                    noteEmbedding = await provider.generateEmbeddings(content);
@ -225,7 +228,7 @@ class SemanticContextService {
        const mostRelevantNotes = rankedNotes.slice(0, maxResults);
        const relevantContent = await Promise.all(
            mostRelevantNotes.map(async note => {
                const content = await contextExtractor.getNoteContent(note.noteId);
                const content = await this.contextExtractor.getNoteContent(note.noteId);
                if (!content) return null;

                // Format with relevance score and title
@ -253,22 +256,22 @@ class SemanticContextService {
     */
    async getProgressiveContext(noteId: string, depth = 1): Promise<string> {
        // Start with the note content
        const noteContent = await contextExtractor.getNoteContent(noteId);
        const noteContent = await this.contextExtractor.getNoteContent(noteId);
        if (!noteContent) return 'Note not found';

        // If depth is 1, just return the note content
        if (depth <= 1) return noteContent;

        // Add parent context for depth >= 2
        const parentContext = await contextExtractor.getParentContext(noteId);
        const parentContext = await this.contextExtractor.getParentContext(noteId);
        if (depth <= 2) return `${parentContext}\n\n${noteContent}`;

        // Add child context for depth >= 3
        const childContext = await contextExtractor.getChildContext(noteId);
        const childContext = await this.contextExtractor.getChildContext(noteId);
        if (depth <= 3) return `${parentContext}\n\n${noteContent}\n\n${childContext}`;

        // Add linked notes for depth >= 4
        const linkedContext = await contextExtractor.getLinkedNotesContext(noteId);
        const linkedContext = await this.contextExtractor.getLinkedNotesContext(noteId);
        return `${parentContext}\n\n${noteContent}\n\n${childContext}\n\n${linkedContext}`;
    }
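
// Depth levels in practice (sketch, assuming the service's exported singleton):
//   depth 1 -> note body only; depth 2 -> + parent hierarchy;
//   depth 3 -> + child list; depth 4 -> + linked notes, e.g.
//   const context = await semanticContextService.getProgressiveContext(noteId, 2);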