2025-03-28 22:50:15 +00:00
|
|
|
import type { Message } from '../ai_interface.js';
|
|
|
|
import { BaseMessageFormatter } from './base_formatter.js';
|
|
|
|
import sanitizeHtml from 'sanitize-html';
|
2025-03-28 23:07:02 +00:00
|
|
|
import { PROVIDER_PROMPTS, FORMATTING_PROMPTS } from '../constants/llm_prompt_constants.js';
|
2025-03-28 23:25:06 +00:00
|
|
|
import { LLM_CONSTANTS } from '../constants/provider_constants.js';
|
2025-04-01 19:33:53 +00:00
|
|
|
import {
|
|
|
|
HTML_ALLOWED_TAGS,
|
|
|
|
HTML_ALLOWED_ATTRIBUTES,
|
|
|
|
OLLAMA_CLEANING,
|
|
|
|
FORMATTER_LOGS
|
|
|
|
} from '../constants/formatter_constants.js';
|
2025-04-06 20:50:08 +00:00
|
|
|
import log from '../../log.js';
|
2025-03-28 22:50:15 +00:00
|
|
|
|
|
|
|
/**
 * Ollama-specific message formatter.
 *
 * Produces the message list handed to the Ollama API: one leading system
 * message, followed by every non-system message, with optional context
 * injected into the first user message.
 */
export class OllamaMessageFormatter extends BaseMessageFormatter {
    /**
     * Maximum recommended context length for Ollama.
     * Smaller than other providers due to Ollama's handling of context.
     */
    private static readonly MAX_CONTEXT_LENGTH = LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA;

    /**
     * Format messages for the Ollama API.
     *
     * The result always starts with exactly one system message. Non-system
     * messages follow in their original order as shallow copies, so extra
     * properties such as `tool_calls`, `tool_call_id` and `name` are kept.
     *
     * @param messages Messages to format
     * @param systemPrompt Optional system prompt to use; when empty or omitted the
     *        default assistant intro is used
     * @param context Optional context to inject into the first user message
     * @param preserveSystemPrompt When true and `messages` contains a system message,
     *        the first one is kept instead of being replaced. Any further system
     *        messages are dropped in either mode.
     * @returns The formatted message list
     */
    formatMessages(messages: Message[], systemPrompt?: string, context?: string, preserveSystemPrompt?: boolean): Message[] {
        const formattedMessages: Message[] = [];

        // Log the input messages with all their properties
        log.info(`Ollama formatter received ${messages.length} messages`);
        logMessageSummaries('Message', messages);

        // Split the system messages from everything else (user, assistant, tool, ...)
        const systemMessages = messages.filter(msg => msg.role === 'system');
        const nonSystemMessages = messages.filter(msg => msg.role !== 'system');

        if (preserveSystemPrompt && systemMessages.length > 0) {
            // Preserve the existing system message (only the first one)
            formattedMessages.push(systemMessages[0]);
            log.info(`Preserving existing system message: ${(systemMessages[0].content ?? '').substring(0, 50)}...`);
        } else {
            // `||` rather than `??` on purpose: an empty prompt also falls back to the default
            const basePrompt = systemPrompt || PROVIDER_PROMPTS.COMMON.DEFAULT_ASSISTANT_INTRO;
            formattedMessages.push({
                role: 'system',
                content: basePrompt
            });
            log.info(`Using new system message: ${basePrompt.substring(0, 50)}...`);
        }

        // Inject the context (if any) into the first user message; copy everything else as-is
        let injectedContext = false;
        for (const msg of nonSystemMessages) {
            if (context && !injectedContext && msg.role === 'user') {
                const cleanedContext = this.cleanContextContent(context);
                log.info(`Injecting context (${cleanedContext.length} chars) into user message`);

                const formattedContext = PROVIDER_PROMPTS.OLLAMA.CONTEXT_INJECTION(
                    cleanedContext,
                    msg.content
                );

                // Log which properties besides role/content are carried over
                const preservedKeys = Object.keys(msg).filter(key => key !== 'role' && key !== 'content');
                log.info(`Preserving additional properties in user message: ${preservedKeys.join(', ')}`);

                // Copy all original properties, overriding only the content
                const newMessage = {
                    ...msg,
                    content: formattedContext
                };

                formattedMessages.push(newMessage);
                log.info(`Created user message with context, final keys: ${Object.keys(newMessage).join(', ')}`);

                injectedContext = true;
            } else {
                const keys = Object.keys(msg).join(', ');
                log.info(context
                    ? `Preserving message with role ${msg.role}, keys: ${keys}`
                    : `Adding message with role ${msg.role} without context injection, keys: ${keys}`);

                // Shallow copy keeps tool_calls, tool_call_id, name, etc.
                formattedMessages.push({
                    ...msg
                });
            }
        }

        if (context && !injectedContext) {
            // Make the otherwise silent drop of the context visible in the logs
            log.info('Context was provided but no user message was found; context was not injected');
        }

        // Log the final formatted messages
        log.info(`Ollama formatter produced ${formattedMessages.length} formatted messages`);
        logMessageSummaries('Formatted message', formattedMessages);

        return formattedMessages;
    }

    /**
     * Clean up HTML and other problematic content before sending to Ollama.
     *
     * Ollama needs a more aggressive cleaning than other models, but the
     * `<note>`, `<notes>` and `<query>` tags must survive it. They are swapped
     * for bracket markers before cleaning and restored afterwards.
     *
     * @param content Raw context content
     * @returns The cleaned, trimmed content; `''` for empty input; the original
     *          `content` unchanged if cleaning throws
     */
    override cleanContextContent(content: string): string {
        if (!content) return '';

        try {
            // Temporarily replace XML tags with markers that won't be affected by sanitization.
            // `[\s\S]*?` (not `.*?`) so that queries spanning several lines are protected too.
            const modified = content
                .replace(/<note>/g, '[NOTE_START]')
                .replace(/<\/note>/g, '[NOTE_END]')
                .replace(/<notes>/g, '[NOTES_START]')
                .replace(/<\/notes>/g, '[NOTES_END]')
                .replace(/<query>([\s\S]*?)<\/query>/g, '[QUERY]$1[/QUERY]');

            // First use the parent class to do standard cleaning
            const sanitized = super.cleanContextContent(modified);

            // Then remove any remaining HTML while keeping our markers
            let plaintext = sanitizeHtml(sanitized, {
                allowedTags: HTML_ALLOWED_TAGS.NONE,
                allowedAttributes: HTML_ALLOWED_ATTRIBUTES.NONE,
                textFilter: (text) => text
            });

            // Apply all Ollama-specific cleaning patterns
            for (const entry of Object.values(OLLAMA_CLEANING)) {
                plaintext = plaintext.replace(entry.pattern, entry.replacement);
            }

            // Restore our XML tags
            plaintext = plaintext
                .replace(/\[NOTE_START\]/g, '<note>')
                .replace(/\[NOTE_END\]/g, '</note>')
                .replace(/\[NOTES_START\]/g, '<notes>')
                .replace(/\[NOTES_END\]/g, '</notes>')
                .replace(/\[QUERY\]([\s\S]*?)\[\/QUERY\]/g, '<query>$1</query>');

            return plaintext.trim();
        } catch (error) {
            console.error(FORMATTER_LOGS.ERROR.CONTEXT_CLEANING("Ollama"), error);
            return content; // Best effort: return original if cleaning fails
        }
    }

    /**
     * Get the maximum recommended context length for Ollama.
     *
     * @returns The value of `LLM_CONSTANTS.CONTEXT_WINDOW.OLLAMA`
     */
    getMaxContextLength(): number {
        return OllamaMessageFormatter.MAX_CONTEXT_LENGTH;
    }
}

/**
 * Log a one-line summary of each message plus any tool-related properties.
 *
 * @param label Prefix used in every log line (e.g. `Message`, `Formatted message`)
 * @param messages Messages to summarize
 */
function logMessageSummaries(label: string, messages: Message[]): void {
    messages.forEach((msg, index) => {
        const keys = Object.keys(msg).join(', ');
        // Guard the length read so that logging can never throw on a message without content
        const contentLength = msg.content?.length ?? 0;
        log.info(`${label} ${index} - role: ${msg.role}, keys: ${keys}, content length: ${contentLength}`);

        // Log special properties if present
        if (msg.tool_calls) {
            log.info(`${label} ${index} has ${msg.tool_calls.length} tool_calls`);
        }
        if (msg.tool_call_id) {
            log.info(`${label} ${index} has tool_call_id: ${msg.tool_call_id}`);
        }
        if (msg.name) {
            log.info(`${label} ${index} has name: ${msg.name}`);
        }
    });
}
|