mirror of
https://github.com/TriliumNext/Notes.git
synced 2025-08-10 02:02:29 +08:00
actually shows useful responses now
This commit is contained in:
parent
ef6ecdc42d
commit
c1585c73da
@ -141,13 +141,39 @@ export default class LlmChatPanel extends BasicWidget {
|
||||
};
|
||||
|
||||
// First, send the message via POST request
|
||||
await server.post<any>(`llm/sessions/${this.sessionId}/messages`, messageParams);
|
||||
const postResponse = await server.post<any>(`llm/sessions/${this.sessionId}/messages`, messageParams);
|
||||
|
||||
// If the POST request returned content directly, display it
|
||||
if (postResponse && postResponse.content) {
|
||||
this.addMessageToChat('assistant', postResponse.content);
|
||||
|
||||
// If there are sources, show them
|
||||
if (postResponse.sources && postResponse.sources.length > 0) {
|
||||
this.showSources(postResponse.sources);
|
||||
}
|
||||
|
||||
this.hideLoadingIndicator();
|
||||
return;
|
||||
}
|
||||
|
||||
// Then set up streaming via EventSource
|
||||
const streamUrl = `./api/llm/sessions/${this.sessionId}/messages?format=stream&useAdvancedContext=${useAdvancedContext}`;
|
||||
const source = new EventSource(streamUrl);
|
||||
|
||||
let assistantResponse = '';
|
||||
let receivedAnyContent = false;
|
||||
let timeoutId: number | null = null;
|
||||
|
||||
// Set a timeout to handle case where streaming doesn't work properly
|
||||
timeoutId = window.setTimeout(() => {
|
||||
if (!receivedAnyContent) {
|
||||
// If we haven't received any content after a reasonable timeout (10 seconds),
|
||||
// add a fallback message and close the stream
|
||||
this.hideLoadingIndicator();
|
||||
this.addMessageToChat('assistant', 'I\'m having trouble generating a response right now. Please try again later.');
|
||||
source.close();
|
||||
}
|
||||
}, 10000);
|
||||
|
||||
// Handle streaming response
|
||||
source.onmessage = (event) => {
|
||||
@ -155,13 +181,29 @@ export default class LlmChatPanel extends BasicWidget {
|
||||
// Stream completed
|
||||
source.close();
|
||||
this.hideLoadingIndicator();
|
||||
|
||||
// Clear the timeout since we're done
|
||||
if (timeoutId !== null) {
|
||||
window.clearTimeout(timeoutId);
|
||||
}
|
||||
|
||||
// If we didn't receive any content but the stream completed normally,
|
||||
// display a message to the user
|
||||
if (!receivedAnyContent) {
|
||||
this.addMessageToChat('assistant', 'I processed your request, but I don\'t have any specific information to share at the moment.');
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const data = JSON.parse(event.data);
|
||||
console.log("Received streaming data:", data); // Debug log
|
||||
|
||||
// Handle both content and error cases
|
||||
if (data.content) {
|
||||
receivedAnyContent = true;
|
||||
assistantResponse += data.content;
|
||||
|
||||
// Update the UI with the accumulated response
|
||||
const assistantElement = this.noteContextChatMessages.querySelector('.assistant-message:last-child .message-content');
|
||||
if (assistantElement) {
|
||||
@ -169,18 +211,38 @@ export default class LlmChatPanel extends BasicWidget {
|
||||
} else {
|
||||
this.addMessageToChat('assistant', assistantResponse);
|
||||
}
|
||||
// Scroll to the bottom
|
||||
this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
|
||||
} else if (data.error) {
|
||||
// Handle error message
|
||||
this.hideLoadingIndicator();
|
||||
this.addMessageToChat('assistant', `Error: ${data.error}`);
|
||||
receivedAnyContent = true;
|
||||
source.close();
|
||||
|
||||
if (timeoutId !== null) {
|
||||
window.clearTimeout(timeoutId);
|
||||
}
|
||||
}
|
||||
|
||||
// Scroll to the bottom
|
||||
this.chatContainer.scrollTop = this.chatContainer.scrollHeight;
|
||||
} catch (e) {
|
||||
console.error('Error parsing SSE message:', e);
|
||||
console.error('Error parsing SSE message:', e, 'Raw data:', event.data);
|
||||
}
|
||||
};
|
||||
|
||||
source.onerror = () => {
|
||||
source.close();
|
||||
this.hideLoadingIndicator();
|
||||
toastService.showError('Error connecting to the LLM service. Please try again.');
|
||||
|
||||
// Clear the timeout if there was an error
|
||||
if (timeoutId !== null) {
|
||||
window.clearTimeout(timeoutId);
|
||||
}
|
||||
|
||||
// Only show error message if we haven't received any content yet
|
||||
if (!receivedAnyContent) {
|
||||
this.addMessageToChat('assistant', 'Error connecting to the LLM service. Please try again.');
|
||||
}
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
|
@ -469,7 +469,7 @@ async function sendMessage(req: Request, res: Response) {
|
||||
|
||||
// Get the Accept header once at the start
|
||||
const acceptHeader = req.get('Accept');
|
||||
const isStreamingRequest = acceptHeader && acceptHeader.includes('text/event-stream');
|
||||
const isStreamingRequest = req.method === 'GET' && req.query.format === 'stream';
|
||||
|
||||
// For GET requests, ensure we have the format=stream parameter
|
||||
if (req.method === 'GET' && (!req.query.format || req.query.format !== 'stream')) {
|
||||
@ -580,46 +580,81 @@ async function sendMessage(req: Request, res: Response) {
|
||||
|
||||
// Process based on whether this is a streaming request
|
||||
if (isStreamingRequest) {
|
||||
// Set streaming headers once
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
// Flag to indicate we've handled the response directly
|
||||
// This lets the route handler know not to process the result
|
||||
(res as any).triliumResponseHandled = true;
|
||||
|
||||
let messageContent = '';
|
||||
|
||||
// Use the correct method name: generateChatCompletion
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
try {
|
||||
// Use the correct method name: generateChatCompletion
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
|
||||
// Handle streaming if the response includes a stream method
|
||||
if (response.stream) {
|
||||
await response.stream((chunk: { text: string; done: boolean }) => {
|
||||
if (chunk.text) {
|
||||
messageContent += chunk.text;
|
||||
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
|
||||
}
|
||||
// Handle streaming if the response includes a stream method
|
||||
if (response.stream) {
|
||||
await response.stream((chunk: { text: string; done: boolean }) => {
|
||||
if (chunk.text) {
|
||||
messageContent += chunk.text;
|
||||
// Only write if the response hasn't finished
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk.done) {
|
||||
// Signal the end of the stream when done
|
||||
if (chunk.done) {
|
||||
// Signal the end of the stream when done, only if not already ended
|
||||
if (!res.writableEnded) {
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// If no streaming available, send the response as a single chunk
|
||||
messageContent = response.text;
|
||||
// Only write if the response hasn't finished
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
}
|
||||
|
||||
// Store the full response for the session
|
||||
aiResponse = messageContent;
|
||||
|
||||
// Store the assistant's response in the session
|
||||
session.messages.push({
|
||||
role: 'assistant',
|
||||
content: aiResponse,
|
||||
timestamp: new Date()
|
||||
});
|
||||
} else {
|
||||
// If no streaming available, send the response as a single chunk
|
||||
messageContent = response.text;
|
||||
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
|
||||
// For streaming requests we don't return anything as we've already sent the response
|
||||
return null;
|
||||
} catch (streamingError: any) {
|
||||
// If streaming fails and we haven't sent a response yet, throw the error
|
||||
if (!res.headersSent) {
|
||||
throw streamingError;
|
||||
} else {
|
||||
// If headers were already sent, try to send an error event
|
||||
try {
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
} catch (e) {
|
||||
log.error(`Failed to write streaming error: ${e}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the full response for the session
|
||||
aiResponse = messageContent;
|
||||
|
||||
// Store the assistant's response in the session
|
||||
session.messages.push({
|
||||
role: 'assistant',
|
||||
content: aiResponse,
|
||||
timestamp: new Date()
|
||||
});
|
||||
} else {
|
||||
// Non-streaming approach for POST requests
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
@ -646,7 +681,7 @@ async function sendMessage(req: Request, res: Response) {
|
||||
} else {
|
||||
// Original approach - find relevant notes through direct embedding comparison
|
||||
const relevantNotes = await findRelevantNotes(
|
||||
content,
|
||||
messageContent,
|
||||
session.noteContext || null,
|
||||
5
|
||||
);
|
||||
@ -654,7 +689,7 @@ async function sendMessage(req: Request, res: Response) {
|
||||
sourceNotes = relevantNotes;
|
||||
|
||||
// Build context from relevant notes
|
||||
const context = buildContextFromNotes(relevantNotes, content);
|
||||
const context = buildContextFromNotes(relevantNotes, messageContent);
|
||||
|
||||
// Add system message with the context
|
||||
const contextMessage: Message = {
|
||||
@ -680,46 +715,81 @@ async function sendMessage(req: Request, res: Response) {
|
||||
};
|
||||
|
||||
if (isStreamingRequest) {
|
||||
// Set streaming headers once
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
// Flag to indicate we've handled the response directly
|
||||
// This lets the route handler know not to process the result
|
||||
(res as any).triliumResponseHandled = true;
|
||||
|
||||
let messageContent = '';
|
||||
|
||||
// Use the correct method name: generateChatCompletion
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
try {
|
||||
// Use the correct method name: generateChatCompletion
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
|
||||
// Handle streaming if the response includes a stream method
|
||||
if (response.stream) {
|
||||
await response.stream((chunk: { text: string; done: boolean }) => {
|
||||
if (chunk.text) {
|
||||
messageContent += chunk.text;
|
||||
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
|
||||
}
|
||||
// Handle streaming if the response includes a stream method
|
||||
if (response.stream) {
|
||||
await response.stream((chunk: { text: string; done: boolean }) => {
|
||||
if (chunk.text) {
|
||||
messageContent += chunk.text;
|
||||
// Only write if the response hasn't finished
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ content: chunk.text })}\n\n`);
|
||||
}
|
||||
}
|
||||
|
||||
if (chunk.done) {
|
||||
// Signal the end of the stream when done
|
||||
if (chunk.done) {
|
||||
// Signal the end of the stream when done, only if not already ended
|
||||
if (!res.writableEnded) {
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// If no streaming available, send the response as a single chunk
|
||||
messageContent = response.text;
|
||||
// Only write if the response hasn't finished
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
}
|
||||
|
||||
// Store the full response for the session
|
||||
aiResponse = messageContent;
|
||||
|
||||
// Store the assistant's response in the session
|
||||
session.messages.push({
|
||||
role: 'assistant',
|
||||
content: aiResponse,
|
||||
timestamp: new Date()
|
||||
});
|
||||
} else {
|
||||
// If no streaming available, send the response as a single chunk
|
||||
messageContent = response.text;
|
||||
res.write(`data: ${JSON.stringify({ content: messageContent })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
|
||||
// For streaming requests we don't return anything as we've already sent the response
|
||||
return null;
|
||||
} catch (streamingError: any) {
|
||||
// If streaming fails and we haven't sent a response yet, throw the error
|
||||
if (!res.headersSent) {
|
||||
throw streamingError;
|
||||
} else {
|
||||
// If headers were already sent, try to send an error event
|
||||
try {
|
||||
if (!res.writableEnded) {
|
||||
res.write(`data: ${JSON.stringify({ error: streamingError.message })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
res.end();
|
||||
}
|
||||
} catch (e) {
|
||||
log.error(`Failed to write streaming error: ${e}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the full response for the session
|
||||
aiResponse = messageContent;
|
||||
|
||||
// Store the assistant's response in the session
|
||||
session.messages.push({
|
||||
role: 'assistant',
|
||||
content: aiResponse,
|
||||
timestamp: new Date()
|
||||
});
|
||||
} else {
|
||||
// Non-streaming approach for POST requests
|
||||
const response = await service.generateChatCompletion(aiMessages, chatOptions);
|
||||
@ -744,6 +814,12 @@ async function sendMessage(req: Request, res: Response) {
|
||||
};
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If it's not a POST or streaming GET request, return the session's message history
|
||||
return {
|
||||
id: session.id,
|
||||
messages: session.messages
|
||||
};
|
||||
}
|
||||
} catch (error: any) {
|
||||
log.error(`Error sending message to LLM: ${error.message}`);
|
||||
|
@ -505,8 +505,14 @@ function route(method: HttpMethod, path: string, middleware: express.Handler[],
|
||||
}
|
||||
|
||||
function handleResponse(resultHandler: ApiResultHandler, req: express.Request, res: express.Response, result: unknown, start: number) {
|
||||
const responseLength = resultHandler(req, res, result);
|
||||
// Skip result handling if the response has already been handled
|
||||
if ((res as any).triliumResponseHandled) {
|
||||
// Just log the request without additional processing
|
||||
log.request(req, res, Date.now() - start, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const responseLength = resultHandler(req, res, result);
|
||||
log.request(req, res, Date.now() - start, responseLength);
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,10 @@ export class OllamaService extends BaseAIService {
|
||||
// Format messages for Ollama
|
||||
const formattedMessages = this.formatMessages(messages, systemPrompt);
|
||||
|
||||
// Log the formatted messages for debugging
|
||||
console.log('Input messages for formatting:', messages);
|
||||
console.log('Formatted messages for Ollama:', formattedMessages);
|
||||
|
||||
try {
|
||||
const endpoint = `${baseUrl.replace(/\/+$/, '')}/api/chat`;
|
||||
|
||||
@ -75,6 +79,7 @@ export class OllamaService extends BaseAIService {
|
||||
const reader = response.body.getReader();
|
||||
let fullText = "";
|
||||
let partialLine = "";
|
||||
let receivedAnyContent = false;
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
@ -102,6 +107,7 @@ export class OllamaService extends BaseAIService {
|
||||
const newContent = data.message.content;
|
||||
// Add to full text
|
||||
fullText += newContent;
|
||||
receivedAnyContent = true;
|
||||
// Call the callback with the new content
|
||||
await callback({
|
||||
text: newContent,
|
||||
@ -110,6 +116,18 @@ export class OllamaService extends BaseAIService {
|
||||
}
|
||||
|
||||
if (data.done) {
|
||||
// If we received an empty response with done=true,
|
||||
// generate a fallback response
|
||||
if (!receivedAnyContent && fullText.trim() === "") {
|
||||
// Generate a fallback response
|
||||
const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
|
||||
await callback({
|
||||
text: fallbackText,
|
||||
done: false
|
||||
});
|
||||
fullText = fallbackText;
|
||||
}
|
||||
|
||||
// Final message in the stream
|
||||
await callback({
|
||||
text: "",
|
||||
@ -136,19 +154,64 @@ export class OllamaService extends BaseAIService {
|
||||
const data = JSON.parse(partialLine.trim());
|
||||
if (data.message && data.message.content) {
|
||||
fullText += data.message.content;
|
||||
receivedAnyContent = true;
|
||||
await callback({
|
||||
text: data.message.content,
|
||||
done: false
|
||||
});
|
||||
}
|
||||
|
||||
if (data.done) {
|
||||
// Check for empty responses
|
||||
if (!receivedAnyContent && fullText.trim() === "") {
|
||||
// Generate a fallback response
|
||||
const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
|
||||
await callback({
|
||||
text: fallbackText,
|
||||
done: false
|
||||
});
|
||||
fullText = fallbackText;
|
||||
}
|
||||
|
||||
await callback({
|
||||
text: "",
|
||||
done: true,
|
||||
usage: {
|
||||
promptTokens: data.prompt_eval_count || 0,
|
||||
completionTokens: data.eval_count || 0,
|
||||
totalTokens: (data.prompt_eval_count || 0) + (data.eval_count || 0)
|
||||
}
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Error parsing final JSON from Ollama stream:", err);
|
||||
console.error("Error parsing JSON from last line:", err, "Line:", partialLine);
|
||||
}
|
||||
}
|
||||
|
||||
// If we reached the end without a done message and without any content
|
||||
if (!receivedAnyContent && fullText.trim() === "") {
|
||||
// Generate a fallback response
|
||||
const fallbackText = "I've processed your request but don't have a specific response for you at this time.";
|
||||
await callback({
|
||||
text: fallbackText,
|
||||
done: false
|
||||
});
|
||||
|
||||
// Final message
|
||||
await callback({
|
||||
text: "",
|
||||
done: true,
|
||||
usage: {
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return fullText;
|
||||
} catch (err) {
|
||||
console.error("Error reading Ollama stream:", err);
|
||||
console.error("Error processing Ollama stream:", err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user