diff --git a/assets/agent/system_prompt.txt b/assets/agent/system_prompt.txt
index 9ed8e02c..d2ed7b11 100644
--- a/assets/agent/system_prompt.txt
+++ b/assets/agent/system_prompt.txt
@@ -3,19 +3,26 @@ You are an expert ROM hacking assistant for The Legend of Zelda: A Link to the P
 Your task is to generate a sequence of z3ed CLI commands to achieve the user's request, or to answer questions about the ROM using available tools.
 
 # Output Format
-You MUST respond with ONLY a JSON object with the following structure:
+You MUST respond with ONLY a JSON object. NO other text before or after the JSON.
+
+**REQUIRED JSON SCHEMA:**
+```json
 {
-  "text_response": "Your natural language reply to the user.",
-  "tool_calls": [{ "tool_name": "tool_name", "args": { "arg1": "value1" } }],
-  "commands": ["command1", "command2"],
-  "reasoning": "Your thought process."
+  "text_response": "string (your natural language reply)",
+  "tool_calls": [{"tool_name": "string", "args": {"key": "value"}}],
+  "commands": ["string array of z3ed commands"],
+  "reasoning": "string (your thought process)"
 }
+```
+
+**CRITICAL:** The field name is `"text_response"` NOT `"response"` NOT `"answer"` NOT anything else.
 
 # CRITICAL RULES:
-1. If you previously called tools and received [TOOL RESULT], you MUST include text_response with your answer
-2. NEVER send an empty text_response after receiving tool results
+1. If you previously called tools and received [TOOL RESULT], you MUST include "text_response" with your answer
+2. NEVER send an empty "text_response" after receiving tool results
 3. NEVER call the same tool twice with the same arguments
-4. If you have all the information needed to answer, provide text_response WITHOUT calling more tools
+4. If you have all the information needed to answer, provide "text_response" WITHOUT calling more tools
+5. The field name is `"text_response"` - this exact spelling is REQUIRED
 
 # Tool Calling Workflow (CRITICAL)
 
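As a sanity check on the schema above, a minimal conforming reply looks like this (every value is an illustrative placeholder, not real z3ed output):

```json
{
  "text_response": "Here is a summary of the data you asked about.",
  "tool_calls": [],
  "commands": [],
  "reasoning": "The previous tool result already contained everything needed to answer."
}
```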
diff --git a/src/cli/service/agent/conversational_agent_service.cc b/src/cli/service/agent/conversational_agent_service.cc
index c28c4c67..8909a493 100644
--- a/src/cli/service/agent/conversational_agent_service.cc
+++ b/src/cli/service/agent/conversational_agent_service.cc
@@ -270,7 +270,12 @@ absl::StatusOr<ChatMessage> ConversationalAgentService::SendMessage(
     }
 
     // Add tool result with a clear marker for the LLM
-    std::string marked_output = "[TOOL RESULT] " + tool_output;
+    // Format as plain text to avoid confusing the LLM with nested JSON
+    std::string marked_output = absl::StrCat(
+        "[TOOL RESULT for ", tool_call.tool_name, "]\n",
+        "The tool returned the following data:\n",
+        tool_output, "\n\n",
+        "Please provide a text_response field in your JSON to summarize this information for the user.");
     history_.push_back(
         CreateMessage(ChatMessage::Sender::kUser, marked_output));
   }
diff --git a/src/cli/service/ai/ollama_ai_service.cc b/src/cli/service/ai/ollama_ai_service.cc
index 5f36608f..fb996cbd 100644
--- a/src/cli/service/ai/ollama_ai_service.cc
+++ b/src/cli/service/ai/ollama_ai_service.cc
@@ -222,6 +222,13 @@ absl::StatusOr<std::string> OllamaAIService::GenerateResponse(
 
   std::string llm_output = ollama_wrapper["response"].get<std::string>();
 
+  // Debug: Print raw LLM output when verbose mode is enabled
+  const char* verbose_env = std::getenv("Z3ED_VERBOSE");
+  if (verbose_env && std::string(verbose_env) == "1") {
+    std::cout << "\n" << "\033[35m" << "🔍 Raw LLM Response:" << "\033[0m" << "\n"
+              << "\033[2m" << llm_output << "\033[0m" << "\n\n";
+  }
+
   // Parse the LLM's JSON response (the agent structure)
   nlohmann::json response_json;
   try {
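The conversational_agent_service.cc change swaps a bare string concatenation for a fuller prompt wrapped around the tool output. Assuming a hypothetical tool call named `dungeon_list` whose output was a short JSON blob, the message appended to `history_` would now read:

```
[TOOL RESULT for dungeon_list]
The tool returned the following data:
{"dungeons": ["Hyrule Castle", "Eastern Palace"]}

Please provide a text_response field in your JSON to summarize this information for the user.
```

Framing the result as prose with an explicit follow-up instruction, rather than nesting raw JSON next to the schema the model must emit, should reduce the chance the model echoes the tool output instead of producing its own `text_response`.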
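The ollama_ai_service.cc hunk gates its debug dump behind an environment variable rather than a CLI flag, so it can be toggled per-invocation without touching argument parsing. Usage would look like this (the subcommand is a placeholder; only the `Z3ED_VERBOSE` name and the `"1"` check come from the diff):

```sh
# Print the raw, unparsed model output (in magenta) before JSON parsing runs
Z3ED_VERBOSE=1 z3ed <subcommand> ...
```

Note that `std::getenv` requires `<cstdlib>`; if ollama_ai_service.cc does not already include it, that header should be added alongside this change.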