feat: Enhance AI agent capabilities with new tool calling instructions, improved response handling, and terminal color utilities

2025-10-04 03:04:22 -04:00
parent 06dcffb6ac
commit 2931634837
10 changed files with 562 additions and 15 deletions
--- a/src/cli/service/agent/conversational_agent_service.cc
+++ b/src/cli/service/agent/conversational_agent_service.cc
@@ -11,6 +11,7 @@
 #include "absl/strings/str_join.h"
 #include "absl/time/clock.h"
 #include "cli/service/ai/service_factory.h"
+#include "cli/util/terminal_colors.h"
 #include "nlohmann/json.hpp"

 namespace yaze {
@@ -174,9 +175,23 @@ absl::StatusOr<ChatMessage> ConversationalAgentService::SendMessage(
  }

  constexpr int kMaxToolIterations = 4;
+  bool waiting_for_text_response = false;
+  
  for (int iteration = 0; iteration < kMaxToolIterations; ++iteration) {
+    // Show loading indicator while waiting for AI response
+    util::LoadingIndicator loader(
+        waiting_for_text_response 
+            ? "Generating final response..." 
+            : "Thinking...", 
+        true);
+    loader.Start();
+    
    auto response_or = ai_service_->GenerateResponse(history_);
+    loader.Stop();
+    
    if (!response_or.ok()) {
+      util::PrintError(absl::StrCat(
+          "Failed to get AI response: ", response_or.status().message()));
      return absl::InternalError(absl::StrCat(
          "Failed to get AI response: ", response_or.status().message()));
    }
@@ -184,28 +199,61 @@ absl::StatusOr<ChatMessage> ConversationalAgentService::SendMessage(
    const auto& agent_response = response_or.value();

    if (!agent_response.tool_calls.empty()) {
+      // Check if we were waiting for a text response but got more tool calls instead
+      if (waiting_for_text_response) {
+        util::PrintWarning(
+            absl::StrCat("LLM called tools again instead of providing final response (Iteration: ",
+                        iteration, "/", kMaxToolIterations, ")"));
+      }
+      
      bool executed_tool = false;
      for (const auto& tool_call : agent_response.tool_calls) {
+        // Format tool arguments for display
+        std::vector<std::string> arg_parts;
+        for (const auto& [key, value] : tool_call.args) {
+          arg_parts.push_back(absl::StrCat(key, "=", value));
+        }
+        std::string args_str = absl::StrJoin(arg_parts, ", ");
+        
+        util::PrintToolCall(tool_call.tool_name, args_str);
+        
        auto tool_result_or = tool_dispatcher_.Dispatch(tool_call);
        if (!tool_result_or.ok()) {
+          util::PrintError(absl::StrCat(
+              "Tool execution failed: ", tool_result_or.status().message()));
          return absl::InternalError(absl::StrCat(
              "Tool execution failed: ", tool_result_or.status().message()));
        }

        const std::string& tool_output = tool_result_or.value();
        if (!tool_output.empty()) {
+          util::PrintSuccess("Tool executed successfully");
+          // Add tool result with a clear marker for the LLM
+          std::string marked_output = "[TOOL RESULT] " + tool_output;
          history_.push_back(
-              CreateMessage(ChatMessage::Sender::kAgent, tool_output));
+              CreateMessage(ChatMessage::Sender::kUser, marked_output));
        }
        executed_tool = true;
      }

      if (executed_tool) {
+        // Now we're waiting for the LLM to provide a text response
+        waiting_for_text_response = true;
        // Re-query the AI with updated context.
        continue;
      }
    }

+    // Check if we received a text response after tool execution
+    if (waiting_for_text_response && agent_response.text_response.empty() && 
+        agent_response.commands.empty()) {
+      util::PrintWarning(
+          absl::StrCat("LLM did not provide text_response after receiving tool results (Iteration: ",
+                      iteration, "/", kMaxToolIterations, ")"));
+      // Continue to give it another chance
+      continue;
+    }
+
    std::string response_text = agent_response.text_response;
    if (!agent_response.reasoning.empty()) {
      if (!response_text.empty()) {
--- a/src/cli/service/ai/ai_service.cc
+++ b/src/cli/service/ai/ai_service.cc
@@ -110,8 +110,7 @@ absl::StatusOr<AgentResponse> MockAIService::GenerateResponse(
  }

  response.text_response =
-      "I'm not sure how to help with that yet. Try asking for resource labels "
-      "or listing dungeon sprites.";
+      "I'm just a mock service. Please load a provider like ollama or gemini.";
  return response;
 }

--- a/src/cli/service/ai/gemini_ai_service.cc
+++ b/src/cli/service/ai/gemini_ai_service.cc
@@ -348,9 +348,12 @@ absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
        absl::StrCat("❌ Failed to parse Gemini response: ", e.what()));
  }
  
-  if (agent_response.commands.empty()) {
+  if (agent_response.text_response.empty() && 
+      agent_response.commands.empty() && 
+      agent_response.tool_calls.empty()) {
    return absl::InternalError(
-        "❌ No valid commands extracted from Gemini response\n"
+        "❌ No valid response extracted from Gemini\n"
+        "   Expected at least one of: text_response, commands, or tool_calls\n"
        "   Raw response: " + response_body);
  }
  
--- a/src/cli/service/ai/prompt_builder.cc
+++ b/src/cli/service/ai/prompt_builder.cc
@@ -525,6 +525,62 @@ std::string PromptBuilder::BuildFewShotExamplesSection() const {
 }

 std::string PromptBuilder::BuildConstraintsSection() const {
+  // Try to load from file first
+  const std::vector<std::string> search_paths = {
+      "assets/agent/tool_calling_instructions.txt",
+      "../assets/agent/tool_calling_instructions.txt",
+      "../../assets/agent/tool_calling_instructions.txt",
+  };
+  
+  for (const auto& path : search_paths) {
+    std::ifstream file(path);
+    if (file.is_open()) {
+      std::string content((std::istreambuf_iterator<char>(file)),
+                          std::istreambuf_iterator<char>());
+      if (!content.empty()) {
+        std::ostringstream oss;
+        oss << content;
+        
+        // Add tool schemas if available
+        if (!tool_specs_.empty()) {
+          oss << "\n\n# Available Tools for ROM Inspection\n\n";
+          oss << "You have access to the following tools to answer questions:\n\n";
+          oss << "```json\n";
+          oss << BuildFunctionCallSchemas();
+          oss << "\n```\n\n";
+          oss << "**Tool Call Example (Initial Request):**\n";
+          oss << "```json\n";
+          oss << R"({
+  "tool_calls": [
+    {
+      "tool_name": "resource-list",
+      "args": {
+        "type": "dungeon"
+      }
+    }
+  ],
+  "reasoning": "I need to call the resource-list tool to get the dungeon information."
+})";
+          oss << "\n```\n\n";
+          oss << "**Tool Result Response (After Tool Executes):**\n";
+          oss << "```json\n";
+          oss << R"({
+  "text_response": "I found the following dungeons in the ROM: Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, Palace of Darkness, Swamp Palace, Skull Woods, Thieves' Town, Ice Palace, Misery Mire, Turtle Rock, and Ganon's Tower.",
+  "reasoning": "The tool returned a list of 12 dungeons which I've formatted into a readable response."
+})";
+          oss << "\n```\n";
+        }
+
+        if (!tile_reference_.empty()) {
+          oss << "\n" << BuildTileReferenceSection();
+        }
+        
+        return oss.str();
+      }
+    }
+  }
+  
+  // Fallback to embedded version if file not found
  std::ostringstream oss;
  oss << R"(
 # Critical Constraints
@@ -541,23 +597,38 @@ std::string PromptBuilder::BuildConstraintsSection() const {
  - `commands` is for generating commands to modify the ROM.
  - All fields are optional, but you should always provide at least one.

-2. **Tool Usage:** When the user asks a question about the ROM state, use tool_calls instead of commands
+2. **Tool Calling Workflow (CRITICAL):**
+   WHEN YOU CALL A TOOL:
+   a) First response: Include tool_calls with the tool name and arguments
+   b) The tool will execute and you'll receive results in the next message
+   c) Second response: You MUST provide a text_response that answers the user's question using the tool results
+   d) DO NOT call the same tool again unless you need different parameters
+   e) DO NOT leave text_response empty after receiving tool results
+   
+   Example conversation flow:
+   User: "What dungeons are in this ROM?"
+   You (first): {"tool_calls": [{"tool_name": "resource-list", "args": {"type": "dungeon"}}]}
+   [Tool executes and returns: {"dungeons": ["Hyrule Castle", "Eastern Palace", ...]}]
+   You (second): {"text_response": "Based on the ROM data, there are 12 dungeons including Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, and more."}
+
+3. **Tool Usage:** When the user asks a question about the ROM state, use tool_calls instead of commands
  - Tools are read-only and return information
  - Commands modify the ROM and should only be used when explicitly requested
  - You can call multiple tools in one response
  - Always use JSON format for tool results
+  - ALWAYS provide text_response after receiving tool results

-3. **Command Syntax:** Follow the exact syntax shown in examples
+4. **Command Syntax:** Follow the exact syntax shown in examples
  - Use correct flag names (--group, --id, --to, --from, etc.)
  - Use hex format for colors (0xRRGGBB) and tile IDs (0xNNN)
  - Coordinates are 0-based indices

-4. **Common Patterns:**
+5. **Common Patterns:**
  - Palette modifications: export → set-color → import
  - Multiple tile placement: multiple overworld set-tile commands
  - Validation: single rom validate command

-5. **Error Prevention:**
+6. **Error Prevention:**
  - Always export before modifying palettes
  - Use temporary file names (temp_*.json) for intermediate files
  - Validate coordinates are within bounds
@@ -569,10 +640,9 @@ std::string PromptBuilder::BuildConstraintsSection() const {
    oss << "```json\n";
    oss << BuildFunctionCallSchemas();
    oss << "\n```\n\n";
-    oss << "**Tool Call Example:**\n";
+    oss << "**Tool Call Example (Initial Request):**\n";
    oss << "```json\n";
    oss << R"({
-  "text_response": "Let me check the dungeons in this ROM.",
  "tool_calls": [
    {
      "tool_name": "resource-list",
@@ -580,7 +650,15 @@ std::string PromptBuilder::BuildConstraintsSection() const {
        "type": "dungeon"
      }
    }
-  ]
+  ],
+  "reasoning": "I need to call the resource-list tool to get the dungeon information."
+})";
+    oss << "\n```\n\n";
+    oss << "**Tool Result Response (After Tool Executes):**\n";
+    oss << "```json\n";
+    oss << R"({
+  "text_response": "I found the following dungeons in the ROM: Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, Palace of Darkness, Swamp Palace, Skull Woods, Thieves' Town, Ice Palace, Misery Mire, Turtle Rock, and Ganon's Tower.",
+  "reasoning": "The tool returned a list of 12 dungeons which I've formatted into a readable response."
 })";
    oss << "\n```\n";
  }
@@ -642,6 +720,38 @@ std::string PromptBuilder::BuildContextSection(const RomContext& context) {
 }

 std::string PromptBuilder::BuildSystemInstruction() {
+  // Try to load from file first
+  const std::vector<std::string> search_paths = {
+      "assets/agent/system_prompt.txt",
+      "../assets/agent/system_prompt.txt",
+      "../../assets/agent/system_prompt.txt",
+  };
+  
+  for (const auto& path : search_paths) {
+    std::ifstream file(path);
+    if (file.is_open()) {
+      std::string content((std::istreambuf_iterator<char>(file)),
+                          std::istreambuf_iterator<char>());
+      if (!content.empty()) {
+        std::ostringstream oss;
+        oss << content;
+        
+        // Add command reference if available
+        if (catalogue_loaded_ && !command_docs_.empty()) {
+          oss << "\n\n" << BuildCommandReference();
+        }
+        
+        // Add tool reference if available
+        if (!tool_specs_.empty()) {
+          oss << "\n\n" << BuildToolReference();
+        }
+        
+        return oss.str();
+      }
+    }
+  }
+  
+  // Fallback to embedded version if file not found
  std::ostringstream oss;
  
  oss << "You are an expert ROM hacking assistant for The Legend of Zelda: "