feat: Enhance AI agent capabilities with new tool calling instructions, improved response handling, and terminal color utilities

This commit is contained in:
scawful
2025-10-04 03:04:22 -04:00
parent 06dcffb6ac
commit 2931634837
10 changed files with 562 additions and 15 deletions

View File

@@ -11,6 +11,7 @@
#include "absl/strings/str_join.h"
#include "absl/time/clock.h"
#include "cli/service/ai/service_factory.h"
#include "cli/util/terminal_colors.h"
#include "nlohmann/json.hpp"
namespace yaze {
@@ -174,9 +175,23 @@ absl::StatusOr<ChatMessage> ConversationalAgentService::SendMessage(
}
constexpr int kMaxToolIterations = 4;
bool waiting_for_text_response = false;
for (int iteration = 0; iteration < kMaxToolIterations; ++iteration) {
// Show loading indicator while waiting for AI response
util::LoadingIndicator loader(
waiting_for_text_response
? "Generating final response..."
: "Thinking...",
true);
loader.Start();
auto response_or = ai_service_->GenerateResponse(history_);
loader.Stop();
if (!response_or.ok()) {
util::PrintError(absl::StrCat(
"Failed to get AI response: ", response_or.status().message()));
return absl::InternalError(absl::StrCat(
"Failed to get AI response: ", response_or.status().message()));
}
@@ -184,28 +199,61 @@ absl::StatusOr<ChatMessage> ConversationalAgentService::SendMessage(
const auto& agent_response = response_or.value();
if (!agent_response.tool_calls.empty()) {
// Check if we were waiting for a text response but got more tool calls instead
if (waiting_for_text_response) {
util::PrintWarning(
absl::StrCat("LLM called tools again instead of providing final response (Iteration: ",
iteration, "/", kMaxToolIterations, ")"));
}
bool executed_tool = false;
for (const auto& tool_call : agent_response.tool_calls) {
// Format tool arguments for display
std::vector<std::string> arg_parts;
for (const auto& [key, value] : tool_call.args) {
arg_parts.push_back(absl::StrCat(key, "=", value));
}
std::string args_str = absl::StrJoin(arg_parts, ", ");
util::PrintToolCall(tool_call.tool_name, args_str);
auto tool_result_or = tool_dispatcher_.Dispatch(tool_call);
if (!tool_result_or.ok()) {
util::PrintError(absl::StrCat(
"Tool execution failed: ", tool_result_or.status().message()));
return absl::InternalError(absl::StrCat(
"Tool execution failed: ", tool_result_or.status().message()));
}
const std::string& tool_output = tool_result_or.value();
if (!tool_output.empty()) {
util::PrintSuccess("Tool executed successfully");
// Add tool result with a clear marker for the LLM
std::string marked_output = "[TOOL RESULT] " + tool_output;
history_.push_back(
CreateMessage(ChatMessage::Sender::kAgent, tool_output));
CreateMessage(ChatMessage::Sender::kUser, marked_output));
}
executed_tool = true;
}
if (executed_tool) {
// Now we're waiting for the LLM to provide a text response
waiting_for_text_response = true;
// Re-query the AI with updated context.
continue;
}
}
// Check if we received a text response after tool execution
if (waiting_for_text_response && agent_response.text_response.empty() &&
agent_response.commands.empty()) {
util::PrintWarning(
absl::StrCat("LLM did not provide text_response after receiving tool results (Iteration: ",
iteration, "/", kMaxToolIterations, ")"));
// Continue to give it another chance
continue;
}
std::string response_text = agent_response.text_response;
if (!agent_response.reasoning.empty()) {
if (!response_text.empty()) {

View File

@@ -110,8 +110,7 @@ absl::StatusOr<AgentResponse> MockAIService::GenerateResponse(
}
response.text_response =
"I'm not sure how to help with that yet. Try asking for resource labels "
"or listing dungeon sprites.";
"I'm just a mock service. Please load a provider like ollama or gemini.";
return response;
}

View File

@@ -348,9 +348,12 @@ absl::StatusOr<AgentResponse> GeminiAIService::ParseGeminiResponse(
absl::StrCat("❌ Failed to parse Gemini response: ", e.what()));
}
if (agent_response.commands.empty()) {
if (agent_response.text_response.empty() &&
agent_response.commands.empty() &&
agent_response.tool_calls.empty()) {
return absl::InternalError(
"❌ No valid commands extracted from Gemini response\n"
"❌ No valid response extracted from Gemini\n"
" Expected at least one of: text_response, commands, or tool_calls\n"
" Raw response: " + response_body);
}

View File

@@ -525,6 +525,62 @@ std::string PromptBuilder::BuildFewShotExamplesSection() const {
}
std::string PromptBuilder::BuildConstraintsSection() const {
// Try to load from file first
const std::vector<std::string> search_paths = {
"assets/agent/tool_calling_instructions.txt",
"../assets/agent/tool_calling_instructions.txt",
"../../assets/agent/tool_calling_instructions.txt",
};
for (const auto& path : search_paths) {
std::ifstream file(path);
if (file.is_open()) {
std::string content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
if (!content.empty()) {
std::ostringstream oss;
oss << content;
// Add tool schemas if available
if (!tool_specs_.empty()) {
oss << "\n\n# Available Tools for ROM Inspection\n\n";
oss << "You have access to the following tools to answer questions:\n\n";
oss << "```json\n";
oss << BuildFunctionCallSchemas();
oss << "\n```\n\n";
oss << "**Tool Call Example (Initial Request):**\n";
oss << "```json\n";
oss << R"({
"tool_calls": [
{
"tool_name": "resource-list",
"args": {
"type": "dungeon"
}
}
],
"reasoning": "I need to call the resource-list tool to get the dungeon information."
})";
oss << "\n```\n\n";
oss << "**Tool Result Response (After Tool Executes):**\n";
oss << "```json\n";
oss << R"({
"text_response": "I found the following dungeons in the ROM: Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, Palace of Darkness, Swamp Palace, Skull Woods, Thieves' Town, Ice Palace, Misery Mire, Turtle Rock, and Ganon's Tower.",
"reasoning": "The tool returned a list of 12 dungeons which I've formatted into a readable response."
})";
oss << "\n```\n";
}
if (!tile_reference_.empty()) {
oss << "\n" << BuildTileReferenceSection();
}
return oss.str();
}
}
}
// Fallback to embedded version if file not found
std::ostringstream oss;
oss << R"(
# Critical Constraints
@@ -541,23 +597,38 @@ std::string PromptBuilder::BuildConstraintsSection() const {
- `commands` is for generating commands to modify the ROM.
- All fields are optional, but you should always provide at least one.
2. **Tool Usage:** When the user asks a question about the ROM state, use tool_calls instead of commands
2. **Tool Calling Workflow (CRITICAL):**
WHEN YOU CALL A TOOL:
a) First response: Include tool_calls with the tool name and arguments
b) The tool will execute and you'll receive results in the next message
c) Second response: You MUST provide a text_response that answers the user's question using the tool results
d) DO NOT call the same tool again unless you need different parameters
e) DO NOT leave text_response empty after receiving tool results
Example conversation flow:
User: "What dungeons are in this ROM?"
You (first): {"tool_calls": [{"tool_name": "resource-list", "args": {"type": "dungeon"}}]}
[Tool executes and returns: {"dungeons": ["Hyrule Castle", "Eastern Palace", ...]}]
You (second): {"text_response": "Based on the ROM data, there are 12 dungeons including Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, and more."}
3. **Tool Usage:** When the user asks a question about the ROM state, use tool_calls instead of commands
- Tools are read-only and return information
- Commands modify the ROM and should only be used when explicitly requested
- You can call multiple tools in one response
- Always use JSON format for tool results
- ALWAYS provide text_response after receiving tool results
3. **Command Syntax:** Follow the exact syntax shown in examples
4. **Command Syntax:** Follow the exact syntax shown in examples
- Use correct flag names (--group, --id, --to, --from, etc.)
- Use hex format for colors (0xRRGGBB) and tile IDs (0xNNN)
- Coordinates are 0-based indices
4. **Common Patterns:**
5. **Common Patterns:**
- Palette modifications: export set-color import
- Multiple tile placement: multiple overworld set-tile commands
- Validation: single rom validate command
5. **Error Prevention:**
6. **Error Prevention:**
- Always export before modifying palettes
- Use temporary file names (temp_*.json) for intermediate files
- Validate coordinates are within bounds
@@ -569,10 +640,9 @@ std::string PromptBuilder::BuildConstraintsSection() const {
oss << "```json\n";
oss << BuildFunctionCallSchemas();
oss << "\n```\n\n";
oss << "**Tool Call Example:**\n";
oss << "**Tool Call Example (Initial Request):**\n";
oss << "```json\n";
oss << R"({
"text_response": "Let me check the dungeons in this ROM.",
"tool_calls": [
{
"tool_name": "resource-list",
@@ -580,7 +650,15 @@ std::string PromptBuilder::BuildConstraintsSection() const {
"type": "dungeon"
}
}
]
],
"reasoning": "I need to call the resource-list tool to get the dungeon information."
})";
oss << "\n```\n\n";
oss << "**Tool Result Response (After Tool Executes):**\n";
oss << "```json\n";
oss << R"({
"text_response": "I found the following dungeons in the ROM: Hyrule Castle, Eastern Palace, Desert Palace, Tower of Hera, Palace of Darkness, Swamp Palace, Skull Woods, Thieves' Town, Ice Palace, Misery Mire, Turtle Rock, and Ganon's Tower.",
"reasoning": "The tool returned a list of 12 dungeons which I've formatted into a readable response."
})";
oss << "\n```\n";
}
@@ -642,6 +720,38 @@ std::string PromptBuilder::BuildContextSection(const RomContext& context) {
}
std::string PromptBuilder::BuildSystemInstruction() {
// Try to load from file first
const std::vector<std::string> search_paths = {
"assets/agent/system_prompt.txt",
"../assets/agent/system_prompt.txt",
"../../assets/agent/system_prompt.txt",
};
for (const auto& path : search_paths) {
std::ifstream file(path);
if (file.is_open()) {
std::string content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
if (!content.empty()) {
std::ostringstream oss;
oss << content;
// Add command reference if available
if (catalogue_loaded_ && !command_docs_.empty()) {
oss << "\n\n" << BuildCommandReference();
}
// Add tool reference if available
if (!tool_specs_.empty()) {
oss << "\n\n" << BuildToolReference();
}
return oss.str();
}
}
}
// Fallback to embedded version if file not found
std::ostringstream oss;
oss << "You are an expert ROM hacking assistant for The Legend of Zelda: "