diff --git a/assets/agent/context_and_followup.txt b/assets/agent/context_and_followup.txt new file mode 100644 index 00000000..64e3d068 --- /dev/null +++ b/assets/agent/context_and_followup.txt @@ -0,0 +1,15 @@ +# Test 1: Context and Follow-up Queries + +# The agent should use the 'resource-list' tool. +What dungeons are defined in this ROM? + +# The agent must use the context from the previous answer to identify "the first one" +# and then call the 'dungeon-list-sprites' tool for that specific room. +Tell me about the sprites in the first dungeon listed. + +# The agent must remember the room from the previous query and use the 'overworld-list-warps' tool. +Are there any warp points in that same room? + +# This is a complex reasoning test. The agent needs to synthesize information +# from the last two tool calls (sprite list and warp list) to answer. +Is there a soldier sprite located near any of the warp points in that room? diff --git a/assets/agent/system_prompt_v2.txt b/assets/agent/system_prompt_v2.txt index 9ae3fa6f..a0edd405 100644 --- a/assets/agent/system_prompt_v2.txt +++ b/assets/agent/system_prompt_v2.txt @@ -28,16 +28,16 @@ You must follow this exact two-step process to avoid errors. *Example Step 1:* ```json { - "text_response": "Let me look up the dungeons for you...", + "text_response": "Let me look up the rooms for you...", "tool_calls": [ { - "tool_name": "resource_list", + "tool_name": "resource-list", "args": { - "type": "dungeon" + "type": "room" } } ], - "reasoning": "The user is asking for a list of dungeons. I need to call the `resource_list` tool with the type 'dungeon' to get this information." + "reasoning": "The user is asking for a list of rooms. I need to call the `resource-list` tool with the type 'room' to get this information." } ``` @@ -49,8 +49,8 @@ You must follow this exact two-step process to avoid errors. 
*Example Step 2:* ```json { - "text_response": "This ROM contains 12 dungeons, including: Hyrule Castle, Eastern Palace, and Desert Palace.", - "reasoning": "I have received the list of dungeons from the tool result. I will now format this information into a friendly, readable response for the user." + "text_response": "This ROM contains 297 rooms, including: Ganon, Hyrule Castle (North Corridor), and Behind Sanctuary (Switch).", + "reasoning": "I have received the list of rooms from the tool result. I will now format this information into a friendly, readable response for the user." } ``` diff --git a/src/cli/cli_main.cc b/src/cli/cli_main.cc index 3fcbde97..50575583 100644 --- a/src/cli/cli_main.cc +++ b/src/cli/cli_main.cc @@ -20,6 +20,9 @@ ABSL_DECLARE_FLAG(std::string, ai_provider); ABSL_DECLARE_FLAG(std::string, ai_model); ABSL_DECLARE_FLAG(std::string, gemini_api_key); ABSL_DECLARE_FLAG(std::string, ollama_host); +ABSL_DECLARE_FLAG(std::string, prompt_version); +ABSL_DECLARE_FLAG(bool, use_function_calling); +ABSL_FLAG(bool, quiet, false, "Enable quiet mode for simple-chat."); namespace { @@ -132,6 +135,34 @@ ParsedGlobals ParseGlobalFlags(int argc, char* argv[]) { absl::SetFlag(&FLAGS_ollama_host, std::string(argv[++i])); continue; } + + if (absl::StartsWith(token, "--prompt_version=")) { + absl::SetFlag(&FLAGS_prompt_version, std::string(token.substr(17))); + continue; + } + if (token == "--prompt_version") { + if (i + 1 >= argc) { + result.error = "--prompt_version flag requires a value"; + return result; + } + absl::SetFlag(&FLAGS_prompt_version, std::string(argv[++i])); + continue; + } + + if (absl::StartsWith(token, "--use_function_calling=")) { + std::string value(token.substr(23)); + absl::SetFlag(&FLAGS_use_function_calling, value == "true" || value == "1"); + continue; + } + if (token == "--use_function_calling") { + if (i + 1 >= argc) { + result.error = "--use_function_calling flag requires a value"; + return result; + } + std::string 
value(argv[++i]); + absl::SetFlag(&FLAGS_use_function_calling, value == "true" || value == "1"); + continue; + } } result.positional.push_back(current); diff --git a/src/cli/handlers/agent.cc b/src/cli/handlers/agent.cc index 2d810602..5abf155e 100644 --- a/src/cli/handlers/agent.cc +++ b/src/cli/handlers/agent.cc @@ -133,7 +133,7 @@ absl::Status Agent::Run(const std::vector& arg_vec) { return agent::HandleChatCommand(rom_); } if (subcommand == "simple-chat") { - return agent::HandleSimpleChatCommand(subcommand_args, rom_); + return agent::HandleSimpleChatCommand(subcommand_args, rom_, absl::GetFlag(FLAGS_quiet)); } return absl::InvalidArgumentError(std::string(agent::kUsage)); diff --git a/src/cli/handlers/agent/commands.h b/src/cli/handlers/agent/commands.h index 5e93b54e..ba3b7817 100644 --- a/src/cli/handlers/agent/commands.h +++ b/src/cli/handlers/agent/commands.h @@ -41,7 +41,7 @@ absl::Status HandleOverworldListWarpsCommand( const std::vector& arg_vec, Rom* rom_context = nullptr); absl::Status HandleChatCommand(Rom& rom); -absl::Status HandleSimpleChatCommand(const std::vector& arg_vec, Rom& rom); +absl::Status HandleSimpleChatCommand(const std::vector& arg_vec, Rom& rom, bool quiet); absl::Status HandleTestConversationCommand( const std::vector& arg_vec); diff --git a/src/cli/handlers/agent/general_commands.cc b/src/cli/handlers/agent/general_commands.cc index d5121d44..5178e7b9 100644 --- a/src/cli/handlers/agent/general_commands.cc +++ b/src/cli/handlers/agent/general_commands.cc @@ -619,67 +619,69 @@ absl::Status HandleChatCommand(Rom& rom) { } absl::Status HandleSimpleChatCommand(const std::vector& arg_vec, - Rom& rom) { + Rom& rom, bool quiet) { RETURN_IF_ERROR(EnsureRomLoaded(rom, "agent simple-chat")); - // Try to load project and labels automatically - auto _ = TryLoadProjectAndLabels(rom); // Ignore errors - we'll use defaults + auto _ = TryLoadProjectAndLabels(rom); - // Parse flags and positional arguments std::optional batch_file; std::optional 
single_message; - bool non_interactive = false; bool verbose = false; - bool show_reasoning = true; - int max_tool_iterations = 4; - int max_retry_attempts = 3; for (size_t i = 0; i < arg_vec.size(); ++i) { const std::string& arg = arg_vec[i]; - if (absl::StartsWith(arg, "--file=")) { batch_file = arg.substr(7); } else if (arg == "--file" && i + 1 < arg_vec.size()) { - batch_file = arg_vec[i + 1]; - ++i; - } else if (arg == "--non-interactive" || arg == "-n") { - non_interactive = true; + batch_file = arg_vec[++i]; } else if (arg == "--verbose" || arg == "-v") { verbose = true; - } else if (arg == "--no-reasoning") { - show_reasoning = false; - } else if (absl::StartsWith(arg, "--max-tool-iterations=")) { - absl::SimpleAtoi(arg.substr(22), &max_tool_iterations); - } else if (arg == "--max-tool-iterations" && i + 1 < arg_vec.size()) { - absl::SimpleAtoi(arg_vec[i + 1], &max_tool_iterations); - ++i; - } else if (absl::StartsWith(arg, "--max-retries=")) { - absl::SimpleAtoi(arg.substr(14), &max_retry_attempts); - } else if (arg == "--max-retries" && i + 1 < arg_vec.size()) { - absl::SimpleAtoi(arg_vec[i + 1], &max_retry_attempts); - ++i; } else if (!absl::StartsWith(arg, "--") && !single_message.has_value()) { - // Treat first non-flag argument as the message single_message = arg; } } - // Configure agent agent::AgentConfig config; config.verbose = verbose; - config.show_reasoning = show_reasoning; - config.max_tool_iterations = max_tool_iterations; - config.max_retry_attempts = max_retry_attempts; SimpleChatSession session; session.SetConfig(config); session.SetRomContext(&rom); - - // Priority: batch file > single message > interactive/piped + session.SetQuietMode(quiet); + if (batch_file.has_value()) { - return session.RunBatch(*batch_file); + std::ifstream file(*batch_file); + if (!file.is_open()) { + return absl::NotFoundError(absl::StrCat("Failed to open file: ", *batch_file)); + } + if (!quiet) { + std::cout << "Running batch session from: " << *batch_file << 
std::endl; + std::cout << "----------------------------------------\n\n"; + } + std::string line; + int line_num = 0; + while (std::getline(file, line)) { + line_num++; + std::string trimmed_line = std::string(absl::StripAsciiWhitespace(line)); + if (trimmed_line.empty() || absl::StartsWith(trimmed_line, "#")) { + continue; + } + if (!quiet) { + std::cout << "Input [" << line_num << "]: " << trimmed_line << std::endl; + } + std::string response; + auto status = session.SendAndWaitForResponse(trimmed_line, &response); + if (!status.ok()) { + std::cerr << "Error processing line " << line_num << ": " << status.message() << std::endl; + continue; + } + std::cout << response << "\n"; + if (!quiet) { + std::cout << "\n"; + } + } + return absl::OkStatus(); } else if (single_message.has_value()) { - // Single message mode - send message and print response std::string response; auto status = session.SendAndWaitForResponse(*single_message, &response); if (!status.ok()) { @@ -688,7 +690,6 @@ absl::Status HandleSimpleChatCommand(const std::vector& arg_vec, std::cout << response << "\n"; return absl::OkStatus(); } else { - // Interactive or piped input mode return session.RunInteractive(); } }