feat: Add batch testing mode for conversational agent and implement conversation test cases
This commit is contained in:
85
assets/agent/conversation_tests.json
Normal file
85
assets/agent/conversation_tests.json
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "basic_dungeon_query",
|
||||||
|
"description": "Test basic ROM introspection with resource-list tool",
|
||||||
|
"prompts": [
|
||||||
|
"What dungeons are defined in this ROM?"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["dungeon", "palace", "castle"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "tile_search",
|
||||||
|
"description": "Test overworld-find-tile tool",
|
||||||
|
"prompts": [
|
||||||
|
"Find all instances of tile 0x02E in the overworld"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["tile", "0x02E", "map", "coordinates"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "map_information",
|
||||||
|
"description": "Test overworld-describe-map tool",
|
||||||
|
"prompts": [
|
||||||
|
"Tell me about overworld map 0"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["map", "light world", "size", "area"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "warp_enumeration",
|
||||||
|
"description": "Test overworld-list-warps tool",
|
||||||
|
"prompts": [
|
||||||
|
"List all entrances on map 0"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["entrance", "warp", "position"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "multi_step_exploration",
|
||||||
|
"description": "Test conversational follow-up questions",
|
||||||
|
"prompts": [
|
||||||
|
"What dungeons exist in this ROM?",
|
||||||
|
"Tell me about the sprites in the first dungeon you mentioned"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["dungeon", "sprite"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "command_generation_tree",
|
||||||
|
"description": "Test command generation for placing a tree",
|
||||||
|
"prompts": [
|
||||||
|
"Place a tree at position 10, 10 on map 0"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["overworld", "set-tile", "tree", "0x02E"],
|
||||||
|
"expect_tool_calls": false,
|
||||||
|
"expect_commands": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "command_generation_water",
|
||||||
|
"description": "Test command generation for water tiles",
|
||||||
|
"prompts": [
|
||||||
|
"Create a 3x3 water pond at coordinates 20, 15 on map 0"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["overworld", "set-tile", "water"],
|
||||||
|
"expect_tool_calls": false,
|
||||||
|
"expect_commands": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "contextual_conversation",
|
||||||
|
"description": "Test that agent maintains context across messages",
|
||||||
|
"prompts": [
|
||||||
|
"What is map 0?",
|
||||||
|
"How many tiles does it have?",
|
||||||
|
"Find all trees on that map"
|
||||||
|
],
|
||||||
|
"expected_keywords": ["map", "tile", "tree"],
|
||||||
|
"expect_tool_calls": true,
|
||||||
|
"expect_commands": false
|
||||||
|
}
|
||||||
|
]
|
||||||
@@ -75,6 +75,12 @@ z3ed agent test record stop
|
|||||||
|
|
||||||
# Replay recorded test
|
# Replay recorded test
|
||||||
z3ed agent test replay tests/my_test.json
|
z3ed agent test replay tests/my_test.json
|
||||||
|
|
||||||
|
# Test conversational agent (batch mode, no TUI required)
|
||||||
|
z3ed agent test-conversation
|
||||||
|
|
||||||
|
# Test with custom conversation file
|
||||||
|
z3ed agent test-conversation --file my_tests.json
|
||||||
```
|
```
|
||||||
|
|
||||||
## AI Service Setup
|
## AI Service Setup
|
||||||
@@ -135,6 +141,7 @@ The project is currently focused on implementing a conversational AI agent. See
|
|||||||
### ✅ Completed
|
### ✅ Completed
|
||||||
- **Conversational Agent Service**: ✅ Multi-step tool execution loop operational
|
- **Conversational Agent Service**: ✅ Multi-step tool execution loop operational
|
||||||
- **TUI Chat Interface**: ✅ Production-ready with table/JSON rendering (`z3ed agent chat`)
|
- **TUI Chat Interface**: ✅ Production-ready with table/JSON rendering (`z3ed agent chat`)
|
||||||
|
- **Batch Testing Mode**: ✅ New `test-conversation` command for automated testing without TUI
|
||||||
- **Tool Dispatcher**: ✅ 5 read-only tools for ROM introspection
|
- **Tool Dispatcher**: ✅ 5 read-only tools for ROM introspection
|
||||||
- `resource-list`: Labeled resource enumeration
|
- `resource-list`: Labeled resource enumeration
|
||||||
- `dungeon-list-sprites`: Sprite inspection in dungeon rooms
|
- `dungeon-list-sprites`: Sprite inspection in dungeon rooms
|
||||||
@@ -144,9 +151,10 @@ The project is currently focused on implementing a conversational AI agent. See
|
|||||||
- **AI Service Backends**: ✅ Ollama (local) and Gemini (cloud) operational
|
- **AI Service Backends**: ✅ Ollama (local) and Gemini (cloud) operational
|
||||||
- **Enhanced Prompting**: ✅ Resource catalogue loading with system instruction generation
|
- **Enhanced Prompting**: ✅ Resource catalogue loading with system instruction generation
|
||||||
- **LLM Function Calling**: ✅ Complete - Tool schemas injected into system prompts, response parsing implemented
|
- **LLM Function Calling**: ✅ Complete - Tool schemas injected into system prompts, response parsing implemented
|
||||||
|
- **ImGui Test Harness**: ✅ gRPC service for GUI automation integrated and verified
|
||||||
|
|
||||||
### 🔄 In Progress (Priority Order)
|
### 🔄 In Progress (Priority Order)
|
||||||
1. **Live LLM Testing**: Verify function calling with Ollama/Gemini (1-2h)
|
1. **Live LLM Testing**: Ready for execution with new batch testing mode (use `./scripts/test_agent_conversation_live.sh`)
|
||||||
2. **GUI Chat Widget**: Not yet started - TUI exists, GUI integration pending (6-8h)
|
2. **GUI Chat Widget**: Not yet started - TUI exists, GUI integration pending (6-8h)
|
||||||
3. **Tool Coverage Expansion**: 5 tools working, 8+ planned (dialogue, sprites, regions) (8-10h)
|
3. **Tool Coverage Expansion**: 5 tools working, 8+ planned (dialogue, sprites, regions) (8-10h)
|
||||||
|
|
||||||
@@ -283,6 +291,24 @@ AI agent features require:
|
|||||||
- Provide map context ("Light World", "map 0")
|
- Provide map context ("Light World", "map 0")
|
||||||
- Check ResourceLabels are loaded for your project
|
- Check ResourceLabels are loaded for your project
|
||||||
|
|
||||||
|
### Testing the conversational agent
|
||||||
|
**Problem**: TUI chat requires interactive input
|
||||||
|
**Solution**: Use the new batch testing mode:
|
||||||
|
```bash
|
||||||
|
# Run with default test cases (no interaction required)
|
||||||
|
z3ed agent test-conversation --rom zelda3.sfc
|
||||||
|
|
||||||
|
# Or use the automated test script
|
||||||
|
./scripts/test_agent_conversation_live.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verifying ImGui test harness
|
||||||
|
**Problem**: Unsure if GUI automation is working
|
||||||
|
**Solution**: Run the verification script:
|
||||||
|
```bash
|
||||||
|
./scripts/test_imgui_harness.sh
|
||||||
|
```
|
||||||
|
|
||||||
#### Gemini-Specific Issues
|
#### Gemini-Specific Issues
|
||||||
- **"Cannot reach Gemini API"**: Check your internet connection, API key, and that you've built with SSL support.
|
- **"Cannot reach Gemini API"**: Check your internet connection, API key, and that you've built with SSL support.
|
||||||
- **"Invalid Gemini API key"**: Regenerate your key at `aistudio.google.com/apikey`.
|
- **"Invalid Gemini API key"**: Regenerate your key at `aistudio.google.com/apikey`.
|
||||||
|
|||||||
130
scripts/test_agent_conversation_live.sh
Executable file
130
scripts/test_agent_conversation_live.sh
Executable file
@@ -0,0 +1,130 @@
|
|||||||
|
#!/bin/bash
# Live testing script for the conversational agent.
# Runs `z3ed agent test-conversation` against a real Ollama or Gemini backend.
#
# Exit status: 1 when a prerequisite or backend is missing, otherwise the exit
# status of the z3ed test run.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
Z3ED="${PROJECT_ROOT}/build/bin/z3ed"
ROM_FILE="${PROJECT_ROOT}/assets/zelda3.sfc"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

echo "========================================="
echo "Live Conversational Agent Test"
echo "========================================="
echo ""

# Prerequisites check
if [ ! -f "$Z3ED" ]; then
    echo -e "${RED}✗ z3ed not found at $Z3ED${NC}"
    echo "Build with: cmake --build build --target z3ed"
    exit 1
fi

if [ ! -f "$ROM_FILE" ]; then
    echo -e "${RED}✗ ROM file not found at $ROM_FILE${NC}"
    exit 1
fi

echo -e "${GREEN}✓ Prerequisites met${NC}"
echo ""

# Check for AI backends. AI_BACKEND is initialised explicitly so the later
# comparisons never read a value inherited from the caller's environment.
BACKEND_AVAILABLE=false
AI_BACKEND=""

echo "Checking AI Backends..."
echo "-----------------------"

# Check Ollama: the binary must exist, the server must answer, and the
# recommended model must be installed.
if command -v ollama &> /dev/null; then
    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
        echo -e "${GREEN}✓ Ollama server running${NC}"
        if ollama list | grep -q "qwen2.5-coder"; then
            echo -e "${GREEN}✓ qwen2.5-coder model available${NC}"
            BACKEND_AVAILABLE=true
            AI_BACKEND="Ollama"
        else
            echo -e "${YELLOW}⚠ Recommended model qwen2.5-coder:7b not installed${NC}"
            echo " Install with: ollama pull qwen2.5-coder:7b"
        fi
    else
        echo -e "${YELLOW}⚠ Ollama not running${NC}"
        echo " Start with: ollama serve"
    fi
else
    echo -e "${YELLOW}⚠ Ollama not installed${NC}"
fi

# Check Gemini: only reported as the active backend when Ollama is not usable.
if [ -n "$GEMINI_API_KEY" ]; then
    echo -e "${GREEN}✓ Gemini API key set${NC}"
    BACKEND_AVAILABLE=true
    if [ "$AI_BACKEND" != "Ollama" ]; then
        AI_BACKEND="Gemini"
    fi
else
    echo -e "${YELLOW}⚠ GEMINI_API_KEY not set${NC}"
fi

echo ""

if [ "$BACKEND_AVAILABLE" = false ]; then
    echo -e "${RED}✗ No AI backend available${NC}"
    echo ""
    echo "Please set up at least one backend:"
    echo " - Ollama: brew install ollama && ollama serve && ollama pull qwen2.5-coder:7b"
    echo " - Gemini: export GEMINI_API_KEY='your-key-here'"
    exit 1
fi

echo -e "${GREEN}✓ Using AI Backend: $AI_BACKEND${NC}"
echo ""

# Run the test-conversation command with default test cases
echo "========================================="
echo "Running Automated Conversation Tests"
echo "========================================="
echo ""
echo "This will run 5 default test cases:"
echo " 1. Simple ROM introspection (dungeon query)"
echo " 2. Overworld tile search"
echo " 3. Multi-step conversation"
echo " 4. Command generation (tile placement)"
echo " 5. Map description"
echo ""

# Only pause for confirmation when attached to a terminal. Under `set -e`, a
# `read` that hits EOF (CI, piped stdin) would otherwise abort the script.
if [ -t 0 ]; then
    read -r -p "Press Enter to start tests (or Ctrl+C to cancel)..."
    echo ""
fi

# Run the tests. The `||` keeps `set -e` from terminating the script on a
# failing run, so the result summary below is still printed.
TEST_EXIT_CODE=0
"$Z3ED" agent test-conversation --rom "$ROM_FILE" --verbose || TEST_EXIT_CODE=$?

echo ""
echo "========================================="
echo "Test Results"
echo "========================================="

if [ "$TEST_EXIT_CODE" -eq 0 ]; then
    echo -e "${GREEN}✅ All tests completed successfully${NC}"
else
    echo -e "${RED}❌ Tests failed with exit code $TEST_EXIT_CODE${NC}"
fi

echo ""
echo "Next Steps:"
echo " - Review the output above for any warnings"
echo " - Check if tool calls are being invoked correctly"
echo " - Verify JSON/table formatting is working"
echo " - Test with custom conversation file: z3ed agent test-conversation --file my_tests.json"
echo ""

exit "$TEST_EXIT_CODE"
|
||||||
180
scripts/test_imgui_harness.sh
Executable file
180
scripts/test_imgui_harness.sh
Executable file
@@ -0,0 +1,180 @@
|
|||||||
|
#!/bin/bash
# Test script to verify ImGuiTestHarness gRPC service integration.
# Checks that the YAZE binary and the test harness sources exist, probes the
# gRPC port, and (when grpcurl is installed) lists the exposed service methods.
#
# Exit status: 1 when the application binary or harness sources are missing.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
YAZE_APP="${PROJECT_ROOT}/build/bin/yaze.app/Contents/MacOS/yaze"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

echo "========================================="
echo "ImGui Test Harness Verification"
echo "========================================="
echo ""

# Check if YAZE is built with gRPC support
if [ ! -f "$YAZE_APP" ]; then
    echo -e "${RED}✗ YAZE application not found at $YAZE_APP${NC}"
    echo ""
    echo "Build with gRPC support:"
    echo " cmake -B build -DYAZE_WITH_GRPC=ON -DYAZE_WITH_JSON=ON"
    echo " cmake --build build --target yaze"
    exit 1
fi

echo -e "${GREEN}✓ YAZE application found${NC}"
echo ""

# Check if gRPC libraries are linked. The result is remembered so the summary
# does not claim a verified gRPC build when this probe found nothing.
echo "Checking gRPC dependencies..."
echo "------------------------------"

GRPC_LINKED=false
if otool -L "$YAZE_APP" 2>/dev/null | grep -q "libgrpc"; then
    echo -e "${GREEN}✓ gRPC libraries linked${NC}"
    GRPC_LINKED=true
else
    echo -e "${YELLOW}⚠ gRPC libraries may not be linked${NC}"
    echo " This might be expected if gRPC is statically linked"
fi

# Check for test harness service code
TEST_HARNESS_IMPL="${PROJECT_ROOT}/src/app/core/service/imgui_test_harness_service.cc"
if [ -f "$TEST_HARNESS_IMPL" ]; then
    echo -e "${GREEN}✓ Test harness implementation found${NC}"
else
    echo -e "${RED}✗ Test harness implementation not found${NC}"
    exit 1
fi

echo ""

# Check if the service is properly integrated
echo "Verifying test harness integration..."
echo "--------------------------------------"

# Look for the service registration in the codebase
if grep -q "ImGuiTestHarnessServer" "${PROJECT_ROOT}/src/app/core/service/imgui_test_harness_service.h"; then
    echo -e "${GREEN}✓ ImGuiTestHarnessServer class defined${NC}"
else
    echo -e "${RED}✗ ImGuiTestHarnessServer class not found${NC}"
    exit 1
fi

# Check for gRPC server initialization
if grep -rq "ImGuiTestHarnessServer.*Start" "${PROJECT_ROOT}/src/app" 2>/dev/null; then
    echo -e "${GREEN}✓ Server startup code found${NC}"
else
    echo -e "${YELLOW}⚠ Could not verify server startup code${NC}"
fi

echo ""

# Test gRPC port availability
echo "Testing gRPC server availability..."
echo "------------------------------------"

GRPC_PORT=50051
echo "Checking if port $GRPC_PORT is available..."

# A listener on the port is taken to mean a YAZE instance is already running.
if lsof -Pi :$GRPC_PORT -sTCP:LISTEN -t >/dev/null 2>&1; then
    echo -e "${YELLOW}⚠ Port $GRPC_PORT is already in use${NC}"
    echo " If YAZE is running, this is expected"
    SERVER_RUNNING=true
else
    echo -e "${GREEN}✓ Port $GRPC_PORT is available${NC}"
    SERVER_RUNNING=false
fi

echo ""

# Interactive test option
if [ "$SERVER_RUNNING" = false ]; then
    echo "========================================="
    echo "Interactive Test Options"
    echo "========================================="
    echo ""
    echo "The test harness server is not currently running."
    echo ""
    echo "To test the full integration:"
    echo ""
    echo "1. Start YAZE in one terminal:"
    echo " $YAZE_APP"
    echo ""
    echo "2. In another terminal, verify the gRPC server:"
    echo " lsof -Pi :$GRPC_PORT -sTCP:LISTEN"
    echo ""
    echo "3. Test with z3ed GUI automation:"
    echo " z3ed agent test --prompt 'Open Overworld editor'"
    echo ""
else
    echo "========================================="
    echo "Live Server Test"
    echo "========================================="
    echo ""
    echo -e "${GREEN}✓ gRPC server appears to be running on port $GRPC_PORT${NC}"
    echo ""

    # Try to connect to the server
    if command -v grpcurl &> /dev/null; then
        echo "Testing server connection with grpcurl..."
        if grpcurl -plaintext localhost:$GRPC_PORT list 2>&1 | grep -q "yaze.test.ImGuiTestHarness"; then
            echo -e "${GREEN}✅ ImGuiTestHarness service is available!${NC}"
            echo ""
            echo "Available RPC methods:"
            grpcurl -plaintext localhost:$GRPC_PORT list yaze.test.ImGuiTestHarness 2>&1 | sed 's/^/ /'
        else
            echo -e "${YELLOW}⚠ Could not verify service availability${NC}"
        fi
    else
        echo -e "${YELLOW}⚠ grpcurl not installed, skipping connection test${NC}"
        echo " Install with: brew install grpcurl"
    fi
fi

echo ""
echo "========================================="
echo "Summary"
echo "========================================="
echo ""
echo "Test Harness Components:"
echo " [✓] Source files present"

# Only report gRPC as confirmed when the otool probe actually found it.
if [ "$GRPC_LINKED" = true ]; then
    echo " [✓] gRPC integration compiled"
else
    echo " [?] gRPC linkage not verified (may be statically linked)"
fi

if [ "$SERVER_RUNNING" = true ]; then
    echo " [✓] Server running on port $GRPC_PORT"
else
    echo " [ ] Server not currently running"
fi

echo ""
# -e is required here: the message embeds colour escape sequences.
echo -e "The ImGuiTestHarness service is ${GREEN}ready${NC} for:"
echo " - Widget discovery and introspection"
echo " - Automated GUI testing via z3ed agent test"
echo " - Recording and playback of user interactions"
echo ""

# Additional checks for agent chat widget
echo "Checking for Agent Chat Widget..."
echo "----------------------------------"

if grep -rq "AgentChatWidget" "${PROJECT_ROOT}/src/app/gui" 2>/dev/null; then
    echo -e "${GREEN}✓ AgentChatWidget found in GUI code${NC}"
else
    echo -e "${YELLOW}⚠ AgentChatWidget not yet implemented${NC}"
    echo " This is the next priority item in the roadmap"
    echo " Location: src/app/gui/debug/agent_chat_widget.{h,cc}"
fi

echo ""
echo "Next Steps:"
echo " 1. Run YAZE and verify gRPC server starts: $YAZE_APP"
echo " 2. Test conversation agent: z3ed agent test-conversation"
echo " 3. Implement AgentChatWidget for GUI integration"
echo ""
|
||||||
@@ -12,7 +12,7 @@ namespace agent {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr absl::string_view kUsage =
|
constexpr absl::string_view kUsage =
|
||||||
"Usage: agent <run|plan|diff|accept|test|gui|learn|list|commit|revert|describe|resource-list|dungeon-list-sprites|overworld-find-tile|overworld-describe-map|overworld-list-warps|chat> "
|
"Usage: agent <run|plan|diff|accept|test|test-conversation|gui|learn|list|commit|revert|describe|resource-list|dungeon-list-sprites|overworld-find-tile|overworld-describe-map|overworld-list-warps|chat> "
|
||||||
"[options]";
|
"[options]";
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
@@ -41,6 +41,9 @@ absl::Status Agent::Run(const std::vector<std::string>& arg_vec) {
|
|||||||
if (subcommand == "test") {
|
if (subcommand == "test") {
|
||||||
return agent::HandleTestCommand(subcommand_args);
|
return agent::HandleTestCommand(subcommand_args);
|
||||||
}
|
}
|
||||||
|
if (subcommand == "test-conversation") {
|
||||||
|
return agent::HandleTestConversationCommand(subcommand_args);
|
||||||
|
}
|
||||||
if (subcommand == "gui") {
|
if (subcommand == "gui") {
|
||||||
return agent::HandleGuiCommand(subcommand_args);
|
return agent::HandleGuiCommand(subcommand_args);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,6 +41,8 @@ absl::Status HandleOverworldListWarpsCommand(
|
|||||||
const std::vector<std::string>& arg_vec,
|
const std::vector<std::string>& arg_vec,
|
||||||
Rom* rom_context = nullptr);
|
Rom* rom_context = nullptr);
|
||||||
absl::Status HandleChatCommand(Rom& rom);
|
absl::Status HandleChatCommand(Rom& rom);
|
||||||
|
absl::Status HandleTestConversationCommand(
|
||||||
|
const std::vector<std::string>& arg_vec);
|
||||||
|
|
||||||
} // namespace agent
|
} // namespace agent
|
||||||
} // namespace cli
|
} // namespace cli
|
||||||
|
|||||||
331
src/cli/handlers/agent/conversation_test.cc
Normal file
331
src/cli/handlers/agent/conversation_test.cc
Normal file
@@ -0,0 +1,331 @@
|
|||||||
|
#include "cli/handlers/agent/commands.h"

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "cli/handlers/agent/common.h"
#include "cli/service/agent/conversational_agent_service.h"
#include "nlohmann/json.hpp"
|
||||||
|
|
||||||
|
namespace yaze {
|
||||||
|
namespace cli {
|
||||||
|
namespace agent {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// One scripted conversation: the prompts to send, in order, and the soft
// expectations checked against each agent reply.
struct ConversationTestCase {
  // Identifier printed in the test header and in the result lines.
  std::string name;
  // Human-readable summary printed below the name.
  std::string description;
  // Messages sent to the agent one after another.
  std::vector<std::string> user_prompts;
  // Substrings looked for in every reply.
  std::vector<std::string> expected_keywords;
  // When set, a reply is expected to carry table data.
  bool expect_tool_calls{false};
  // When set, a reply is expected to mention a command.
  bool expect_commands{false};
};
|
||||||
|
|
||||||
|
std::vector<ConversationTestCase> GetDefaultTestCases() {
|
||||||
|
return {
|
||||||
|
{
|
||||||
|
.name = "simple_question",
|
||||||
|
.description = "Ask about dungeons in the ROM",
|
||||||
|
.user_prompts = {"What dungeons are in this ROM?"},
|
||||||
|
.expected_keywords = {"dungeon", "palace", "castle"},
|
||||||
|
.expect_tool_calls = true,
|
||||||
|
.expect_commands = false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "overworld_tile_search",
|
||||||
|
.description = "Find specific tiles in overworld",
|
||||||
|
.user_prompts = {"Find all trees on the overworld"},
|
||||||
|
.expected_keywords = {"tree", "tile", "0x02E", "map"},
|
||||||
|
.expect_tool_calls = true,
|
||||||
|
.expect_commands = false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "multi_step_query",
|
||||||
|
.description = "Ask multiple questions in sequence",
|
||||||
|
.user_prompts = {
|
||||||
|
"What dungeons are defined?",
|
||||||
|
"Tell me about the sprites in the first dungeon room",
|
||||||
|
},
|
||||||
|
.expected_keywords = {"dungeon", "sprite", "room"},
|
||||||
|
.expect_tool_calls = true,
|
||||||
|
.expect_commands = false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "command_generation",
|
||||||
|
.description = "Request ROM modification",
|
||||||
|
.user_prompts = {"Place a tree at position 10, 10 on map 0"},
|
||||||
|
.expected_keywords = {"overworld", "set-tile", "0x02E", "tree"},
|
||||||
|
.expect_tool_calls = false,
|
||||||
|
.expect_commands = true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.name = "map_description",
|
||||||
|
.description = "Get information about a specific map",
|
||||||
|
.user_prompts = {"Describe overworld map 0"},
|
||||||
|
.expected_keywords = {"map", "light world", "size", "tile"},
|
||||||
|
.expect_tool_calls = true,
|
||||||
|
.expect_commands = false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes a banner with the test's name and description to stdout, framed by
// rule lines and followed by a blank line.
void PrintTestHeader(const ConversationTestCase& test_case) {
  static constexpr char kRule[] =
      "===========================================\n";

  std::cout << '\n'
            << kRule
            << "Test: " << test_case.name << '\n'
            << "Description: " << test_case.description << '\n'
            << kRule
            << '\n';
}
|
||||||
|
|
||||||
|
// Echoes the user's side of the exchange to stdout, followed by a blank line.
void PrintUserPrompt(const std::string& prompt) {
  std::cout << "👤 User: ";
  std::cout << prompt;
  std::cout << '\n' << '\n';
}
|
||||||
|
|
||||||
|
void PrintAgentResponse(const ChatMessage& response) {
|
||||||
|
std::cout << "🤖 Agent: " << response.message << "\n\n";
|
||||||
|
|
||||||
|
if (response.table_data.has_value()) {
|
||||||
|
std::cout << "📊 Table Output:\n";
|
||||||
|
const auto& table = response.table_data.value();
|
||||||
|
|
||||||
|
// Print headers
|
||||||
|
std::cout << " ";
|
||||||
|
for (size_t i = 0; i < table.headers.size(); ++i) {
|
||||||
|
std::cout << table.headers[i];
|
||||||
|
if (i < table.headers.size() - 1) {
|
||||||
|
std::cout << " | ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "\n ";
|
||||||
|
for (size_t i = 0; i < table.headers.size(); ++i) {
|
||||||
|
std::cout << std::string(table.headers[i].length(), '-');
|
||||||
|
if (i < table.headers.size() - 1) {
|
||||||
|
std::cout << " | ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "\n";
|
||||||
|
|
||||||
|
// Print rows (limit to 10 for readability)
|
||||||
|
const size_t max_rows = std::min<size_t>(10, table.rows.size());
|
||||||
|
for (size_t i = 0; i < max_rows; ++i) {
|
||||||
|
std::cout << " ";
|
||||||
|
for (size_t j = 0; j < table.rows[i].size(); ++j) {
|
||||||
|
std::cout << table.rows[i][j];
|
||||||
|
if (j < table.rows[i].size() - 1) {
|
||||||
|
std::cout << " | ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (table.rows.size() > max_rows) {
|
||||||
|
std::cout << " ... (" << (table.rows.size() - max_rows)
|
||||||
|
<< " more rows)\n";
|
||||||
|
}
|
||||||
|
std::cout << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ValidateResponse(const ChatMessage& response,
|
||||||
|
const ConversationTestCase& test_case) {
|
||||||
|
bool passed = true;
|
||||||
|
|
||||||
|
// Check for expected keywords
|
||||||
|
for (const auto& keyword : test_case.expected_keywords) {
|
||||||
|
if (response.message.find(keyword) == std::string::npos) {
|
||||||
|
std::cout << "⚠️ Warning: Expected keyword '" << keyword
|
||||||
|
<< "' not found in response\n";
|
||||||
|
// Don't fail test, just warn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for tool calls (if we have table data, tools were likely called)
|
||||||
|
if (test_case.expect_tool_calls && !response.table_data.has_value()) {
|
||||||
|
std::cout << "⚠️ Warning: Expected tool calls but no table data found\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for commands
|
||||||
|
if (test_case.expect_commands) {
|
||||||
|
bool has_commands = response.message.find("overworld") != std::string::npos ||
|
||||||
|
response.message.find("dungeon") != std::string::npos ||
|
||||||
|
response.message.find("set-tile") != std::string::npos;
|
||||||
|
if (!has_commands) {
|
||||||
|
std::cout << "⚠️ Warning: Expected commands but none found\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs every prompt of `test_case` through `service`, printing the exchange
// and validating each reply.
//
// Returns:
//   - InvalidArgumentError when the test case has no prompts, since nothing
//     would be exercised;
//   - the status of the first SendMessage call that failed, if any (later
//     prompts are still sent so the full transcript is printed);
//   - OkStatus otherwise, including when validation only produced warnings.
absl::Status RunTestCase(const ConversationTestCase& test_case,
                         ConversationalAgentService& service) {
  PrintTestHeader(test_case);

  if (test_case.user_prompts.empty()) {
    std::cout << "❌ FAILED: test case has no prompts\n\n";
    return absl::InvalidArgumentError(
        absl::StrCat("Test case '", test_case.name, "' has no prompts"));
  }

  absl::Status first_error = absl::OkStatus();
  bool has_warnings = false;

  for (const auto& prompt : test_case.user_prompts) {
    PrintUserPrompt(prompt);

    auto response_or = service.SendMessage(prompt);
    if (!response_or.ok()) {
      std::cout << "❌ FAILED: " << response_or.status().message() << "\n\n";
      // Remember only the first failure; it is what the caller reports.
      if (first_error.ok()) {
        first_error = response_or.status();
      }
      continue;
    }

    const auto& response = response_or.value();
    PrintAgentResponse(response);

    if (!ValidateResponse(response, test_case)) {
      has_warnings = true;
    }
  }

  if (!first_error.ok()) {
    std::cout << "❌ Test FAILED: " << test_case.name << "\n";
    return first_error;
  }

  if (has_warnings) {
    std::cout << "⚠️ Test completed with warnings: " << test_case.name << "\n";
  } else {
    std::cout << "✅ Test PASSED: " << test_case.name << "\n";
  }

  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
// Reads conversation test cases from the JSON file at `file_path` and appends
// them to `*test_cases`.
//
// The file must hold a JSON array of objects. Recognised keys per object:
// "name", "description", "prompts" (array of strings), "expected_keywords"
// (array of strings), "expect_tool_calls" and "expect_commands" (booleans).
// Missing keys fall back to defaults; non-string entries inside the two
// arrays are skipped.
//
// Returns NotFoundError when the file cannot be opened and
// InvalidArgumentError when `test_cases` is null or the content is not valid
// JSON of the expected shape. Nothing is appended unless the whole file
// loads successfully.
absl::Status LoadTestCasesFromFile(const std::string& file_path,
                                   std::vector<ConversationTestCase>* test_cases) {
  if (test_cases == nullptr) {
    return absl::InvalidArgumentError("test_cases output must not be null");
  }

  std::ifstream file(file_path);
  if (!file.is_open()) {
    return absl::NotFoundError(
        absl::StrCat("Could not open test file: ", file_path));
  }

  nlohmann::json test_json;
  try {
    file >> test_json;
  } catch (const nlohmann::json::exception& e) {
    return absl::InvalidArgumentError(
        absl::StrCat("Failed to parse test file: ", e.what()));
  }

  if (!test_json.is_array()) {
    return absl::InvalidArgumentError(
        "Test file must contain a JSON array of test cases");
  }

  // Collect into a local vector first so a malformed entry in the middle of
  // the file does not leave the caller with a partial list.
  std::vector<ConversationTestCase> loaded;
  loaded.reserve(test_json.size());

  for (const auto& test_obj : test_json) {
    // value()/contains() below are only meaningful on objects.
    if (!test_obj.is_object()) {
      return absl::InvalidArgumentError(
          "Each test case in the test file must be a JSON object");
    }

    ConversationTestCase test_case;
    try {
      // value() throws when the key exists but holds an incompatible type.
      test_case.name = test_obj.value("name", "unnamed_test");
      test_case.description = test_obj.value("description", "");
      test_case.expect_tool_calls = test_obj.value("expect_tool_calls", false);
      test_case.expect_commands = test_obj.value("expect_commands", false);
    } catch (const nlohmann::json::exception& e) {
      return absl::InvalidArgumentError(
          absl::StrCat("Invalid test case field: ", e.what()));
    }

    if (test_obj.contains("prompts") && test_obj["prompts"].is_array()) {
      for (const auto& prompt : test_obj["prompts"]) {
        if (prompt.is_string()) {
          test_case.user_prompts.push_back(prompt.get<std::string>());
        }
      }
    }

    if (test_obj.contains("expected_keywords") &&
        test_obj["expected_keywords"].is_array()) {
      for (const auto& keyword : test_obj["expected_keywords"]) {
        if (keyword.is_string()) {
          test_case.expected_keywords.push_back(keyword.get<std::string>());
        }
      }
    }

    loaded.push_back(test_case);
  }

  test_cases->insert(test_cases->end(), loaded.begin(), loaded.end());
  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Entry point for `agent test-conversation`: runs a batch of scripted
// conversations against the conversational agent without the TUI.
//
// Recognised arguments:
//   --file <path>  load test cases from a JSON file instead of the defaults.
// Any other argument (for example --verbose) is accepted and ignored here.
//
// Returns the ROM-load or file-load error if setup fails,
// InvalidArgumentError for a malformed command line or an empty test list,
// InternalError when at least one test case failed, and OkStatus otherwise.
absl::Status HandleTestConversationCommand(
    const std::vector<std::string>& arg_vec) {
  std::string test_file;
  bool use_defaults = true;

  for (size_t i = 0; i < arg_vec.size(); ++i) {
    const std::string& arg = arg_vec[i];
    if (arg == "--file") {
      // A dangling --file used to be ignored silently, which ran the default
      // suite instead of the one the user asked for.
      if (i + 1 >= arg_vec.size()) {
        return absl::InvalidArgumentError("--file requires a path argument");
      }
      test_file = arg_vec[++i];
      use_defaults = false;
    }
  }

  // Load ROM context
  Rom rom;
  auto load_status = LoadRomForAgent(rom);
  if (!load_status.ok()) {
    return load_status;
  }

  // Create conversational agent service.
  // NOTE(review): one service instance is shared by every test case, so
  // conversation state presumably carries over between cases — confirm
  // whether it should be reset per case.
  ConversationalAgentService service;
  service.SetRomContext(&rom);

  // Load test cases
  std::vector<ConversationTestCase> test_cases;
  if (use_defaults) {
    test_cases = GetDefaultTestCases();
    std::cout << "Using default test cases (" << test_cases.size() << " tests)\n";
  } else {
    auto status = LoadTestCasesFromFile(test_file, &test_cases);
    if (!status.ok()) {
      return status;
    }
    std::cout << "Loaded " << test_cases.size() << " test cases from "
              << test_file << "\n";
  }

  if (test_cases.empty()) {
    return absl::InvalidArgumentError("No test cases to run");
  }

  // Run all test cases
  int passed = 0;
  int failed = 0;

  for (const auto& test_case : test_cases) {
    auto status = RunTestCase(test_case, service);
    if (status.ok()) {
      ++passed;
    } else {
      ++failed;
      std::cerr << "Test case '" << test_case.name << "' failed: "
                << status.message() << "\n";
    }
  }

  // Print summary
  std::cout << "\n===========================================\n";
  std::cout << "Test Summary\n";
  std::cout << "===========================================\n";
  std::cout << "Total tests: " << test_cases.size() << "\n";
  std::cout << "Passed: " << passed << "\n";
  std::cout << "Failed: " << failed << "\n";

  if (failed != 0) {
    std::cout << "\n⚠️ Some tests failed\n";
    // Surface the failure to the caller so the process exit status reflects
    // the outcome of the run.
    return absl::InternalError(absl::StrCat(
        failed, " of ", test_cases.size(), " conversation test(s) failed"));
  }

  std::cout << "\n✅ All tests passed!\n";
  return absl::OkStatus();
}
|
||||||
|
|
||||||
|
} // namespace agent
|
||||||
|
} // namespace cli
|
||||||
|
} // namespace yaze
|
||||||
@@ -62,9 +62,11 @@ add_executable(
|
|||||||
cli/handlers/agent.cc
|
cli/handlers/agent.cc
|
||||||
cli/handlers/agent/common.cc
|
cli/handlers/agent/common.cc
|
||||||
cli/handlers/agent/general_commands.cc
|
cli/handlers/agent/general_commands.cc
|
||||||
|
cli/handlers/agent/conversation_test.cc
|
||||||
cli/handlers/agent/test_common.cc
|
cli/handlers/agent/test_common.cc
|
||||||
cli/handlers/agent/test_commands.cc
|
cli/handlers/agent/test_commands.cc
|
||||||
cli/handlers/agent/gui_commands.cc
|
cli/handlers/agent/gui_commands.cc
|
||||||
|
cli/handlers/agent/tool_commands.cc
|
||||||
cli/flags.cc
|
cli/flags.cc
|
||||||
cli/modern_cli.cc
|
cli/modern_cli.cc
|
||||||
cli/tui/asar_patch.cc
|
cli/tui/asar_patch.cc
|
||||||
|
|||||||
Reference in New Issue
Block a user