feat: Consolidate and enhance agent testing scripts, replacing manual and quickstart tests with a comprehensive test suite

2025-10-04 03:56:47 -04:00
parent ed75bdc268
commit 1b4015a87a
6 changed files with 93 additions and 772 deletions
--- a/scripts/agent_test_suite.sh
+++ b/scripts/agent_test_suite.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+# Comprehensive test suite for the z3ed AI Agent.
+# This script consolidates multiple older test scripts into one.
+#
+# Usage: ./scripts/agent_test_suite.sh <provider>
+# provider: ollama, gemini, or mock
+
+set -e # Exit immediately if a command exits with a non-zero status.
+
+# --- Configuration ---
+# Paths default to locations under the repository root (the parent of this script's
+# directory) rather than one developer's home; set the variables to override them.
+PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+Z3ED_BIN="${Z3ED_BIN:-$PROJECT_ROOT/build_test/bin/z3ed}"
+ROM_PATH="${ROM_PATH:-$PROJECT_ROOT/assets/zelda3.sfc}"
+TEST_DIR="${TEST_DIR:-$PROJECT_ROOT/assets/agent}"
+# Conversation files, run in this order; each is looked up under TEST_DIR.
+TEST_FILES=("context_and_followup.txt" "complex_command_generation.txt" "error_handling_and_edge_cases.txt")
+
+# --- Helper Functions ---
+# Print a blank line, then the title passed as $1 framed by two '=' rules.
+print_header() {
+  local title="$1" bar="================================================="
+  echo ""
+  echo "$bar"; echo "$title"; echo "$bar"
+}
+
+# --- Pre-flight Checks ---
+print_header "Performing Pre-flight Checks"
+
+PROVIDER="${1:-}" # The provider name is the first argument and is mandatory.
+[[ -n "$PROVIDER" ]] || {
+  echo "❌ Error: No AI provider specified."
+  echo "Usage: $0 <ollama|gemini|mock>"
+  exit 1
+}
+echo "✅ Provider: $PROVIDER"
+
+[[ -f "$Z3ED_BIN" ]] || { # Abort unless the agent binary is a regular file.
+  echo "❌ Error: z3ed binary not found at $Z3ED_BIN"
+  echo "Please build the project first (e.g., in build_test)."
+  exit 1
+}
+echo "✅ z3ed binary found."
+
+[[ -f "$ROM_PATH" ]] || { # Same requirement for the ROM image.
+  echo "❌ Error: ROM not found at $ROM_PATH"
+  exit 1
+}
+echo "✅ ROM file found."
+
+# Provider-specific prerequisites: fatal for Gemini (API key), advisory for Ollama.
+case "$PROVIDER" in
+  gemini)
+    if [[ -z "$GEMINI_API_KEY" ]]; then
+      echo "❌ Error: GEMINI_API_KEY environment variable is not set."
+      echo "Please set it to your Gemini API key to run this test."
+      exit 1
+    fi
+    echo "✅ GEMINI_API_KEY is set." ;;
+  ollama)
+    if pgrep -x "Ollama" > /dev/null || pgrep -x "ollama" > /dev/null; then
+      echo "✅ Ollama server process found."
+    else
+      echo "⚠️ Warning: Ollama server process not found. The script might fail if it's not running."
+    fi ;;
+esac
+
+# --- Run Test Suite ---
+# Run each conversation file; missing files are counted so the run ends non-zero.
+MISSING_COUNT=0
+for test_file in "${TEST_FILES[@]}"; do
+  print_header "Running Test File: $test_file (Provider: $PROVIDER)"
+  FULL_TEST_PATH="$TEST_DIR/$test_file"
+  if [[ ! -f "$FULL_TEST_PATH" ]]; then
+    echo "❌ Error: Test file not found: $FULL_TEST_PATH"
+    MISSING_COUNT=$((MISSING_COUNT + 1))
+    continue
+  fi
+  # Argv array instead of an eval'd string: spaces/metacharacters in values stay intact.
+  COMMAND=("$Z3ED_BIN" agent simple-chat "--file=$FULL_TEST_PATH" "--rom=$ROM_PATH" "--ai_provider=$PROVIDER" --quiet)
+  echo "Executing command..."
+  echo "--- Agent Output for $test_file ---"
+  "${COMMAND[@]}" # A non-zero exit aborts the whole suite via `set -e`.
+  echo "--- Test Complete ---"
+  echo ""
+done
+if ((MISSING_COUNT > 0)); then
+  print_header "❌ $MISSING_COUNT test file(s) were missing; suite incomplete."
+  exit 1
+fi
+print_header "✅ All tests completed successfully!"
--- a/scripts/manual_gemini_test.sh
+++ b/scripts/manual_gemini_test.sh
@@ -1,129 +0,0 @@
-#!/bin/bash
-# Manual Gemini Integration Test
-# Usage: GEMINI_API_KEY='your-key' ./scripts/manual_gemini_test.sh
-
-set -e
-
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-PROJECT_ROOT="$SCRIPT_DIR/.."
-Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed"
-
-echo "🧪 Manual Gemini Integration Test"
-echo "=================================="
-echo ""
-
-# Check if API key is set
-if [ -z "$GEMINI_API_KEY" ]; then
-    echo "❌ Error: GEMINI_API_KEY not set"
-    echo ""
-    echo "Usage:"
-    echo "  GEMINI_API_KEY='your-api-key-here' ./scripts/manual_gemini_test.sh"
-    echo ""
-    echo "Or export it first:"
-    echo "  export GEMINI_API_KEY='your-api-key-here'"
-    echo "  ./scripts/manual_gemini_test.sh"
-    exit 1
-fi
-
-echo "✅ GEMINI_API_KEY is set (length: ${#GEMINI_API_KEY} chars)"
-echo ""
-
-# Test 1: Simple palette command
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Test 1: Simple palette color change"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Prompt: 'Change palette 0 color 5 to red'"
-echo ""
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Change palette 0 color 5 to red" 2>&1)
-echo "$OUTPUT"
-echo ""
-
-if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
-    echo "✅ Gemini service detected"
-else
-    echo "❌ Expected 'Using Gemini AI' in output"
-    exit 1
-fi
-
-if echo "$OUTPUT" | grep -q -E "palette|color"; then
-    echo "✅ Generated palette-related commands"
-else
-    echo "❌ No palette commands found"
-    exit 1
-fi
-
-echo ""
-
-# Test 2: Overworld modification
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Test 2: Overworld tile placement"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Prompt: 'Place a tree at position (10, 20) on map 0'"
-echo ""
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at position (10, 20) on map 0" 2>&1)
-echo "$OUTPUT"
-echo ""
-
-if echo "$OUTPUT" | grep -q "overworld"; then
-    echo "✅ Generated overworld commands"
-else
-    echo "⚠️  No overworld commands (model may have interpreted differently)"
-fi
-
-echo ""
-
-# Test 3: Complex multi-step task
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Test 3: Multi-step task"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Prompt: 'Export palette 0, change color 3 to blue, and import it back'"
-echo ""
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0, change color 3 to blue, and import it back" 2>&1)
-echo "$OUTPUT"
-echo ""
-
-COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*-" || true)
-
-if [ "$COMMAND_COUNT" -ge 2 ]; then
-    echo "✅ Generated multiple commands ($COMMAND_COUNT commands)"
-else
-    echo "⚠️  Expected multiple commands, got $COMMAND_COUNT"
-fi
-
-echo ""
-
-# Test 4: Direct run command (creates proposal)
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Test 4: Direct run command (creates proposal)"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "Prompt: 'Validate the ROM'"
-echo ""
-
-OUTPUT=$($Z3ED_BIN agent run --prompt "Validate the ROM" 2>&1 || true)
-echo "$OUTPUT"
-echo ""
-
-if echo "$OUTPUT" | grep -q "Proposal"; then
-    echo "✅ Proposal created"
-else
-    echo "ℹ️  No proposal created (may need ROM file)"
-fi
-
-echo ""
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo "🎉 Manual Test Suite Complete!"
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
-echo ""
-echo "Summary:"
-echo "  • Gemini API integration: ✅ Working"
-echo "  • Command generation: ✅ Functional"
-echo "  • Service factory: ✅ Correct provider selection"
-echo ""
-echo "Next steps:"
-echo "  1. Review generated commands for accuracy"
-echo "  2. Test with more complex prompts"
-echo "  3. Compare with Ollama output quality"
-echo "  4. Proceed to Phase 3 (Claude) or Phase 4 (Enhanced Prompting)"
--- a/scripts/quickstart_ollama.sh
+++ b/scripts/quickstart_ollama.sh
@@ -1,128 +0,0 @@
-#!/bin/bash
-# Quick Start Script for Testing Ollama Integration with z3ed
-# Usage: ./scripts/quickstart_ollama.sh
-
-set -e
-
-echo "🚀 z3ed + Ollama Quick Start"
-echo "================================"
-echo ""
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Step 1: Check if Ollama is installed
-echo "📦 Step 1: Checking Ollama installation..."
-if ! command -v ollama &> /dev/null; then
-    echo -e "${RED}✗ Ollama not found${NC}"
-    echo ""
-    echo "Install Ollama with:"
-    echo "  macOS:  brew install ollama"
-    echo "  Linux:  curl -fsSL https://ollama.com/install.sh | sh"
-    echo ""
-    exit 1
-fi
-echo -e "${GREEN}✓ Ollama installed${NC}"
-echo ""
-
-# Step 2: Check if Ollama server is running
-echo "🔌 Step 2: Checking Ollama server..."
-if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
-    echo -e "${YELLOW}⚠ Ollama server not running${NC}"
-    echo ""
-    echo "Starting Ollama server in background..."
-    ollama serve > /dev/null 2>&1 &
-    OLLAMA_PID=$!
-    echo "Waiting for server to start..."
-    sleep 3
-    
-    if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
-        echo -e "${RED}✗ Failed to start Ollama server${NC}"
-        exit 1
-    fi
-    echo -e "${GREEN}✓ Ollama server started (PID: $OLLAMA_PID)${NC}"
-else
-    echo -e "${GREEN}✓ Ollama server running${NC}"
-fi
-echo ""
-
-# Step 3: Check if recommended model is available
-RECOMMENDED_MODEL="qwen2.5-coder:7b"
-echo "🤖 Step 3: Checking for model: $RECOMMENDED_MODEL..."
-if ! ollama list | grep -q "$RECOMMENDED_MODEL"; then
-    echo -e "${YELLOW}⚠ Model not found${NC}"
-    echo ""
-    read -p "Pull $RECOMMENDED_MODEL? (~4.7GB download) [y/N]: " -n 1 -r
-    echo ""
-    if [[ $REPLY =~ ^[Yy]$ ]]; then
-        echo "Pulling model (this may take a few minutes)..."
-        ollama pull "$RECOMMENDED_MODEL"
-        echo -e "${GREEN}✓ Model pulled successfully${NC}"
-    else
-        echo -e "${RED}✗ Model required for testing${NC}"
-        exit 1
-    fi
-else
-    echo -e "${GREEN}✓ Model available${NC}"
-fi
-echo ""
-
-# Step 4: Check if z3ed is built
-echo "🔨 Step 4: Checking z3ed build..."
-if [ ! -f "./build/bin/z3ed" ]; then
-    echo -e "${YELLOW}⚠ z3ed not found in ./build/bin/${NC}"
-    echo ""
-    echo "Building z3ed..."
-    cmake --build build --target z3ed
-    if [ ! -f "./build/bin/z3ed" ]; then
-        echo -e "${RED}✗ Failed to build z3ed${NC}"
-        exit 1
-    fi
-fi
-echo -e "${GREEN}✓ z3ed ready${NC}"
-echo ""
-
-# Step 5: Test Ollama integration
-echo "🧪 Step 5: Testing z3ed + Ollama integration..."
-export YAZE_AI_PROVIDER=ollama
-export OLLAMA_MODEL="$RECOMMENDED_MODEL"
-
-echo ""
-echo "Running test command:"
-echo -e "${BLUE}z3ed agent plan --prompt \"Validate the ROM file\"${NC}"
-echo ""
-
-if ./build/bin/z3ed agent plan --prompt "Validate the ROM file"; then
-    echo ""
-    echo -e "${GREEN}✓ Integration test passed!${NC}"
-else
-    echo ""
-    echo -e "${RED}✗ Integration test failed${NC}"
-    echo "Check error messages above for details"
-    exit 1
-fi
-
-echo ""
-echo "================================"
-echo -e "${GREEN}🎉 Setup Complete!${NC}"
-echo ""
-echo "Next steps:"
-echo "  1. Try a full agent run:"
-echo "     export YAZE_AI_PROVIDER=ollama"
-echo "     z3ed agent run --prompt \"Export first palette\" --rom zelda3.sfc --sandbox"
-echo ""
-echo "  2. Review generated commands:"
-echo "     z3ed agent list"
-echo "     z3ed agent diff"
-echo ""
-echo "  3. Try different models:"
-echo "     ollama pull codellama:13b"
-echo "     export OLLAMA_MODEL=codellama:13b"
-echo ""
-echo "  4. Read the docs:"
-echo "     docs/z3ed/LLM-INTEGRATION-PLAN.md"
-echo ""
--- a/scripts/test_agent_conversation_live.sh
+++ b/scripts/test_agent_conversation_live.sh
@@ -1,130 +0,0 @@
-#!/bin/bash
-# Live testing script for conversational agent
-# Tests agent function calling with real Ollama/Gemini backends
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-Z3ED="${PROJECT_ROOT}/build/bin/z3ed"
-ROM_FILE="${PROJECT_ROOT}/assets/zelda3.sfc"
-
-# Colors
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m'
-
-echo "========================================="
-echo "Live Conversational Agent Test"
-echo "========================================="
-echo ""
-
-# Prerequisites check
-if [ ! -f "$Z3ED" ]; then
-    echo -e "${RED}✗ z3ed not found at $Z3ED${NC}"
-    echo "Build with: cmake --build build --target z3ed"
-    exit 1
-fi
-
-if [ ! -f "$ROM_FILE" ]; then
-    echo -e "${RED}✗ ROM file not found at $ROM_FILE${NC}"
-    exit 1
-fi
-
-echo -e "${GREEN}✓ Prerequisites met${NC}"
-echo ""
-
-# Check for AI backends
-BACKEND_AVAILABLE=false
-
-echo "Checking AI Backends..."
-echo "-----------------------"
-
-# Check Ollama
-if command -v ollama &> /dev/null; then
-    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
-        echo -e "${GREEN}✓ Ollama server running${NC}"
-        if ollama list | grep -q "qwen2.5-coder"; then
-            echo -e "${GREEN}✓ qwen2.5-coder model available${NC}"
-            BACKEND_AVAILABLE=true
-            AI_BACKEND="Ollama"
-        else
-            echo -e "${YELLOW}⚠ Recommended model qwen2.5-coder:7b not installed${NC}"
-            echo "  Install with: ollama pull qwen2.5-coder:7b"
-        fi
-    else
-        echo -e "${YELLOW}⚠ Ollama not running${NC}"
-        echo "  Start with: ollama serve"
-    fi
-else
-    echo -e "${YELLOW}⚠ Ollama not installed${NC}"
-fi
-
-# Check Gemini
-if [ -n "$GEMINI_API_KEY" ]; then
-    echo -e "${GREEN}✓ Gemini API key set${NC}"
-    BACKEND_AVAILABLE=true
-    if [ "$AI_BACKEND" != "Ollama" ]; then
-        AI_BACKEND="Gemini"
-    fi
-else
-    echo -e "${YELLOW}⚠ GEMINI_API_KEY not set${NC}"
-fi
-
-echo ""
-
-if [ "$BACKEND_AVAILABLE" = false ]; then
-    echo -e "${RED}✗ No AI backend available${NC}"
-    echo ""
-    echo "Please set up at least one backend:"
-    echo "  - Ollama: brew install ollama && ollama serve && ollama pull qwen2.5-coder:7b"
-    echo "  - Gemini: export GEMINI_API_KEY='your-key-here'"
-    exit 1
-fi
-
-echo -e "${GREEN}✓ Using AI Backend: $AI_BACKEND${NC}"
-echo ""
-
-# Run the test-conversation command with default test cases
-echo "========================================="
-echo "Running Automated Conversation Tests"
-echo "========================================="
-echo ""
-echo "This will run 5 default test cases:"
-echo "  1. Simple ROM introspection (dungeon query)"
-echo "  2. Overworld tile search"
-echo "  3. Multi-step conversation"
-echo "  4. Command generation (tile placement)"
-echo "  5. Map description"
-echo ""
-
-read -p "Press Enter to start tests (or Ctrl+C to cancel)..."
-echo ""
-
-# Run the tests
-"$Z3ED" agent test-conversation --rom "$ROM_FILE" --verbose
-
-TEST_EXIT_CODE=$?
-
-echo ""
-echo "========================================="
-echo "Test Results"
-echo "========================================="
-
-if [ $TEST_EXIT_CODE -eq 0 ]; then
-    echo -e "${GREEN}✅ All tests completed successfully${NC}"
-else
-    echo -e "${RED}❌ Tests failed with exit code $TEST_EXIT_CODE${NC}"
-fi
-
-echo ""
-echo "Next Steps:"
-echo "  - Review the output above for any warnings"
-echo "  - Check if tool calls are being invoked correctly"
-echo "  - Verify JSON/table formatting is working"
-echo "  - Test with custom conversation file: z3ed agent test-conversation --file my_tests.json"
-echo ""
-
-exit $TEST_EXIT_CODE
--- a/scripts/test_gemini_integration.sh
+++ b/scripts/test_gemini_integration.sh
@@ -1,213 +0,0 @@
-#!/bin/bash
-# Integration test for Gemini AI Service (Phase 2)
-
-set -e  # Exit on error
-
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-PROJECT_ROOT="$SCRIPT_DIR/.."
-Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed"
-
-echo "🧪 Gemini AI Integration Test Suite"
-echo "======================================"
-
-# Color output helpers
-GREEN='\033[0;32m'
-RED='\033[0;31m'
-YELLOW='\033[0;33m'
-NC='\033[0m' # No Color
-
-pass() {
-    echo -e "${GREEN}✓${NC} $1"
-}
-
-fail() {
-    echo -e "${RED}✗${NC} $1"
-    exit 1
-}
-
-warn() {
-    echo -e "${YELLOW}⚠${NC} $1"
-}
-
-# Test 1: z3ed executable exists
-echo ""
-echo "Test 1: z3ed executable exists"
-if [ -f "$Z3ED_BIN" ]; then
-    pass "z3ed executable found at $Z3ED_BIN"
-else
-    fail "z3ed executable not found. Run: cmake --build build --target z3ed"
-fi
-
-# Test 2: Check GEMINI_API_KEY environment variable
-echo ""
-echo "Test 2: Check GEMINI_API_KEY environment variable"
-if [ -z "$GEMINI_API_KEY" ]; then
-    warn "GEMINI_API_KEY not set - skipping API tests"
-    echo "   To test Gemini integration:"
-    echo "   1. Get API key at: https://makersuite.google.com/app/apikey"
-    echo "   2. Run: export GEMINI_API_KEY='your-api-key'"
-    echo "   3. Re-run this script"
-    
-    # Still test that service factory handles missing key gracefully
-    echo ""
-    echo "Test 2a: Verify graceful fallback without API key"
-    unset YAZE_AI_PROVIDER
-    OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree" 2>&1)
-    
-    if echo "$OUTPUT" | grep -q "Using MockAIService"; then
-        pass "Service factory falls back to Mock when GEMINI_API_KEY missing"
-    else
-        fail "Service factory should fall back to Mock without API key"
-    fi
-    
-    echo ""
-    echo "⏭️  Skipping remaining Gemini API tests (no API key)"
-    exit 0
-fi
-
-pass "GEMINI_API_KEY is set"
-
-# Test 3: Verify Gemini model availability
-echo ""
-echo "Test 3: Verify Gemini model availability"
-GEMINI_MODEL="${GEMINI_MODEL:-gemini-2.5-flash}"
-echo "   Testing with model: $GEMINI_MODEL"
-
-# Quick API check
-HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
-    -H "x-goog-api-key: $GEMINI_API_KEY" \
-    "https://generativelanguage.googleapis.com/v1beta/models/$GEMINI_MODEL")
-
-if [ "$HTTP_CODE" = "200" ]; then
-    pass "Gemini API accessible, model '$GEMINI_MODEL' available"
-elif [ "$HTTP_CODE" = "401" ] || [ "$HTTP_CODE" = "403" ]; then
-    fail "Invalid Gemini API key (HTTP $HTTP_CODE)"
-elif [ "$HTTP_CODE" = "404" ]; then
-    fail "Model '$GEMINI_MODEL' not found (HTTP 404)"
-else
-    warn "Unexpected HTTP status: $HTTP_CODE (continuing anyway)"
-fi
-
-# Test 4: Generate commands with Gemini (simple prompt)
-echo ""
-echo "Test 4: Generate commands with Gemini (simple prompt)"
-unset YAZE_AI_PROVIDER  # Let service factory auto-detect from GEMINI_API_KEY
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Change the color of palette 0 index 5 to red" 2>&1)
-
-if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
-    pass "Service factory selected Gemini"
-else
-    fail "Expected 'Using Gemini AI' in output, got: $OUTPUT"
-fi
-
-if echo "$OUTPUT" | grep -q "palette"; then
-    pass "Gemini generated palette-related commands"
-    echo "   Generated commands:"
-    echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/   /'
-else
-    fail "Expected palette commands in output, got: $OUTPUT"
-fi
-
-# Test 5: Generate commands with complex prompt
-echo ""
-echo "Test 5: Generate commands with complex prompt (overworld modification)"
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at coordinates (10, 20) on overworld map 0" 2>&1)
-
-if echo "$OUTPUT" | grep -q "overworld"; then
-    pass "Gemini generated overworld commands"
-    echo "   Generated commands:"
-    echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/   /'
-else
-    fail "Expected overworld commands in output, got: $OUTPUT"
-fi
-
-# Test 6: Test explicit provider selection
-echo ""
-echo "Test 6: Test explicit provider selection (YAZE_AI_PROVIDER=gemini)"
-# Note: Current implementation doesn't have explicit "gemini" provider value
-# It auto-detects from GEMINI_API_KEY. But we can test that Ollama doesn't override.
-unset YAZE_AI_PROVIDER
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0" 2>&1)
-
-if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
-    pass "Gemini selected when GEMINI_API_KEY present"
-else
-    warn "Expected Gemini selection, got: $OUTPUT"
-fi
-
-# Test 7: Verify JSON response parsing
-echo ""
-echo "Test 7: Verify JSON response parsing (check for command format)"
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Set tile at (5,5) to 0x100" 2>&1)
-
-# Commands should NOT have "z3ed" prefix (service should strip it)
-if echo "$OUTPUT" | grep -E "^\s*- z3ed"; then
-    warn "Commands still contain 'z3ed' prefix (should be stripped)"
-else
-    pass "Commands properly formatted without 'z3ed' prefix"
-fi
-
-# Test 8: Test multiple commands in response
-echo ""
-echo "Test 8: Test multiple commands generation"
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0 to test.json, change color 5 to red, then import it back" 2>&1)
-
-COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*- " || true)
-
-if [ "$COMMAND_COUNT" -ge 2 ]; then
-    pass "Gemini generated multiple commands ($COMMAND_COUNT commands)"
-    echo "   Commands:"
-    echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/   /'
-else
-    warn "Expected multiple commands, got $COMMAND_COUNT"
-fi
-
-# Test 9: Error handling - invalid API key
-echo ""
-echo "Test 9: Error handling with invalid API key"
-SAVED_KEY="$GEMINI_API_KEY"
-export GEMINI_API_KEY="invalid_key_12345"
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1 || true)
-
-if echo "$OUTPUT" | grep -q "Invalid Gemini API key\|Falling back to MockAIService"; then
-    pass "Service handles invalid API key gracefully"
-else
-    warn "Expected error handling message, got: $OUTPUT"
-fi
-
-# Restore key
-export GEMINI_API_KEY="$SAVED_KEY"
-
-# Test 10: Model override via environment
-echo ""
-echo "Test 10: Model override via GEMINI_MODEL environment variable"
-export GEMINI_MODEL="gemini-1.5-pro"
-
-OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1)
-
-if echo "$OUTPUT" | grep -q "gemini-1.5-pro"; then
-    pass "GEMINI_MODEL environment variable respected"
-else
-    warn "Expected model override, got: $OUTPUT"
-fi
-
-unset GEMINI_MODEL
-
-echo ""
-echo "======================================"
-echo "✅ Gemini Integration Test Suite Complete"
-echo ""
-echo "Summary:"
-echo "  - Gemini API accessible"
-echo "  - Command generation working"
-echo "  - Error handling functional"
-echo "  - JSON parsing robust"
-echo ""
-echo "Next steps:"
-echo "  1. Test with various prompt types"
-echo "  2. Measure response latency"
-echo "  3. Compare accuracy with Ollama"
-echo "  4. Consider rate limiting for production"
--- a/scripts/test_ollama_integration.sh
+++ b/scripts/test_ollama_integration.sh
@@ -1,172 +0,0 @@
-#!/bin/bash
-# Test script for Ollama AI service integration
-# This script validates Phase 1 implementation
-
-set -e
-
-echo "🧪 Testing Ollama AI Service Integration (Phase 1)"
-echo "=================================================="
-echo ""
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-TESTS_PASSED=0
-TESTS_FAILED=0
-
-# Helper functions
-pass_test() {
-    echo -e "${GREEN}✓ PASS:${NC} $1"
-    ((TESTS_PASSED++))
-}
-
-fail_test() {
-    echo -e "${RED}✗ FAIL:${NC} $1"
-    ((TESTS_FAILED++))
-}
-
-info() {
-    echo -e "${BLUE}ℹ${NC} $1"
-}
-
-# Test 1: Check if z3ed built successfully
-echo "Test 1: z3ed executable exists"
-if [ -f "./build/bin/z3ed" ]; then
-    pass_test "z3ed executable found"
-else
-    fail_test "z3ed executable not found"
-    exit 1
-fi
-echo ""
-
-# Test 2: Test MockAIService fallback (no LLM configured)
-echo "Test 2: MockAIService fallback"
-unset YAZE_AI_PROVIDER
-unset GEMINI_API_KEY
-unset CLAUDE_API_KEY
-
-OUTPUT=$(./build/bin/z3ed agent plan --prompt "Place a tree" 2>&1 || true)
-if echo "$OUTPUT" | grep -q "Using MockAIService"; then
-    pass_test "MockAIService activated when no LLM configured"
-    if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
-        pass_test "MockAIService generated commands"
-    fi
-else
-    fail_test "MockAIService fallback not working"
-fi
-echo ""
-
-# Test 3: Test Ollama provider selection (without server)
-echo "Test 3: Ollama provider selection (without server running)"
-export YAZE_AI_PROVIDER=ollama
-
-OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate ROM" 2>&1 || true)
-if echo "$OUTPUT" | grep -q "Ollama unavailable"; then
-    pass_test "Ollama health check detected unavailable server"
-    if echo "$OUTPUT" | grep -q "Falling back to MockAIService"; then
-        pass_test "Graceful fallback to MockAIService"
-    else
-        fail_test "Did not fall back to MockAIService"
-    fi
-else
-    info "Note: If Ollama is running, this test will pass differently"
-fi
-echo ""
-
-# Test 4: Check if Ollama is installed
-echo "Test 4: Ollama installation check"
-if command -v ollama &> /dev/null; then
-    pass_test "Ollama is installed"
-    
-    # Test 5: Check if Ollama server is running
-    echo ""
-    echo "Test 5: Ollama server availability"
-    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
-        pass_test "Ollama server is running"
-        
-        # Test 6: Check for qwen2.5-coder model
-        echo ""
-        echo "Test 6: qwen2.5-coder:7b model availability"
-        if ollama list | grep -q "qwen2.5-coder:7b"; then
-            pass_test "Recommended model is available"
-            
-            # Test 7: End-to-end test with Ollama
-            echo ""
-            echo "Test 7: End-to-end LLM command generation"
-            export YAZE_AI_PROVIDER=ollama
-            export OLLAMA_MODEL=qwen2.5-coder:7b
-            
-            info "Testing: 'agent plan --prompt \"Validate the ROM\"'"
-            OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate the ROM" 2>&1)
-            
-            if echo "$OUTPUT" | grep -q "Using Ollama AI"; then
-                pass_test "Ollama AI service activated"
-            else
-                fail_test "Ollama AI service not activated"
-            fi
-            
-            if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
-                pass_test "Command generation completed"
-                
-                # Check if reasonable commands were generated
-                if echo "$OUTPUT" | grep -q "rom"; then
-                    pass_test "Generated ROM-related command"
-                else
-                    fail_test "Generated command doesn't seem ROM-related"
-                fi
-            else
-                fail_test "No commands generated"
-            fi
-            
-            echo ""
-            echo "Generated output:"
-            echo "---"
-            echo "$OUTPUT"
-            echo "---"
-            
-        else
-            fail_test "qwen2.5-coder:7b not found"
-            info "Install with: ollama pull qwen2.5-coder:7b"
-        fi
-    else
-        fail_test "Ollama server not running"
-        info "Start with: ollama serve"
-    fi
-else
-    fail_test "Ollama not installed"
-    info "Install with: brew install ollama (macOS)"
-    info "Or visit: https://ollama.com/download"
-fi
-
-echo ""
-echo "=================================================="
-echo "Test Summary:"
-echo -e "  ${GREEN}Passed: $TESTS_PASSED${NC}"
-echo -e "  ${RED}Failed: $TESTS_FAILED${NC}"
-echo ""
-
-if [ $TESTS_FAILED -eq 0 ]; then
-    echo -e "${GREEN}✓ All tests passed!${NC}"
-    echo ""
-    echo "Next steps:"
-    echo "  1. If Ollama tests were skipped, install and configure:"
-    echo "     brew install ollama"
-    echo "     ollama serve &"
-    echo "     ollama pull qwen2.5-coder:7b"
-    echo ""
-    echo "  2. Try the full agent workflow:"
-    echo "     export YAZE_AI_PROVIDER=ollama"
-    echo "     ./build/bin/z3ed agent run --prompt \"Validate ROM\" --rom zelda3.sfc --sandbox"
-    echo ""
-    echo "  3. Check the implementation checklist:"
-    echo "     docs/z3ed/LLM-IMPLEMENTATION-CHECKLIST.md"
-    exit 0
-else
-    echo -e "${RED}✗ Some tests failed${NC}"
-    echo "Review the output above for details"
-    exit 1
-fi