backend-infra-engineer: Post v0.3.9-hotfix7 snapshot (build cleanup)
scripts/test_runner.py (new file, 434 lines)
@@ -0,0 +1,434 @@
#!/usr/bin/env python3
"""
Advanced test runner with automatic sharding and parallel execution for yaze.

Optimizes test execution time by distributing tests across multiple processes.
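
Example invocation (the binary path here is hypothetical):

    python scripts/test_runner.py build/bin/yaze_test --shards 4 --retry 1 -v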
"""
|
||||
|
||||
import multiprocessing
|
||||
import json
|
||||
import subprocess
|
||||
import time
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple, Optional
|
||||
from dataclasses import dataclass, asdict
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
import hashlib
|
||||
|
||||
@dataclass
|
||||
class TestResult:
|
||||
"""Container for test execution results."""
|
||||
name: str
|
||||
status: str # passed, failed, skipped
|
||||
duration: float
|
||||
output: str
|
||||
shard_id: int
|
||||
|
||||
@dataclass
|
||||
class ShardResult:
|
||||
"""Results from a single test shard."""
|
||||
shard_id: int
|
||||
return_code: int
|
||||
tests_run: int
|
||||
tests_passed: int
|
||||
tests_failed: int
|
||||
duration: float
|
||||
test_results: List[TestResult]
|
||||
|
||||
class TestRunner:
    """Advanced test runner with sharding and parallel execution."""

    def __init__(self, test_binary: str, num_shards: Optional[int] = None,
                 cache_dir: Optional[str] = None, verbose: bool = False):
        self.test_binary = Path(test_binary).resolve()
        if not self.test_binary.exists():
            raise FileNotFoundError(f"Test binary not found: {test_binary}")

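        # Default to one shard per CPU core, capped at 8; beyond that the
        # per-process startup and aggregation overhead tends to outweigh the
        # speedup (a heuristic cap, not a measured limit).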
        self.num_shards = num_shards or min(multiprocessing.cpu_count(), 8)
        self.cache_dir = Path(cache_dir or Path.home() / ".yaze_test_cache")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.verbose = verbose
        self.test_times = self.load_test_times()

    def load_test_times(self) -> Dict[str, float]:
        """Load historical test execution times from cache."""
        cache_file = self.cache_dir / "test_times.json"
        if cache_file.exists():
            try:
                return json.loads(cache_file.read_text())
            except (json.JSONDecodeError, IOError):
                return {}
        return {}

    def save_test_times(self, test_times: Dict[str, float]) -> None:
        """Save test execution times to cache."""
        cache_file = self.cache_dir / "test_times.json"

        # Merge with existing times ("duration" avoids shadowing the time module).
        existing = self.load_test_times()
        for test, duration in test_times.items():
            # Use an exponential moving average for smoothing:
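            # for example, a cached 1.00s with a new 2.00s sample stores
            # 0.7 * 1.00 + 0.3 * 2.00 = 1.30s, so estimates drift gradually.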
            if test in existing:
                existing[test] = 0.7 * existing[test] + 0.3 * duration
            else:
                existing[test] = duration

        cache_file.write_text(json.dumps(existing, indent=2))

    def discover_tests(self, filter_pattern: Optional[str] = None) -> List[str]:
        """Discover all tests in the binary."""
        cmd = [str(self.test_binary), "--gtest_list_tests"]
        if filter_pattern:
            cmd.append(f"--gtest_filter={filter_pattern}")

        try:
            result = subprocess.run(cmd, capture_output=True, text=True,
                                    timeout=30, check=False)
        except subprocess.TimeoutExpired:
            print("Warning: Test discovery timed out", file=sys.stderr)
            return []

        if result.returncode != 0:
            print(f"Warning: Test discovery failed: {result.stderr}", file=sys.stderr)
            return []

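        # `--gtest_list_tests` prints suite names flush-left with a trailing
        # "." and test names indented by two spaces; either line may carry a
        # trailing "#" comment for parameterized tests, e.g. (names made up):
        #
        #   SpriteTest.
        #     RendersCorrectly
        #     HandlesPalette  # GetParam() = 4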
        # Parse the gtest listing.
        tests = []
        current_suite = ""
        for line in result.stdout.splitlines():
            line = line.rstrip()
            if not line or line.startswith("Running main()"):
                continue

            if not line.startswith(" "):
                # Suite name; strip the trailing "." and any "# TypeParam = ..."
                # comment emitted for typed test suites.
                current_suite = line.split("#")[0].strip().rstrip(".")
            else:
                # Test case name; strip trailing comments
                # (e.g. "  TestName  # GetParam() = ...").
                test_name = line.split("#")[0].strip()
                if test_name:
                    tests.append(f"{current_suite}.{test_name}")

        if self.verbose:
            print(f"Discovered {len(tests)} tests")

        return tests

    def create_balanced_shards(self, tests: List[str]) -> List[List[str]]:
        """Create balanced shards based on historical execution times."""
        if not tests:
            return []

        # Sort tests by execution time (longest first), using historical
        # times where known and a default estimate otherwise.
        default_time = 0.1  # 100 ms default per test
        sorted_tests = sorted(
            tests,
            key=lambda t: self.test_times.get(t, default_time),
            reverse=True
        )

        # Initialize shards.
        num_shards = min(self.num_shards, len(tests))
        shards = [[] for _ in range(num_shards)]
        shard_times = [0.0] * num_shards

        # Distribute tests using greedy bin packing
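        # (longest-processing-time first): each test goes to the currently
        # least-loaded shard. For example, durations [5, 4, 3, 3] over two
        # shards yield shard 0 = 5+3 = 8.0s and shard 1 = 4+3 = 7.0s.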
        for test in sorted_tests:
            # Find the shard with the minimum total time.
            min_shard_idx = shard_times.index(min(shard_times))
            shards[min_shard_idx].append(test)
            shard_times[min_shard_idx] += self.test_times.get(test, default_time)

        # Remove empty shards.
        shards = [s for s in shards if s]

        if self.verbose:
            print(f"Created {len(shards)} shards:")
            for i, shard in enumerate(shards):
                print(f"  Shard {i}: {len(shard)} tests, "
                      f"estimated {shard_times[i]:.2f}s")

        return shards

    def run_shard(self, shard_id: int, tests: List[str],
                  output_dir: Optional[Path] = None) -> ShardResult:
        """Run a single shard of tests."""
        if not tests:
            return ShardResult(shard_id, 0, 0, 0, 0, 0.0, [])

        filter_str = ":".join(tests)
        output_dir = output_dir or self.cache_dir / "results"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Prepare command: results are written to a per-shard JSON report.
        json_output = output_dir / f"shard_{shard_id}_results.json"

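        # --gtest_output=json:<path> makes the binary write a machine-readable
        # report (parsed below); --gtest_brief=1 restricts console output to
        # failures.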
        cmd = [
            str(self.test_binary),
            f"--gtest_filter={filter_str}",
            f"--gtest_output=json:{json_output}",
            "--gtest_brief=1"
        ]

        # Run tests.
        start_time = time.time()
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=600  # 10 minute timeout per shard
            )
            duration = time.time() - start_time
        except subprocess.TimeoutExpired:
            print(f"Shard {shard_id} timed out!", file=sys.stderr)
            return ShardResult(shard_id, -1, len(tests), 0, len(tests),
                               time.time() - start_time, [])

        # Parse results.
        test_results = []
        tests_run = 0
        tests_passed = 0
        tests_failed = 0

        if json_output.exists():
            try:
                with open(json_output) as f:
                    data = json.load(f)

                for suite in data.get("testsuites", []):
                    for testcase in suite.get("testsuite", []):
                        test_name = f"{suite['name']}.{testcase['name']}"
                        # gtest reports result == "COMPLETED" even for failing
                        # tests; failures show up in the "failures" array, and
                        # skipped tests report result == "SKIPPED".
                        if testcase.get("result") == "SKIPPED":
                            status = "skipped"
                        elif testcase.get("failures"):
                            status = "failed"
                        else:
                            status = "passed"
                        test_duration = float(testcase.get("time", "0s").rstrip("s"))

                        test_results.append(TestResult(
                            name=test_name,
                            status=status,
                            duration=test_duration,
                            output=testcase.get("output", ""),
                            shard_id=shard_id
                        ))

                        tests_run += 1
                        if status == "passed":
                            tests_passed += 1
                        elif status == "failed":
                            tests_failed += 1

            except (json.JSONDecodeError, KeyError, IOError) as e:
                print(f"Warning: Failed to parse results for shard {shard_id}: {e}",
                      file=sys.stderr)

        return ShardResult(
            shard_id=shard_id,
            return_code=result.returncode,
            tests_run=tests_run,
            tests_passed=tests_passed,
            tests_failed=tests_failed,
            duration=duration,
            test_results=test_results
        )

    def run_parallel(self, filter_pattern: Optional[str] = None,
                     output_dir: Optional[str] = None) -> Tuple[int, Dict]:
        """Run tests in parallel shards."""
        # Discover tests.
        tests = self.discover_tests(filter_pattern)
        if not tests:
            print("No tests found to run")
            return 0, {}

print(f"Running {len(tests)} tests in up to {self.num_shards} shards...")
|
||||
|
||||
# Create shards
|
||||
shards = self.create_balanced_shards(tests)
|
||||
output_path = Path(output_dir) if output_dir else self.cache_dir / "results"
|
||||
|
||||
# Run shards in parallel
|
||||
all_results = []
|
||||
start_time = time.time()
|
||||
|
||||
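        # Each worker process runs the test binary as its own subprocess, so
        # shards execute concurrently and a crashing test process cannot take
        # down the runner.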
        with ProcessPoolExecutor(max_workers=len(shards)) as executor:
            # Submit all shards.
            futures = {
                executor.submit(self.run_shard, i, shard, output_path): i
                for i, shard in enumerate(shards)
            }

            # Collect results as shards finish.
            for future in as_completed(futures):
                shard_id = futures[future]
                try:
                    result = future.result()
                    all_results.append(result)

                    if self.verbose:
                        print(f"Shard {shard_id} completed: "
                              f"{result.tests_passed}/{result.tests_run} passed "
                              f"in {result.duration:.2f}s")
                except Exception as e:
                    print(f"Shard {shard_id} failed with exception: {e}",
                          file=sys.stderr)

        total_duration = time.time() - start_time

        # Aggregate results.
        total_tests = sum(r.tests_run for r in all_results)
        total_passed = sum(r.tests_passed for r in all_results)
        total_failed = sum(r.tests_failed for r in all_results)
        max_return_code = max((r.return_code for r in all_results), default=0)

        # Update the test times cache.
        new_times = {}
        for result in all_results:
            for test_result in result.test_results:
                new_times[test_result.name] = test_result.duration
        self.save_test_times(new_times)

        # Generate summary.
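        # parallel_efficiency = sum of per-shard wall times / (elapsed time x
        # shard count); 100% means perfectly balanced shards with no idle
        # workers, lower values indicate imbalance or scheduling overhead.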
        summary = {
            "total_tests": total_tests,
            "passed": total_passed,
            "failed": total_failed,
            "duration": total_duration,
            "num_shards": len(shards),
            "parallel_efficiency": (sum(r.duration for r in all_results) /
                                    (total_duration * len(shards)) * 100)
                                   if shards and total_duration > 0 else 0,
            "shards": [asdict(r) for r in all_results]
        }

        # Save summary.
        summary_file = output_path / "summary.json"
        summary_file.write_text(json.dumps(summary, indent=2))

        # Print results.
        print(f"\n{'=' * 60}")
        print("Test Execution Summary")
        print(f"{'=' * 60}")
        print(f"Total Tests: {total_tests}")
        # Guard against division by zero when no results could be parsed.
        pass_rate = total_passed / total_tests * 100 if total_tests else 0.0
        print(f"Passed: {total_passed} ({pass_rate:.1f}%)")
        print(f"Failed: {total_failed}")
        print(f"Duration: {total_duration:.2f}s")
        print(f"Shards Used: {len(shards)}")
        print(f"Efficiency: {summary['parallel_efficiency']:.1f}%")

        if total_failed > 0:
            print("\nFailed Tests:")
            for result in all_results:
                for test_result in result.test_results:
                    if test_result.status == "failed":
                        print(f"  - {test_result.name}")

        return max_return_code, summary

    def run_with_retry(self, filter_pattern: Optional[str] = None,
                       max_retries: int = 2) -> int:
        """Run tests with automatic retry for flaky tests."""
        failed_tests = set()
        attempt = 0

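        # Each retry pass reruns only the previously failing tests by joining
        # their names into a ":"-separated --gtest_filter pattern.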
        while attempt <= max_retries:
            if attempt > 0:
                # Only retry the tests that failed.
                if not failed_tests:
                    break
                filter_pattern = ":".join(failed_tests)
                print(f"\nRetry attempt {attempt} for {len(failed_tests)} failed tests")

            return_code, summary = self.run_parallel(filter_pattern)

            if return_code == 0:
                if attempt > 0:
                    print(f"All tests passed after {attempt} retries")
                return 0

            # Collect failed tests for the next attempt.
            failed_tests.clear()
            for shard in summary.get("shards", []):
                for test_result in shard.get("test_results", []):
                    if test_result.get("status") == "failed":
                        failed_tests.add(test_result.get("name"))

            attempt += 1

        print(f"Tests still failing after {max_retries} retries")
        return return_code


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Advanced test runner with parallel execution"
    )
    parser.add_argument(
        "test_binary",
        help="Path to the test binary"
    )
    parser.add_argument(
        "--shards",
        type=int,
        help="Number of parallel shards (default: CPU count, capped at 8)"
    )
    parser.add_argument(
        "--filter",
        help="Test filter pattern (gtest format)"
    )
    parser.add_argument(
        "--output-dir",
        help="Directory for test results"
    )
    parser.add_argument(
        "--cache-dir",
        help="Directory for the test cache (default: ~/.yaze_test_cache)"
    )
    parser.add_argument(
        "--retry",
        type=int,
        default=0,
        help="Number of retries for failed tests"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose output"
    )

    args = parser.parse_args()

    try:
        runner = TestRunner(
            test_binary=args.test_binary,
            num_shards=args.shards,
            cache_dir=args.cache_dir,
            verbose=args.verbose
        )

        if args.retry > 0:
            return_code = runner.run_with_retry(
                filter_pattern=args.filter,
                max_retries=args.retry
            )
        else:
            return_code, _ = runner.run_parallel(
                filter_pattern=args.filter,
                output_dir=args.output_dir
            )

        sys.exit(return_code)

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()