#!/usr/bin/env python3
"""
VRAM scaling test - measures Python process GPU memory usage for 1, 2, 3, and 4 streams.
"""
import os
import time
import subprocess

from dotenv import load_dotenv

from services import StreamDecoderFactory

# Load environment variables from .env file
load_dotenv()

# Load camera URLs from environment (CAMERA_URL_1, CAMERA_URL_2, ...)
camera_urls = []
i = 1
while True:
    url = os.getenv(f'CAMERA_URL_{i}')
    if url:
        camera_urls.append(url)
        i += 1
    else:
        break

if not camera_urls:
    print("Error: No camera URLs found in .env file")
    print("Please add CAMERA_URL_1, CAMERA_URL_2, etc. to your .env file")
    exit(1)


def get_python_gpu_memory():
    """Get this Python process's GPU memory usage in MB, via nvidia-smi."""
    try:
        pid = os.getpid()
        result = subprocess.run(
            ['nvidia-smi', '--query-compute-apps=pid,used_memory',
             '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True
        )
        # Each output line is "<pid>, <used_memory_mb>"; find our own PID.
        for line in result.stdout.strip().split('\n'):
            if line:
                parts = line.split(',')
                if len(parts) >= 2 and int(parts[0].strip()) == pid:
                    return int(parts[1].strip())
        return 0
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        # nvidia-smi missing, failed, or produced unparseable output
        return 0


def test_n_streams(n, wait_time=15):
    """Test with n streams, reporting VRAM after each setup stage."""
    print(f"\n{'='*80}")
    print(f"Testing with {n} stream(s)")
    print('='*80)

    mem_before = get_python_gpu_memory()
    print(f"Python process VRAM before: {mem_before} MB")

    # Create factory
    factory = StreamDecoderFactory(gpu_id=0)
    time.sleep(1)
    mem_after_factory = get_python_gpu_memory()
    print(f"After factory: {mem_after_factory} MB (+{mem_after_factory - mem_before} MB)")

    # Create decoders
    decoders = []
    for i in range(n):
        decoder = factory.create_decoder(camera_urls[i], buffer_size=30)
        decoders.append(decoder)
    time.sleep(1)
    mem_after_create = get_python_gpu_memory()
    print(f"After creating {n} decoder(s): {mem_after_create} MB "
          f"(+{mem_after_create - mem_after_factory} MB)")

    # Start decoders
    for decoder in decoders:
        decoder.start()
    time.sleep(2)
    mem_after_start = get_python_gpu_memory()
    print(f"After starting {n} decoder(s): {mem_after_start} MB "
          f"(+{mem_after_start - mem_after_create} MB)")

    # Wait for connection
    print(f"Waiting {wait_time}s for streams to connect and stabilize...")
    time.sleep(wait_time)

    # Check connection status
    connected = sum(1 for d in decoders if d.is_connected())
    mem_stable = get_python_gpu_memory()
    print(f"Connected: {connected}/{n} streams")
    print(f"Python process VRAM (stable): {mem_stable} MB")

    # Get frame stats
    for i, decoder in enumerate(decoders):
        print(f"  Stream {i+1}: {decoder.get_status().value:10s} "
              f"Buffer: {decoder.get_buffer_size()}/30 "
              f"Frames: {decoder.frame_count}")

    # Cleanup
    for decoder in decoders:
        decoder.stop()
    time.sleep(2)
    mem_after_cleanup = get_python_gpu_memory()
    print(f"After cleanup: {mem_after_cleanup} MB")

    return mem_stable


if __name__ == '__main__':
    print("Python VRAM Scaling Test")
    print(f"PID: {os.getpid()}")

    baseline = get_python_gpu_memory()
    print(f"Baseline Python process VRAM: {baseline} MB\n")

    results = {}
    for n in [1, 2, 3, 4]:
        mem = test_n_streams(n, wait_time=15)
        results[n] = mem
        print(f"\n→ {n} stream(s): {mem} MB (process total)")

        # Give time between tests
        if n < 4:
            print("\nWaiting 5s before next test...")
            time.sleep(5)

    # Summary
    print("\n" + "="*80)
    print("Python Process VRAM Scaling Summary")
    print("="*80)
    print(f"Baseline: {baseline:4d} MB")
    for n in [1, 2, 3, 4]:
        total = results[n]
        overhead = total - baseline
        per_stream = overhead / n
        print(f"{n} stream(s): {total:4d} MB "
              f"(+{overhead:3d} MB total, {per_stream:5.1f} MB per stream)")

    # Marginal cost of each additional stream
    print("\nMarginal cost per additional stream:")
    for n in [2, 3, 4]:
        marginal = results[n] - results[n-1]
        print(f"  Stream {n}: +{marginal} MB")

    print("="*80)
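

# ----------------------------------------------------------------------------
# Optional alternative: query per-process GPU memory via NVML instead of
# shelling out to nvidia-smi and parsing CSV on every poll. A minimal sketch,
# assuming the nvidia-ml-py package (imported as `pynvml`) is installed; it is
# not required by the test above. If you swap it in for get_python_gpu_memory(),
# move the definition above the __main__ block.
# ----------------------------------------------------------------------------
def get_python_gpu_memory_nvml(gpu_id=0):
    """Get this Python process's GPU memory usage in MB via NVML."""
    try:
        import pynvml
        pynvml.nvmlInit()
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
            for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if proc.pid == os.getpid():
                    # usedGpuMemory is reported in bytes; it can be None when
                    # the driver does not expose per-process accounting.
                    return (proc.usedGpuMemory or 0) // (1024 * 1024)
            return 0
        finally:
            pynvml.nvmlShutdown()
    except Exception:
        # Mirror the nvidia-smi helper: treat any NVML failure as 0 MB.
        return 0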