Some checks failed
		
		
	
	Build Worker Base and Application Images / check-base-changes (push) Successful in 7s
				
			Build Worker Base and Application Images / build-base (push) Has been skipped
				
			Build Worker Base and Application Images / build-docker (push) Successful in 2m38s
				
			Build Worker Base and Application Images / deploy-stack (push) Failing after 16s
				
			
		
			
				
	
	
		
			373 lines
		
	
	
		
			No EOL
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			373 lines
		
	
	
		
			No EOL
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
Pipeline Configuration Parser - Handles pipeline.json parsing and validation
 | 
						|
"""
 | 
						|
 | 
						|
import json
 | 
						|
import logging
 | 
						|
from pathlib import Path
 | 
						|
from typing import Dict, List, Any, Optional, Set
 | 
						|
from dataclasses import dataclass, field
 | 
						|
from enum import Enum
 | 
						|
 | 
						|
logger = logging.getLogger(__name__)
 | 
						|
 | 
						|
 | 
						|
class ActionType(Enum):
    """Action types that may appear in a pipeline's action lists.

    Each member's value is the string used in pipeline.json's "type" field.
    """
    REDIS_SAVE_IMAGE = "redis_save_image"
    REDIS_PUBLISH = "redis_publish"
    # PostgreSQL actions below are DEPRECATED - kept for backward compatibility only
    # These actions will be silently skipped during pipeline execution
    POSTGRESQL_UPDATE = "postgresql_update"
    POSTGRESQL_UPDATE_COMBINED = "postgresql_update_combined"
    POSTGRESQL_INSERT = "postgresql_insert"
 | 
						|
 | 
						|
 | 
						|
@dataclass
class RedisConfig:
    """Connection settings for the Redis instance used by pipeline actions."""
    host: str
    port: int = 6379
    password: Optional[str] = None
    db: int = 0

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RedisConfig':
        """Build a RedisConfig from a raw JSON dictionary.

        'host' is required; port, password and db fall back to their defaults
        when absent.
        """
        host = data['host']
        port = data.get('port', 6379)
        password = data.get('password')
        db = data.get('db', 0)
        return cls(host=host, port=port, password=password, db=db)
 | 
						|
 | 
						|
 | 
						|
@dataclass
class PostgreSQLConfig:
    """
    PostgreSQL connection configuration - DISABLED

    Retained only so that existing pipeline.json files still parse; no
    database connection is ever opened from this configuration. All
    database operations have been moved to a microservices architecture.

    This config will be parsed but not used for any database connections.
    """
    host: str
    port: int
    database: str
    username: str
    password: str

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PostgreSQLConfig':
        """Parse the (unused) PostgreSQL section; kept for backward compatibility."""
        kwargs = {
            'host': data['host'],
            'port': data.get('port', 5432),
            'database': data['database'],
            'username': data['username'],
            'password': data['password'],
        }
        return cls(**kwargs)
 | 
						|
 | 
						|
 | 
						|
@dataclass
class Action:
    """A single executable step attached to a pipeline or branch."""
    type: ActionType
    params: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Action':
        """Split a raw action dict into its ActionType and remaining parameters.

        The input dict is not mutated; every key except 'type' becomes a param.
        """
        params = dict(data)
        kind = ActionType(params.pop('type'))
        return cls(type=kind, params=params)
 | 
						|
 | 
						|
 | 
						|
@dataclass
class ModelBranch:
    """A nested branch of the pipeline that runs its own model."""
    model_id: str
    model_file: str
    trigger_classes: List[str]
    min_confidence: float = 0.5
    crop: bool = False
    crop_class: Optional[Any] = None  # Can be string or list
    parallel: bool = False
    actions: List[Action] = field(default_factory=list)
    branches: List['ModelBranch'] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ModelBranch':
        """Recursively build a branch (and its sub-branches) from raw JSON."""
        return cls(
            model_id=data['modelId'],
            model_file=data['modelFile'],
            trigger_classes=data.get('triggerClasses', []),
            min_confidence=data.get('minConfidence', 0.5),
            crop=data.get('crop', False),
            crop_class=data.get('cropClass'),
            parallel=data.get('parallel', False),
            actions=[Action.from_dict(raw) for raw in data.get('actions', [])],
            branches=[cls.from_dict(raw) for raw in data.get('branches', [])],
        )
 | 
						|
 | 
						|
 | 
						|
@dataclass
class TrackingConfig:
    """Settings for the object-tracking phase of the pipeline."""
    model_id: str
    model_file: str
    trigger_classes: List[str]
    min_confidence: float = 0.6

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TrackingConfig':
        """Build a TrackingConfig from the raw 'tracking' JSON section."""
        model_id = data['modelId']
        model_file = data['modelFile']
        classes = data.get('triggerClasses', [])
        confidence = data.get('minConfidence', 0.6)
        return cls(model_id, model_file, classes, confidence)
 | 
						|
 | 
						|
 | 
						|
@dataclass
class PipelineConfig:
    """Top-level pipeline configuration (the 'pipeline' section of the JSON)."""
    model_id: str
    model_file: str
    trigger_classes: List[str]
    min_confidence: float = 0.5
    crop: bool = False
    branches: List[ModelBranch] = field(default_factory=list)
    parallel_actions: List[Action] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PipelineConfig':
        """Build the root pipeline config, including branches and parallel actions."""
        return cls(
            model_id=data['modelId'],
            model_file=data['modelFile'],
            trigger_classes=data.get('triggerClasses', []),
            min_confidence=data.get('minConfidence', 0.5),
            crop=data.get('crop', False),
            branches=[ModelBranch.from_dict(raw) for raw in data.get('branches', [])],
            parallel_actions=[Action.from_dict(raw) for raw in data.get('parallelActions', [])],
        )
 | 
						|
 | 
						|
 | 
						|
class PipelineParser:
 | 
						|
    """Parser for pipeline.json configuration files"""
 | 
						|
 | 
						|
    def __init__(self):
 | 
						|
        self.redis_config: Optional[RedisConfig] = None
 | 
						|
        self.postgresql_config: Optional[PostgreSQLConfig] = None
 | 
						|
        self.tracking_config: Optional[TrackingConfig] = None
 | 
						|
        self.pipeline_config: Optional[PipelineConfig] = None
 | 
						|
        self._model_dependencies: Set[str] = set()
 | 
						|
 | 
						|
    def parse(self, config_path: Path) -> bool:
 | 
						|
        """
 | 
						|
        Parse a pipeline.json configuration file
 | 
						|
 | 
						|
        Args:
 | 
						|
            config_path: Path to the pipeline.json file
 | 
						|
 | 
						|
        Returns:
 | 
						|
            True if parsing was successful, False otherwise
 | 
						|
        """
 | 
						|
        try:
 | 
						|
            if not config_path.exists():
 | 
						|
                logger.error(f"Pipeline config not found: {config_path}")
 | 
						|
                return False
 | 
						|
 | 
						|
            with open(config_path, 'r') as f:
 | 
						|
                data = json.load(f)
 | 
						|
 | 
						|
            return self.parse_dict(data)
 | 
						|
 | 
						|
        except json.JSONDecodeError as e:
 | 
						|
            logger.error(f"Invalid JSON in pipeline config: {str(e)}")
 | 
						|
            return False
 | 
						|
        except Exception as e:
 | 
						|
            logger.error(f"Failed to parse pipeline config: {str(e)}", exc_info=True)
 | 
						|
            return False
 | 
						|
 | 
						|
    def parse_dict(self, data: Dict[str, Any]) -> bool:
 | 
						|
        """
 | 
						|
        Parse a pipeline configuration from a dictionary
 | 
						|
 | 
						|
        Args:
 | 
						|
            data: The configuration dictionary
 | 
						|
 | 
						|
        Returns:
 | 
						|
            True if parsing was successful, False otherwise
 | 
						|
        """
 | 
						|
        try:
 | 
						|
            # Parse Redis configuration
 | 
						|
            if 'redis' in data:
 | 
						|
                self.redis_config = RedisConfig.from_dict(data['redis'])
 | 
						|
                logger.debug(f"Parsed Redis config: {self.redis_config.host}:{self.redis_config.port}")
 | 
						|
 | 
						|
            # Parse PostgreSQL configuration
 | 
						|
            if 'postgresql' in data:
 | 
						|
                self.postgresql_config = PostgreSQLConfig.from_dict(data['postgresql'])
 | 
						|
                logger.debug(f"Parsed PostgreSQL config: {self.postgresql_config.host}:{self.postgresql_config.port}/{self.postgresql_config.database}")
 | 
						|
 | 
						|
            # Parse tracking configuration
 | 
						|
            if 'tracking' in data:
 | 
						|
                self.tracking_config = TrackingConfig.from_dict(data['tracking'])
 | 
						|
                self._model_dependencies.add(self.tracking_config.model_file)
 | 
						|
                logger.debug(f"Parsed tracking config: {self.tracking_config.model_id}")
 | 
						|
 | 
						|
            # Parse main pipeline configuration
 | 
						|
            if 'pipeline' in data:
 | 
						|
                self.pipeline_config = PipelineConfig.from_dict(data['pipeline'])
 | 
						|
                self._collect_model_dependencies(self.pipeline_config)
 | 
						|
                logger.debug(f"Parsed pipeline config: {self.pipeline_config.model_id}")
 | 
						|
 | 
						|
            logger.info(f"Successfully parsed pipeline configuration")
 | 
						|
            logger.debug(f"Model dependencies: {self._model_dependencies}")
 | 
						|
            return True
 | 
						|
 | 
						|
        except KeyError as e:
 | 
						|
            logger.error(f"Missing required field in pipeline config: {str(e)}")
 | 
						|
            return False
 | 
						|
        except Exception as e:
 | 
						|
            logger.error(f"Failed to parse pipeline config: {str(e)}", exc_info=True)
 | 
						|
            return False
 | 
						|
 | 
						|
    def _collect_model_dependencies(self, config: Any) -> None:
 | 
						|
        """
 | 
						|
        Recursively collect all model file dependencies
 | 
						|
 | 
						|
        Args:
 | 
						|
            config: Pipeline or branch configuration
 | 
						|
        """
 | 
						|
        if hasattr(config, 'model_file'):
 | 
						|
            self._model_dependencies.add(config.model_file)
 | 
						|
 | 
						|
        if hasattr(config, 'branches'):
 | 
						|
            for branch in config.branches:
 | 
						|
                self._collect_model_dependencies(branch)
 | 
						|
 | 
						|
    def get_model_dependencies(self) -> Set[str]:
 | 
						|
        """
 | 
						|
        Get all model file dependencies from the pipeline
 | 
						|
 | 
						|
        Returns:
 | 
						|
            Set of model filenames required by the pipeline
 | 
						|
        """
 | 
						|
        return self._model_dependencies.copy()
 | 
						|
 | 
						|
    def validate(self) -> bool:
 | 
						|
        """
 | 
						|
        Validate the parsed configuration
 | 
						|
 | 
						|
        Returns:
 | 
						|
            True if configuration is valid, False otherwise
 | 
						|
        """
 | 
						|
        if not self.pipeline_config:
 | 
						|
            logger.error("No pipeline configuration found")
 | 
						|
            return False
 | 
						|
 | 
						|
        # Check that all required model files are specified
 | 
						|
        if not self.pipeline_config.model_file:
 | 
						|
            logger.error("Main pipeline model file not specified")
 | 
						|
            return False
 | 
						|
 | 
						|
        # Validate action configurations
 | 
						|
        if not self._validate_actions(self.pipeline_config):
 | 
						|
            return False
 | 
						|
 | 
						|
        # Validate parallel actions (PostgreSQL actions are skipped)
 | 
						|
        for action in self.pipeline_config.parallel_actions:
 | 
						|
            if action.type == ActionType.POSTGRESQL_UPDATE_COMBINED:
 | 
						|
                logger.warning(f"PostgreSQL parallel action {action.type.value} found but will be SKIPPED (PostgreSQL disabled)")
 | 
						|
                # Skip validation for PostgreSQL actions since they won't be executed
 | 
						|
                # wait_for = action.params.get('waitForBranches', [])
 | 
						|
                # if wait_for:
 | 
						|
                #     # Check that referenced branches exist
 | 
						|
                #     branch_ids = self._get_all_branch_ids(self.pipeline_config)
 | 
						|
                #     for branch_id in wait_for:
 | 
						|
                #         if branch_id not in branch_ids:
 | 
						|
                #             logger.error(f"Referenced branch '{branch_id}' in waitForBranches not found")
 | 
						|
                #             return False
 | 
						|
 | 
						|
        logger.info("Pipeline configuration validated successfully")
 | 
						|
        return True
 | 
						|
 | 
						|
    def _validate_actions(self, config: Any) -> bool:
 | 
						|
        """
 | 
						|
        Validate actions in a pipeline or branch configuration
 | 
						|
 | 
						|
        Args:
 | 
						|
            config: Pipeline or branch configuration
 | 
						|
 | 
						|
        Returns:
 | 
						|
            True if valid, False otherwise
 | 
						|
        """
 | 
						|
        if hasattr(config, 'actions'):
 | 
						|
            for action in config.actions:
 | 
						|
                # Validate Redis actions need Redis config
 | 
						|
                if action.type in [ActionType.REDIS_SAVE_IMAGE, ActionType.REDIS_PUBLISH]:
 | 
						|
                    if not self.redis_config:
 | 
						|
                        logger.error(f"Action {action.type} requires Redis configuration")
 | 
						|
                        return False
 | 
						|
 | 
						|
                # PostgreSQL actions are disabled - log warning instead of failing
 | 
						|
                # Kept for backward compatibility with existing pipeline.json files
 | 
						|
                if action.type in [ActionType.POSTGRESQL_UPDATE, ActionType.POSTGRESQL_UPDATE_COMBINED, ActionType.POSTGRESQL_INSERT]:
 | 
						|
                    logger.warning(f"PostgreSQL action {action.type.value} found but will be SKIPPED (PostgreSQL disabled)")
 | 
						|
                    # Do not fail validation - just skip these actions during execution
 | 
						|
                    # if not self.postgresql_config:
 | 
						|
                    #     logger.error(f"Action {action.type} requires PostgreSQL configuration")
 | 
						|
                    #     return False
 | 
						|
 | 
						|
        # Recursively validate branches
 | 
						|
        if hasattr(config, 'branches'):
 | 
						|
            for branch in config.branches:
 | 
						|
                if not self._validate_actions(branch):
 | 
						|
                    return False
 | 
						|
 | 
						|
        return True
 | 
						|
 | 
						|
    def _get_all_branch_ids(self, config: Any, branch_ids: Set[str] = None) -> Set[str]:
 | 
						|
        """
 | 
						|
        Recursively collect all branch model IDs
 | 
						|
 | 
						|
        Args:
 | 
						|
            config: Pipeline or branch configuration
 | 
						|
            branch_ids: Set to collect IDs into
 | 
						|
 | 
						|
        Returns:
 | 
						|
            Set of all branch model IDs
 | 
						|
        """
 | 
						|
        if branch_ids is None:
 | 
						|
            branch_ids = set()
 | 
						|
 | 
						|
        if hasattr(config, 'branches'):
 | 
						|
            for branch in config.branches:
 | 
						|
                branch_ids.add(branch.model_id)
 | 
						|
                self._get_all_branch_ids(branch, branch_ids)
 | 
						|
 | 
						|
        return branch_ids
 | 
						|
 | 
						|
    def get_redis_config(self) -> Optional[RedisConfig]:
 | 
						|
        """Get the Redis configuration"""
 | 
						|
        return self.redis_config
 | 
						|
 | 
						|
    def get_postgresql_config(self) -> Optional[PostgreSQLConfig]:
 | 
						|
        """Get the PostgreSQL configuration"""
 | 
						|
        return self.postgresql_config
 | 
						|
 | 
						|
    def get_tracking_config(self) -> Optional[TrackingConfig]:
 | 
						|
        """Get the tracking configuration"""
 | 
						|
        return self.tracking_config
 | 
						|
 | 
						|
    def get_pipeline_config(self) -> Optional[PipelineConfig]:
 | 
						|
        """Get the main pipeline configuration"""
 | 
						|
        return self.pipeline_config |