auto_sigma_rule_generator/backend/tasks/maintenance_tasks.py

"""
Maintenance tasks for Celery
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, Any
from celery_config import celery_app, get_db_session

logger = logging.getLogger(__name__)

@celery_app.task(name='tasks.maintenance_tasks.cleanup_old_results')
def cleanup_old_results():
    """
    Periodic task to clean up old Celery results and logs
    """
    try:
        logger.info("Starting cleanup of old Celery results")

        # This would clean up old results from Redis
        # For now, we'll just log the action
        cutoff_date = datetime.utcnow() - timedelta(days=7)

        # Clean up old task results (this would be Redis cleanup)
        # celery_app.backend.cleanup()

        logger.info(f"Cleanup completed for results older than {cutoff_date}")

        return {
            'status': 'completed',
            'cutoff_date': cutoff_date.isoformat(),
            'message': 'Old results cleanup completed'
        }

    except Exception as e:
        logger.error(f"Cleanup task failed: {e}")
        raise

@celery_app.task(name='tasks.maintenance_tasks.health_check')
def health_check():
    """
    Health check task to verify system components
    """
    try:
        db_session = get_db_session()

        # Check database connectivity
        try:
            db_session.execute("SELECT 1")
            db_status = "healthy"
        except Exception as e:
            db_status = f"unhealthy: {e}"
        finally:
            db_session.close()

        # Check Redis connectivity
        try:
            celery_app.backend.ping()
            redis_status = "healthy"
        except Exception as e:
            redis_status = f"unhealthy: {e}"

        result = {
            'timestamp': datetime.utcnow().isoformat(),
            'database': db_status,
            'redis': redis_status,
            'celery': 'healthy'
        }

        logger.info(f"Health check completed: {result}")
        return result

    except Exception as e:
        logger.error(f"Health check failed: {e}")
        raise

@celery_app.task(bind=True, name='tasks.maintenance_tasks.database_cleanup_comprehensive')
def database_cleanup_comprehensive(self, days_to_keep: int = 30, cleanup_failed_jobs: bool = True,
                                 cleanup_logs: bool = True) -> Dict[str, Any]:
    """
    Comprehensive database cleanup task

    Args:
        days_to_keep: Number of days to keep old records
        cleanup_failed_jobs: Whether to clean up failed job records
        cleanup_logs: Whether to clean up old log entries

    Returns:
        Dictionary containing cleanup results
    """
    try:
        from datetime import datetime, timedelta
        from typing import Dict, Any

        db_session = get_db_session()

        # Update task progress
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'database_cleanup',
                'progress': 0,
                'message': 'Starting comprehensive database cleanup'
            }
        )

        logger.info(f"Starting comprehensive database cleanup - keeping {days_to_keep} days")

        cutoff_date = datetime.utcnow() - timedelta(days=days_to_keep)
        cleanup_results = {
            'cutoff_date': cutoff_date.isoformat(),
            'cleaned_tables': {},
            'total_records_cleaned': 0
        }

        try:
            # Import models here to avoid circular imports
            import sys
            import os
            sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
            from main import BulkProcessingJob

            # Clean up old bulk processing jobs
            self.update_state(
                state='PROGRESS',
                meta={
                    'stage': 'database_cleanup',
                    'progress': 20,
                    'message': 'Cleaning up old bulk processing jobs'
                }
            )

            old_jobs_query = db_session.query(BulkProcessingJob).filter(
                BulkProcessingJob.created_at < cutoff_date
            )

            if cleanup_failed_jobs:
                # Clean all old jobs
                old_jobs_count = old_jobs_query.count()
                old_jobs_query.delete()
            else:
                # Only clean completed jobs
                old_jobs_query = old_jobs_query.filter(
                    BulkProcessingJob.status.in_(['completed', 'cancelled'])
                )
                old_jobs_count = old_jobs_query.count()
                old_jobs_query.delete()

            cleanup_results['cleaned_tables']['bulk_processing_jobs'] = old_jobs_count
            cleanup_results['total_records_cleaned'] += old_jobs_count

            # Clean up old Celery task results from Redis
            self.update_state(
                state='PROGRESS',
                meta={
                    'stage': 'database_cleanup',
                    'progress': 40,
                    'message': 'Cleaning up old Celery task results'
                }
            )

            try:
                # This would clean up old results from Redis backend
                # For now, we'll simulate this
                celery_cleanup_count = 0
                # celery_app.backend.cleanup()
                cleanup_results['cleaned_tables']['celery_results'] = celery_cleanup_count
            except Exception as e:
                logger.warning(f"Could not clean Celery results: {e}")
                cleanup_results['cleaned_tables']['celery_results'] = 0

            # Clean up old temporary data (if any)
            self.update_state(
                state='PROGRESS',
                meta={
                    'stage': 'database_cleanup',
                    'progress': 60,
                    'message': 'Cleaning up temporary data'
                }
            )

            # Add any custom temporary table cleanup here
            # Example: Clean up old session data, temporary files, etc.
            temp_cleanup_count = 0
            cleanup_results['cleaned_tables']['temporary_data'] = temp_cleanup_count

            # Vacuum/optimize database (PostgreSQL)
            self.update_state(
                state='PROGRESS',
                meta={
                    'stage': 'database_cleanup',
                    'progress': 80,
                    'message': 'Optimizing database'
                }
            )

            try:
                # Run VACUUM on PostgreSQL to reclaim space
                db_session.execute("VACUUM;")
                cleanup_results['database_optimized'] = True
            except Exception as e:
                logger.warning(f"Could not vacuum database: {e}")
                cleanup_results['database_optimized'] = False

            # Commit all changes
            db_session.commit()

            # Update final progress
            self.update_state(
                state='SUCCESS',
                meta={
                    'stage': 'completed',
                    'progress': 100,
                    'message': f'Database cleanup completed - removed {cleanup_results["total_records_cleaned"]} records',
                    'results': cleanup_results
                }
            )

            logger.info(f"Database cleanup completed: {cleanup_results}")
            return cleanup_results

        finally:
            db_session.close()

    except Exception as e:
        logger.error(f"Database cleanup failed: {e}")
        self.update_state(
            state='FAILURE',
            meta={
                'stage': 'error',
                'progress': 0,
                'message': f'Cleanup failed: {str(e)}',
                'error': str(e)
            }
        )
        raise

@celery_app.task(bind=True, name='tasks.maintenance_tasks.health_check_detailed')
def health_check_detailed(self) -> Dict[str, Any]:
    """
    Detailed health check task for all system components

    Returns:
        Dictionary containing detailed health status
    """
    try:
        from datetime import datetime
        import psutil
        import redis

        # Update task progress
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'health_check',
                'progress': 0,
                'message': 'Starting detailed health check'
            }
        )

        logger.info("Starting detailed health check")

        health_status = {
            'timestamp': datetime.utcnow().isoformat(),
            'overall_status': 'healthy',
            'components': {}
        }

        # Check database connectivity and performance
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'health_check',
                'progress': 20,
                'message': 'Checking database health'
            }
        )

        db_session = get_db_session()
        try:
            start_time = datetime.utcnow()
            db_session.execute("SELECT 1")
            db_response_time = (datetime.utcnow() - start_time).total_seconds()

            # Check database size and connections
            db_size_result = db_session.execute("SELECT pg_size_pretty(pg_database_size(current_database()));").fetchone()
            db_connections_result = db_session.execute("SELECT count(*) FROM pg_stat_activity;").fetchone()

            health_status['components']['database'] = {
                'status': 'healthy',
                'response_time_seconds': db_response_time,
                'database_size': db_size_result[0] if db_size_result else 'unknown',
                'active_connections': db_connections_result[0] if db_connections_result else 0,
                'details': 'Database responsive and accessible'
            }
        except Exception as e:
            health_status['components']['database'] = {
                'status': 'unhealthy',
                'error': str(e),
                'details': 'Database connection failed'
            }
            health_status['overall_status'] = 'degraded'
        finally:
            db_session.close()

        # Check Redis connectivity and performance
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'health_check',
                'progress': 40,
                'message': 'Checking Redis health'
            }
        )

        try:
            start_time = datetime.utcnow()
            celery_app.backend.ping()
            redis_response_time = (datetime.utcnow() - start_time).total_seconds()

            # Get Redis info
            redis_client = redis.Redis.from_url(celery_app.conf.broker_url)
            redis_info = redis_client.info()

            health_status['components']['redis'] = {
                'status': 'healthy',
                'response_time_seconds': redis_response_time,
                'memory_usage_mb': redis_info.get('used_memory', 0) / (1024 * 1024),
                'connected_clients': redis_info.get('connected_clients', 0),
                'uptime_seconds': redis_info.get('uptime_in_seconds', 0),
                'details': 'Redis responsive and accessible'
            }
        except Exception as e:
            health_status['components']['redis'] = {
                'status': 'unhealthy',
                'error': str(e),
                'details': 'Redis connection failed'
            }
            health_status['overall_status'] = 'degraded'

        # Check system resources
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'health_check',
                'progress': 60,
                'message': 'Checking system resources'
            }
        )

        try:
            cpu_percent = psutil.cpu_percent(interval=1)
            memory = psutil.virtual_memory()
            disk = psutil.disk_usage('/')

            health_status['components']['system'] = {
                'status': 'healthy',
                'cpu_percent': cpu_percent,
                'memory_percent': memory.percent,
                'memory_available_gb': memory.available / (1024**3),
                'disk_percent': disk.percent,
                'disk_free_gb': disk.free / (1024**3),
                'details': 'System resources within normal ranges'
            }

            # Mark as degraded if resources are high
            if cpu_percent > 80 or memory.percent > 85 or disk.percent > 90:
                health_status['components']['system']['status'] = 'degraded'
                health_status['overall_status'] = 'degraded'
                health_status['components']['system']['details'] = 'High resource usage detected'

        except Exception as e:
            health_status['components']['system'] = {
                'status': 'unknown',
                'error': str(e),
                'details': 'Could not check system resources'
            }

        # Check Celery worker status
        self.update_state(
            state='PROGRESS',
            meta={
                'stage': 'health_check',
                'progress': 80,
                'message': 'Checking Celery workers'
            }
        )

        try:
            inspect = celery_app.control.inspect()
            active_workers = inspect.active()
            stats = inspect.stats()

            health_status['components']['celery'] = {
                'status': 'healthy',
                'active_workers': len(active_workers) if active_workers else 0,
                'worker_stats': stats,
                'details': 'Celery workers responding'
            }

            if not active_workers:
                health_status['components']['celery']['status'] = 'degraded'
                health_status['components']['celery']['details'] = 'No active workers found'
                health_status['overall_status'] = 'degraded'

        except Exception as e:
            health_status['components']['celery'] = {
                'status': 'unknown',
                'error': str(e),
                'details': 'Could not check Celery workers'
            }

        # Update final progress
        self.update_state(
            state='SUCCESS',
            meta={
                'stage': 'completed',
                'progress': 100,
                'message': f'Health check completed - overall status: {health_status["overall_status"]}',
                'results': health_status
            }
        )

        logger.info(f"Detailed health check completed: {health_status['overall_status']}")
        return health_status

    except Exception as e:
        logger.error(f"Detailed health check failed: {e}")
        self.update_state(
            state='FAILURE',
            meta={
                'stage': 'error',
                'progress': 0,
                'message': f'Health check failed: {str(e)}',
                'error': str(e)
            }
        )
        raise