"""
Data synchronization tasks for Celery
"""
import asyncio
import logging
import sys
import os
import time
from datetime import datetime, timedelta

# Make the parent directory importable before the project-local imports below.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from typing import Dict, Any
from celery import current_task
from celery_config import celery_app, get_db_session
from nomi_sec_client import NomiSecClient
from exploitdb_client_local import ExploitDBLocalClient
from cisa_kev_client import CISAKEVClient
from mcdevitt_poc_client import GitHubPoCClient

logger = logging.getLogger(__name__)


def _publish_state(task, state, stage, progress, message, **extra):
    """Publish a task state update whose meta has stage/progress/message keys.

    Args:
        task: The bound Celery task instance (``self`` inside a task).
        state: State name passed to ``update_state`` (e.g. ``'PROGRESS'``).
        stage: Value stored under the ``'stage'`` meta key.
        progress: Value stored under the ``'progress'`` meta key.
        message: Value stored under the ``'message'`` meta key.
        **extra: Additional meta entries, appended after the three above.
    """
    meta = {'stage': stage, 'progress': progress, 'message': message}
    meta.update(extra)
    task.update_state(state=state, meta=meta)


def _publish_failure(task, exc):
    """Publish the FAILURE meta shared by every task in this module."""
    _publish_state(
        task, 'FAILURE', 'error', 0, f'Task failed: {str(exc)}', error=str(exc)
    )


def _run_in_fresh_loop(async_callable, *args, **kwargs):
    """Run ``async_callable(*args, **kwargs)`` to completion on a new event loop.

    The callable is invoked only after the new loop has been installed as the
    current one. The loop is always closed afterwards and then uninstalled, so
    a closed loop is never left behind as the thread's current event loop.

    Returns:
        Whatever the awaited call returns.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(async_callable(*args, **kwargs))
    finally:
        loop.close()
        asyncio.set_event_loop(None)


@celery_app.task(bind=True, name='data_sync_tasks.sync_nomi_sec')
def sync_nomi_sec_task(self, batch_size: int = 50) -> Dict[str, Any]:
    """
    Celery task for nomi-sec PoC synchronization

    Args:
        batch_size: Number of CVEs to process in each batch

    Returns:
        Whatever ``NomiSecClient.bulk_sync_all_cves`` returns
        (annotated as a dictionary of sync results)

    Raises:
        Exception: Any error from the sync is logged, published as a
            FAILURE state and re-raised.
    """
    db_session = get_db_session()

    try:
        _publish_state(
            self, 'PROGRESS', 'sync_nomi_sec', 0,
            'Starting nomi-sec PoC synchronization'
        )
        logger.info(f"Starting nomi-sec sync task with batch size: {batch_size}")

        client = NomiSecClient(db_session)
        result = _run_in_fresh_loop(client.bulk_sync_all_cves, batch_size=batch_size)

        _publish_state(
            self, 'SUCCESS', 'completed', 100,
            'Nomi-sec synchronization completed successfully'
        )
        logger.info(f"Nomi-sec sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"Nomi-sec sync task failed: {e}")
        _publish_failure(self, e)
        raise
    finally:
        db_session.close()


@celery_app.task(bind=True, name='data_sync_tasks.sync_cve2capec')
def sync_cve2capec_task(self, force_refresh: bool = False) -> Dict[str, Any]:
    """
    Celery task for CVE2CAPEC MITRE ATT&CK mapping synchronization

    Args:
        force_refresh: Whether to force refresh the cache regardless of expiry

    Returns:
        Dictionary with ``status``, ``total_mappings``, ``total_techniques``
        and ``cache_updated`` keys, built from the client's ``get_stats()``

    Raises:
        Exception: Any error is logged, published as a FAILURE state and
            re-raised.
    """
    try:
        _publish_state(
            self, 'PROGRESS', 'sync_cve2capec', 0,
            'Starting CVE2CAPEC MITRE ATT&CK mapping synchronization'
        )
        logger.info(f"Starting CVE2CAPEC sync task with force_refresh: {force_refresh}")

        # Import here to avoid circular dependencies
        from cve2capec_client import CVE2CAPECClient

        client = CVE2CAPECClient()

        _publish_state(
            self, 'PROGRESS', 'sync_cve2capec', 10,
            'Fetching MITRE ATT&CK mappings...'
        )

        # Only hit the upstream source when the caller explicitly asks for it.
        if force_refresh:
            client._fetch_fresh_data()

        _publish_state(
            self, 'PROGRESS', 'sync_cve2capec', 50,
            'Processing CVE mappings...'
        )

        stats = client.get_stats()

        _publish_state(
            self, 'PROGRESS', 'sync_cve2capec', 100,
            'CVE2CAPEC synchronization completed successfully'
        )

        result = {
            'status': 'completed',
            'total_mappings': stats.get('total_mappings', 0),
            'total_techniques': stats.get('unique_techniques', 0),
            'cache_updated': True
        }

        logger.info(f"CVE2CAPEC sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"CVE2CAPEC sync task failed: {e}")
        _publish_failure(self, e)
        raise


@celery_app.task(bind=True, name='data_sync_tasks.sync_github_poc')
def sync_github_poc_task(self, batch_size: int = 50) -> Dict[str, Any]:
    """
    Celery task for GitHub PoC synchronization

    Args:
        batch_size: Number of CVEs to process in each batch

    Returns:
        Whatever ``GitHubPoCClient.bulk_sync_all_cves`` returns
        (annotated as a dictionary of sync results)

    Raises:
        Exception: Any error from the sync is logged, published as a
            FAILURE state and re-raised.
    """
    db_session = get_db_session()

    try:
        _publish_state(
            self, 'PROGRESS', 'sync_github_poc', 0,
            'Starting GitHub PoC synchronization'
        )
        logger.info(f"Starting GitHub PoC sync task with batch size: {batch_size}")

        client = GitHubPoCClient(db_session)
        result = _run_in_fresh_loop(client.bulk_sync_all_cves, batch_size=batch_size)

        _publish_state(
            self, 'SUCCESS', 'completed', 100,
            'GitHub PoC synchronization completed successfully'
        )
        logger.info(f"GitHub PoC sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"GitHub PoC sync task failed: {e}")
        _publish_failure(self, e)
        raise
    finally:
        db_session.close()


@celery_app.task(bind=True, name='data_sync_tasks.sync_reference_content')
def sync_reference_content_task(self, batch_size: int = 30, max_cves: int = 200,
                                force_resync: bool = False) -> Dict[str, Any]:
    """
    Celery task for CVE reference content extraction and analysis

    Extraction itself is currently simulated: each selected CVE only gets its
    ``reference_content_extracted_at`` timestamp set. Changes are committed
    once per batch.

    Args:
        batch_size: Number of CVEs to process in each batch (must be >= 1)
        max_cves: Maximum number of CVEs to process
        force_resync: Force re-sync of recently processed CVEs

    Returns:
        Dictionary with ``total_processed``, ``successful_extractions`` and
        ``failed_extractions``; ``extraction_rate`` (percent) is included
        whenever at least one CVE was selected

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any other error outside the per-CVE handler is logged,
            published as a FAILURE state and re-raised.
    """
    db_session = get_db_session()

    try:
        # A zero step would crash range() below and a negative one would
        # silently process nothing, so reject both up front.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        # Import here to avoid circular imports
        from main import CVE

        _publish_state(
            self, 'PROGRESS', 'sync_reference_content', 0,
            'Starting CVE reference content extraction'
        )
        logger.info(f"Starting reference content sync task - batch_size: {batch_size}, max_cves: {max_cves}")

        # Get CVEs to process (prioritize those with references but no extracted content)
        query = db_session.query(CVE)

        if not force_resync:
            # Skip CVEs that were processed within the last 7 days.
            # NOTE(review): naive UTC timestamps are kept on purpose here;
            # presumably the column stores naive datetimes - confirm before
            # switching to timezone-aware values.
            cutoff_date = datetime.utcnow() - timedelta(days=7)
            query = query.filter(
                (CVE.reference_content_extracted_at.is_(None)) |
                (CVE.reference_content_extracted_at < cutoff_date)
            )

        # Only CVEs that actually have references are considered
        cves = query.filter(CVE.references.isnot(None)).limit(max_cves).all()

        if not cves:
            logger.info("No CVEs found for reference content extraction")
            return {'total_processed': 0, 'successful_extractions': 0, 'failed_extractions': 0}

        total_cves = len(cves)
        total_batches = (total_cves + batch_size - 1) // batch_size

        total_processed = 0
        successful_extractions = 0
        failed_extractions = 0

        for batch_start in range(0, total_cves, batch_size):
            batch = cves[batch_start:batch_start + batch_size]

            for position, cve in enumerate(batch, start=batch_start):
                try:
                    overall_progress = int((position / total_cves) * 100)
                    _publish_state(
                        self, 'PROGRESS', 'sync_reference_content', overall_progress,
                        f'Processing CVE {cve.cve_id} ({position + 1}/{total_cves})',
                        current_cve=cve.cve_id,
                        processed=position,
                        total=total_cves
                    )

                    # For now, simulate reference content extraction
                    # In a real implementation, you would create a ReferenceContentExtractor
                    # and extract content from CVE references

                    # Mark CVE as processed
                    cve.reference_content_extracted_at = datetime.utcnow()
                except Exception as e:
                    logger.error(f"Error processing reference content for CVE {cve.cve_id}: {e}")
                    failed_extractions += 1
                else:
                    successful_extractions += 1
                    # Small delay between requests. Kept outside the try body
                    # so an exception raised while sleeping fails the task
                    # instead of being counted as a second, failed attempt.
                    time.sleep(2)
                total_processed += 1

            # Commit after each batch
            db_session.commit()
            logger.info(f"Processed batch {batch_start//batch_size + 1}/{total_batches}")

        result = {
            'total_processed': total_processed,
            'successful_extractions': successful_extractions,
            'failed_extractions': failed_extractions,
            'extraction_rate': (successful_extractions / total_processed * 100) if total_processed > 0 else 0
        }

        _publish_state(
            self, 'SUCCESS', 'completed', 100,
            f'Reference content extraction completed: {successful_extractions} successful, {failed_extractions} failed',
            results=result
        )

        logger.info(f"Reference content sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"Reference content sync task failed: {e}")
        _publish_failure(self, e)
        raise
    finally:
        db_session.close()


@celery_app.task(bind=True, name='data_sync_tasks.sync_exploitdb')
def sync_exploitdb_task(self, batch_size: int = 30) -> Dict[str, Any]:
    """
    Celery task for ExploitDB synchronization

    Args:
        batch_size: Number of CVEs to process in each batch

    Returns:
        Whatever ``ExploitDBLocalClient.bulk_sync_exploitdb`` returns
        (annotated as a dictionary of sync results)

    Raises:
        Exception: Any error from the sync is logged, published as a
            FAILURE state and re-raised.
    """
    db_session = get_db_session()

    try:
        _publish_state(
            self, 'PROGRESS', 'sync_exploitdb', 0,
            'Starting ExploitDB synchronization'
        )
        logger.info(f"Starting ExploitDB sync task with batch size: {batch_size}")

        client = ExploitDBLocalClient(db_session)
        result = _run_in_fresh_loop(client.bulk_sync_exploitdb, batch_size=batch_size)

        _publish_state(
            self, 'SUCCESS', 'completed', 100,
            'ExploitDB synchronization completed successfully'
        )
        logger.info(f"ExploitDB sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"ExploitDB sync task failed: {e}")
        _publish_failure(self, e)
        raise
    finally:
        db_session.close()


@celery_app.task(bind=True, name='data_sync_tasks.sync_cisa_kev')
def sync_cisa_kev_task(self, batch_size: int = 100) -> Dict[str, Any]:
    """
    Celery task for CISA KEV synchronization

    Args:
        batch_size: Number of CVEs to process in each batch

    Returns:
        Whatever ``CISAKEVClient.bulk_sync_kev_data`` returns
        (annotated as a dictionary of sync results)

    Raises:
        Exception: Any error from the sync is logged, published as a
            FAILURE state and re-raised.
    """
    db_session = get_db_session()

    try:
        _publish_state(
            self, 'PROGRESS', 'sync_cisa_kev', 0,
            'Starting CISA KEV synchronization'
        )
        logger.info(f"Starting CISA KEV sync task with batch size: {batch_size}")

        client = CISAKEVClient(db_session)
        result = _run_in_fresh_loop(client.bulk_sync_kev_data, batch_size=batch_size)

        _publish_state(
            self, 'SUCCESS', 'completed', 100,
            'CISA KEV synchronization completed successfully'
        )
        logger.info(f"CISA KEV sync task completed: {result}")
        return result

    except Exception as e:
        logger.error(f"CISA KEV sync task failed: {e}")
        _publish_failure(self, e)
        raise
    finally:
        db_session.close()
# NOTE(review): the task name 'data_sync_tasks.sync_cve2capec' is also
# registered earlier in this module - confirm which definition should remain.
@celery_app.task(bind=True, name='data_sync_tasks.sync_cve2capec')
def sync_cve2capec_task(self, force_refresh: bool = False) -> Dict[str, Any]:
    """
    Synchronize the CVE2CAPEC MITRE ATT&CK mappings as a Celery task.

    Progress is published at 0, 10, 50 and 100 percent under the
    ``sync_cve2capec`` stage.

    Args:
        force_refresh: When true, the client's ``_fetch_fresh_data()`` is
            called before statistics are collected

    Returns:
        Dictionary with ``status``, ``total_mappings``, ``total_techniques``
        and ``cache_updated`` keys

    Raises:
        Exception: Any error is logged, published as a FAILURE state and
            re-raised.
    """
    def report(percent, text):
        # Every progress update of this task shares the same stage label.
        self.update_state(
            state='PROGRESS',
            meta={'stage': 'sync_cve2capec', 'progress': percent, 'message': text}
        )

    try:
        report(0, 'Starting CVE2CAPEC MITRE ATT&CK mapping synchronization')
        logger.info(f"Starting CVE2CAPEC sync task with force_refresh: {force_refresh}")

        # Imported lazily to avoid circular dependencies
        from cve2capec_client import CVE2CAPECClient

        mapping_client = CVE2CAPECClient()
        report(10, 'Fetching MITRE ATT&CK mappings...')

        if force_refresh:
            mapping_client._fetch_fresh_data()
        report(50, 'Processing CVE mappings...')

        statistics = mapping_client.get_stats()
        report(100, 'CVE2CAPEC synchronization completed successfully')

        mapping_count = statistics.get('total_mappings', 0)
        technique_count = statistics.get('unique_techniques', 0)
        result = {
            'status': 'completed',
            'total_mappings': mapping_count,
            'total_techniques': technique_count,
            'cache_updated': True
        }

        logger.info(f"CVE2CAPEC sync task completed: {result}")
        return result

    except Exception as exc:
        logger.error(f"CVE2CAPEC sync task failed: {exc}")
        failure_meta = {
            'stage': 'error',
            'progress': 0,
            'message': f'Task failed: {str(exc)}',
            'error': str(exc)
        }
        self.update_state(state='FAILURE', meta=failure_meta)
        raise


@celery_app.task(bind=True, name='data_sync_tasks.build_exploitdb_index')
def build_exploitdb_index_task(self) -> Dict[str, Any]:
    """
    Build or rebuild the ExploitDB file index as a Celery task.

    Progress is published at 0, 50 and 100 percent under the
    ``build_exploitdb_index`` stage.

    Returns:
        Dictionary with ``status``, ``total_exploits_indexed`` (the length of
        the client's ``file_index``) and ``index_updated`` keys

    Raises:
        Exception: Any error is logged, published as a FAILURE state and
            re-raised.
    """
    def report(percent, text):
        # Every progress update of this task shares the same stage label.
        self.update_state(
            state='PROGRESS',
            meta={'stage': 'build_exploitdb_index', 'progress': percent, 'message': text}
        )

    try:
        report(0, 'Starting ExploitDB file index building')
        logger.info("Starting ExploitDB index build task")

        # Imported lazily to avoid circular dependencies
        from exploitdb_client_local import ExploitDBLocalClient

        # No DB session is passed; lazy loading is disabled on construction.
        index_client = ExploitDBLocalClient(None, lazy_load=False)
        report(50, 'Building file index...')

        # Explicitly trigger a rebuild of the index
        index_client._build_file_index()
        report(100, 'ExploitDB index building completed successfully')

        indexed_total = len(index_client.file_index)
        result = {
            'status': 'completed',
            'total_exploits_indexed': indexed_total,
            'index_updated': True
        }

        logger.info(f"ExploitDB index build task completed: {result}")
        return result

    except Exception as exc:
        logger.error(f"ExploitDB index build task failed: {exc}")
        failure_meta = {
            'stage': 'error',
            'progress': 0,
            'message': f'Task failed: {str(exc)}',
            'error': str(exc)
        }
        self.update_state(state='FAILURE', meta=failure_meta)
        raise