add kev support, exploitDB mirror support

Brendan McDevitt 2025-07-10 16:19:43 -05:00
parent 20b3a63c78
commit 696a1a3462
8 changed files with 1945 additions and 69 deletions

3
.gitmodules vendored

@@ -1,3 +1,6 @@
[submodule "github_poc_collector"]
path = github_poc_collector
url = https://git.mcdevitt.tech/bpmcdevitt/github_poc_collector
[submodule "exploit-db-mirror"]
path = exploit-db-mirror
url = https://git.mcdevitt.tech/bpmcdevitt/exploit-db-mirror.git


@@ -10,6 +10,8 @@ from typing import Optional
from sqlalchemy.orm import Session
from nvd_bulk_processor import NVDBulkProcessor
from nomi_sec_client import NomiSecClient
from exploitdb_client_local import ExploitDBLocalClient
from cisa_kev_client import CISAKEVClient
# Configure logging
logging.basicConfig(level=logging.INFO)
@@ -22,11 +24,15 @@ class BulkSeeder:
self.db_session = db_session
self.nvd_processor = NVDBulkProcessor(db_session)
self.nomi_sec_client = NomiSecClient(db_session)
self.exploitdb_client = ExploitDBLocalClient(db_session)
self.cisa_kev_client = CISAKEVClient(db_session)
async def full_bulk_seed(self, start_year: int = 2002,
end_year: Optional[int] = None,
skip_nvd: bool = False,
skip_nomi_sec: bool = False,
skip_exploitdb: bool = False,
skip_cisa_kev: bool = False) -> dict:
""" """
Perform complete bulk seeding operation Perform complete bulk seeding operation
@@ -35,6 +41,8 @@ class BulkSeeder:
end_year: Ending year for NVD data (default: current year)
skip_nvd: Skip NVD bulk processing (default: False)
skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False)
skip_exploitdb: Skip ExploitDB synchronization (default: False)
skip_cisa_kev: Skip CISA KEV synchronization (default: False)
Returns:
Dictionary containing operation results
@@ -46,6 +54,9 @@ class BulkSeeder:
'start_time': datetime.utcnow(),
'nvd_results': None,
'nomi_sec_results': None,
'exploitdb_results': None,
'cisa_kev_results': None,
'sigma_results': None,
'total_time': None,
'status': 'running'
}
@@ -76,11 +87,33 @@ class BulkSeeder:
else:
logger.info("Phase 2: Skipping nomi-sec PoC synchronization")
# Phase 3: ExploitDB Synchronization
if not skip_exploitdb:
logger.info("Phase 3: Starting ExploitDB synchronization...")
exploitdb_results = await self.exploitdb_client.bulk_sync_exploitdb(
batch_size=30 # Modest batches; the local client commits to the DB after each batch
)
results['exploitdb_results'] = exploitdb_results
logger.info(f"Phase 3 complete: {exploitdb_results['total_exploits_found']} exploits found")
else:
logger.info("Phase 3: Skipping ExploitDB synchronization")
# Phase 4: CISA KEV Synchronization
if not skip_cisa_kev:
logger.info("Phase 4: Starting CISA KEV synchronization...")
cisa_kev_results = await self.cisa_kev_client.bulk_sync_kev_data(
batch_size=100 # Can handle larger batches since data is already filtered
)
results['cisa_kev_results'] = cisa_kev_results
logger.info(f"Phase 4 complete: {cisa_kev_results['total_kev_found']} KEV entries found")
else:
logger.info("Phase 4: Skipping CISA KEV synchronization")
# Phase 5: Generate Enhanced SIGMA Rules
logger.info("Phase 5: Generating enhanced SIGMA rules...")
sigma_results = await self.generate_enhanced_sigma_rules()
results['sigma_results'] = sigma_results
logger.info(f"Phase 5 complete: {sigma_results['rules_generated']} rules generated")
results['status'] = 'completed'
results['end_time'] = datetime.utcnow()
@@ -107,6 +140,8 @@ class BulkSeeder:
'start_time': datetime.utcnow(),
'nvd_update': None,
'nomi_sec_update': None,
'exploitdb_update': None,
'cisa_kev_update': None,
'status': 'running'
}
@@ -125,6 +160,16 @@ class BulkSeeder:
recent_cves = await self._get_recently_modified_cves()
nomi_sec_update = await self._sync_specific_cves(recent_cves)
results['nomi_sec_update'] = nomi_sec_update
# Update ExploitDB data for modified CVEs
logger.info("Updating ExploitDB data for modified CVEs...")
exploitdb_update = await self._sync_specific_cves_exploitdb(recent_cves)
results['exploitdb_update'] = exploitdb_update
# Update CISA KEV data for modified CVEs
logger.info("Updating CISA KEV data for modified CVEs...")
cisa_kev_update = await self._sync_specific_cves_cisa_kev(recent_cves)
results['cisa_kev_update'] = cisa_kev_update
results['status'] = 'completed'
results['end_time'] = datetime.utcnow()
@@ -221,6 +266,53 @@ class BulkSeeder:
'total_pocs_found': total_pocs_found
}
async def _sync_specific_cves_exploitdb(self, cve_ids: list) -> dict:
"""Sync ExploitDB data for specific CVEs"""
total_processed = 0
total_exploits_found = 0
for cve_id in cve_ids:
try:
result = await self.exploitdb_client.sync_cve_exploits(cve_id)
total_processed += 1
total_exploits_found += result.get('exploits_found', 0)
# Small delay to avoid overwhelming the API
await asyncio.sleep(0.5)
except Exception as e:
logger.error(f"Error syncing ExploitDB for {cve_id}: {e}")
continue
return {
'total_processed': total_processed,
'total_exploits_found': total_exploits_found
}
async def _sync_specific_cves_cisa_kev(self, cve_ids: list) -> dict:
"""Sync CISA KEV data for specific CVEs"""
total_processed = 0
total_kev_found = 0
for cve_id in cve_ids:
try:
result = await self.cisa_kev_client.sync_cve_kev_data(cve_id)
total_processed += 1
if result.get('kev_found', False):
total_kev_found += 1
# Small delay to be respectful to CISA
await asyncio.sleep(0.2)
except Exception as e:
logger.error(f"Error syncing CISA KEV for {cve_id}: {e}")
continue
return {
'total_processed': total_processed,
'total_kev_found': total_kev_found
}
async def get_seeding_status(self) -> dict:
"""Get current seeding status and statistics"""
from main import CVE, SigmaRule, BulkProcessingJob
@@ -270,7 +362,9 @@ class BulkSeeder:
},
'recent_jobs': job_status,
'nvd_data_status': await self._get_nvd_data_status(),
'nomi_sec_status': await self.nomi_sec_client.get_sync_status(),
'exploitdb_status': await self.exploitdb_client.get_exploitdb_sync_status(),
'cisa_kev_status': await self.cisa_kev_client.get_kev_sync_status()
}
async def _get_nvd_data_status(self) -> dict:
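For orientation, a minimal sketch of driving the extended seeder end to end; the bulk_seeder module name and the SessionLocal factory are assumptions based on the imports shown in this diff:

import asyncio
from bulk_seeder import BulkSeeder   # assumption: this file is backend/bulk_seeder.py
from main import SessionLocal        # assumption: session factory lives in main.py

async def seed_all():
    db = SessionLocal()
    try:
        seeder = BulkSeeder(db)
        # Phases run in order: NVD, nomi-sec, ExploitDB, CISA KEV, SIGMA rules;
        # each phase can be skipped via its flag (e.g. skip_exploitdb=True).
        results = await seeder.full_bulk_seed(start_year=2020)
        print(results['status'], results['total_time'])
    finally:
        db.close()

asyncio.run(seed_all())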

516
backend/cisa_kev_client.py Normal file

@@ -0,0 +1,516 @@
"""
CISA Known Exploited Vulnerabilities (KEV) Integration Client
Interfaces with the CISA KEV catalog at https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json
"""
import aiohttp
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import Callable, Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import text
import re
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CISAKEVClient:
"""Client for interfacing with CISA Known Exploited Vulnerabilities catalog"""
def __init__(self, db_session: Session):
self.db_session = db_session
# CISA KEV URLs
self.kev_catalog_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"
self.kev_schema_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities_schema.json"
# Cache for KEV data
self.kev_cache = None
self.cache_timestamp = None
self.cache_ttl = 3600 # 1 hour cache
# Rate limiting
self.rate_limit_delay = 1.0 # 1 second between requests
self.last_request_time = 0
async def _make_request(self, session: aiohttp.ClientSession, url: str) -> Optional[dict]:
"""Make a rate-limited request to CISA"""
try:
# Rate limiting
current_time = asyncio.get_event_loop().time()
time_since_last = current_time - self.last_request_time
if time_since_last < self.rate_limit_delay:
await asyncio.sleep(self.rate_limit_delay - time_since_last)
async with session.get(url, timeout=30) as response:
self.last_request_time = asyncio.get_event_loop().time()
if response.status == 200:
return await response.json()
else:
logger.warning(f"CISA KEV request failed: {response.status} for {url}")
return None
except Exception as e:
logger.error(f"Error making request to {url}: {e}")
return None
async def fetch_kev_catalog(self, force_refresh: bool = False) -> Optional[dict]:
"""Fetch the CISA KEV catalog with caching"""
# Check cache first
if not force_refresh and self.kev_cache and self.cache_timestamp:
cache_age = datetime.now().timestamp() - self.cache_timestamp
if cache_age < self.cache_ttl:
logger.info("Using cached CISA KEV data")
return self.kev_cache
logger.info("Fetching fresh CISA KEV catalog...")
async with aiohttp.ClientSession() as session:
data = await self._make_request(session, self.kev_catalog_url)
if data:
self.kev_cache = data
self.cache_timestamp = datetime.now().timestamp()
logger.info(f"Fetched CISA KEV catalog with {data.get('count', 0)} vulnerabilities")
return data
else:
logger.error("Failed to fetch CISA KEV catalog")
return None
def analyze_kev_vulnerability(self, vuln: dict) -> dict:
"""Analyze a CISA KEV vulnerability entry to extract threat intelligence"""
analysis = {
'cve_id': vuln.get('cveID'),
'vendor_project': vuln.get('vendorProject'),
'product': vuln.get('product'),
'vulnerability_name': vuln.get('vulnerabilityName'),
'date_added': vuln.get('dateAdded'),
'short_description': vuln.get('shortDescription'),
'required_action': vuln.get('requiredAction'),
'due_date': vuln.get('dueDate'),
'known_ransomware_use': vuln.get('knownRansomwareCampaignUse', 'Unknown'),
'notes': vuln.get('notes', ''),
'cwes': vuln.get('cwes', [])
}
# Calculate threat priority score based on various factors
threat_score = self._calculate_threat_score(vuln)
analysis['threat_score'] = threat_score
analysis['threat_level'] = self._get_threat_level(threat_score)
# Extract indicators from descriptions and required actions
indicators = self._extract_kev_indicators(vuln)
analysis['indicators'] = indicators
# Categorize the vulnerability type
analysis['vulnerability_category'] = self._categorize_vulnerability(vuln)
return analysis
def _calculate_threat_score(self, vuln: dict) -> int:
"""Calculate threat priority score (0-100) based on KEV data"""
score = 0
# Base score for being in CISA KEV
score += 50 # High base score since it's actively exploited
# Ransomware usage factor (0-25 points)
ransomware_use = vuln.get('knownRansomwareCampaignUse', 'Unknown').lower()
if ransomware_use == 'known':
score += 25
elif ransomware_use == 'unknown':
score += 10 # Still concerning but less certain
# Recency factor (0-15 points)
try:
date_added = datetime.strptime(vuln.get('dateAdded', ''), '%Y-%m-%d')
days_since_added = (datetime.now() - date_added).days
if days_since_added <= 30: # Very recent
score += 15
elif days_since_added <= 90: # Recent
score += 10
elif days_since_added <= 365: # This year
score += 5
except ValueError:
pass
# Urgency based on due date (0-10 points)
try:
due_date = datetime.strptime(vuln.get('dueDate', ''), '%Y-%m-%d')
days_to_due = (due_date - datetime.now()).days
if days_to_due <= 0: # Overdue
score += 10
elif days_to_due <= 7: # Due soon
score += 8
elif days_to_due <= 30: # Due this month
score += 5
except ValueError:
pass
return min(score, 100) # Cap at 100
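# Worked example (illustrative, not from the live catalog): an entry added 20 days
# ago with knownRansomwareCampaignUse == 'Known' and a due date 5 days out scores
# 50 (KEV base) + 25 (ransomware) + 15 (recency) + 8 (due soon) = 98 -> 'critical'.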
def _get_threat_level(self, score: int) -> str:
"""Get threat level based on score"""
if score >= 85:
return 'critical'
elif score >= 70:
return 'high'
elif score >= 55:
return 'medium'
else:
return 'low'
def _extract_kev_indicators(self, vuln: dict) -> dict:
"""Extract threat indicators from CISA KEV vulnerability data"""
indicators = {
'attack_vectors': [],
'affected_products': [],
'required_actions': [],
'ransomware_indicators': [],
'vulnerability_types': [],
'mitigation_techniques': []
}
# Extract from vulnerability name and description
text_sources = [
vuln.get('vulnerabilityName', ''),
vuln.get('shortDescription', ''),
vuln.get('requiredAction', ''),
vuln.get('notes', '')
]
full_text = ' '.join(text_sources).lower()
# Attack vector patterns
attack_vector_patterns = [
r'\b(remote code execution|rce)\b',
r'\b(sql injection|sqli)\b',
r'\b(cross.?site scripting|xss)\b',
r'\b(buffer overflow)\b',
r'\b(privilege escalation)\b',
r'\b(authentication bypass)\b',
r'\b(directory traversal|path traversal)\b',
r'\b(file upload)\b',
r'\b(command injection)\b',
r'\b(deserialization)\b',
r'\b(memory corruption)\b',
r'\b(use.?after.?free)\b',
r'\b(heap overflow)\b',
r'\b(stack overflow)\b'
]
for pattern in attack_vector_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators['attack_vectors'].extend(matches)
# Product and vendor information
vendor = vuln.get('vendorProject', '')
product = vuln.get('product', '')
if vendor and product:
indicators['affected_products'].append(f"{vendor} {product}")
# Required action keywords
required_action = vuln.get('requiredAction', '')
action_patterns = [
r'\b(patch|update|upgrade)\b',
r'\b(disable|remove|uninstall)\b',
r'\b(configure|reconfigure)\b',
r'\b(firewall|network segmentation)\b',
r'\b(monitor|logging)\b',
r'\b(backup|restore)\b'
]
for pattern in action_patterns:
matches = re.findall(pattern, required_action, re.IGNORECASE)
indicators['required_actions'].extend(matches)
# Ransomware indicators
if vuln.get('knownRansomwareCampaignUse') == 'Known':
indicators['ransomware_indicators'].append('known_ransomware_use')
# Vulnerability type classification
vuln_type_patterns = [
r'\b(zero.?day|0.?day)\b',
r'\b(denial.?of.?service|dos)\b',
r'\b(information disclosure)\b',
r'\b(data breach)\b',
r'\b(backdoor)\b',
r'\b(trojan)\b',
r'\b(webshell)\b'
]
for pattern in vuln_type_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators['vulnerability_types'].extend(matches)
# Clean up and deduplicate
for key in indicators:
indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2]))
indicators[key] = indicators[key][:10] # Limit to 10 items per category
return indicators
def _categorize_vulnerability(self, vuln: dict) -> str:
"""Categorize the vulnerability based on CISA KEV data"""
vuln_name = vuln.get('vulnerabilityName', '').lower()
description = vuln.get('shortDescription', '').lower()
full_text = f"{vuln_name} {description}"
# Check for specific vulnerability categories
if re.search(r'\b(remote code execution|rce)\b', full_text):
return 'remote_code_execution'
elif re.search(r'\b(sql injection|sqli)\b', full_text):
return 'injection'
elif re.search(r'\b(cross.?site scripting|xss)\b', full_text):
return 'cross_site_scripting'
elif re.search(r'\b(privilege escalation)\b', full_text):
return 'privilege_escalation'
elif re.search(r'\b(authentication bypass)\b', full_text):
return 'authentication_bypass'
elif re.search(r'\b(buffer overflow|memory corruption)\b', full_text):
return 'memory_corruption'
elif re.search(r'\b(directory traversal|path traversal)\b', full_text):
return 'path_traversal'
elif re.search(r'\b(denial.?of.?service|dos)\b', full_text):
return 'denial_of_service'
elif re.search(r'\b(information disclosure)\b', full_text):
return 'information_disclosure'
else:
return 'other'
async def sync_cve_kev_data(self, cve_id: str) -> dict:
"""Synchronize CISA KEV data for a specific CVE"""
from main import CVE, SigmaRule
# Get existing CVE
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
if not cve:
logger.warning(f"CVE {cve_id} not found in database")
return {"error": "CVE not found"}
# Fetch CISA KEV catalog
kev_catalog = await self.fetch_kev_catalog()
if not kev_catalog:
return {"error": "Failed to fetch CISA KEV catalog"}
# Search for the CVE in the KEV catalog
kev_entry = None
for vuln in kev_catalog.get('vulnerabilities', []):
if vuln.get('cveID') == cve_id:
kev_entry = vuln
break
if not kev_entry:
logger.info(f"CVE {cve_id} not found in CISA KEV catalog")
return {"cve_id": cve_id, "kev_found": False}
# Analyze the KEV entry
kev_analysis = self.analyze_kev_vulnerability(kev_entry)
# Update CVE with CISA KEV data
if not cve.poc_data:
cve.poc_data = {}
cve.poc_data['cisa_kev'] = {
'vulnerability_data': kev_analysis,
'catalog_version': kev_catalog.get('catalogVersion'),
'date_released': kev_catalog.get('dateReleased'),
'synced_at': datetime.utcnow().isoformat(),
'source': 'cisa_kev'
}
cve.updated_at = datetime.utcnow()
# Update SIGMA rule with CISA KEV data
sigma_rule = self.db_session.query(SigmaRule).filter(
SigmaRule.cve_id == cve_id
).first()
if sigma_rule:
# Update rule with CISA KEV data
if not sigma_rule.nomi_sec_data:
sigma_rule.nomi_sec_data = {}
sigma_rule.nomi_sec_data['cisa_kev'] = {
'threat_level': kev_analysis['threat_level'],
'threat_score': kev_analysis['threat_score'],
'known_ransomware_use': kev_analysis['known_ransomware_use'],
'vulnerability_category': kev_analysis['vulnerability_category'],
'due_date': kev_analysis['due_date'],
'indicators': kev_analysis['indicators'],
'source': 'cisa_kev'
}
# Update exploit indicators with CISA KEV data
existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {}
kev_indicators = kev_analysis['indicators']
for key, values in kev_indicators.items():
if key not in existing_indicators:
existing_indicators[key] = []
existing_indicators[key].extend(values)
existing_indicators[key] = list(set(existing_indicators[key]))
sigma_rule.exploit_indicators = json.dumps(existing_indicators)
sigma_rule.updated_at = datetime.utcnow()
self.db_session.commit()
logger.info(f"Synchronized CISA KEV data for {cve_id} (Threat Level: {kev_analysis['threat_level']})")
return {
"cve_id": cve_id,
"kev_found": True,
"threat_level": kev_analysis['threat_level'],
"threat_score": kev_analysis['threat_score'],
"known_ransomware_use": kev_analysis['known_ransomware_use'],
"vulnerability_category": kev_analysis['vulnerability_category'],
"source": "cisa_kev"
}
async def bulk_sync_kev_data(self, batch_size: int = 100, cancellation_flag: Optional[Callable[[], bool]] = None) -> dict:
"""Synchronize CISA KEV data for all matching CVEs"""
from main import CVE, BulkProcessingJob
# Create bulk processing job
job = BulkProcessingJob(
job_type='cisa_kev_sync',
status='running',
started_at=datetime.utcnow(),
job_metadata={'batch_size': batch_size}
)
self.db_session.add(job)
self.db_session.commit()
total_processed = 0
total_found = 0
results = []
try:
# Fetch CISA KEV catalog first
kev_catalog = await self.fetch_kev_catalog(force_refresh=True)
if not kev_catalog:
raise Exception("Failed to fetch CISA KEV catalog")
# Extract all CVE IDs from the KEV catalog
kev_cve_ids = [vuln.get('cveID') for vuln in kev_catalog.get('vulnerabilities', []) if vuln.get('cveID')]
# Get CVEs that exist in our database and match KEV catalog
cves = self.db_session.query(CVE).filter(CVE.cve_id.in_(kev_cve_ids)).all()
job.total_items = len(cves)
self.db_session.commit()
logger.info(f"Found {len(cves)} CVEs matching CISA KEV catalog")
# Process in batches
for i in range(0, len(cves), batch_size):
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("CISA KEV sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
batch = cves[i:i + batch_size]
for cve in batch:
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("CISA KEV sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
try:
result = await self.sync_cve_kev_data(cve.cve_id)
total_processed += 1
if result.get("kev_found", False):
total_found += 1
results.append(result)
job.processed_items += 1
# Small delay to be respectful to CISA
await asyncio.sleep(0.1)
except Exception as e:
logger.error(f"Error syncing CISA KEV for {cve.cve_id}: {e}")
job.failed_items += 1
# Break out of outer loop if cancelled
if job.status == 'cancelled':
break
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed CISA KEV batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
# Update job status
if job.status != 'cancelled':
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_kev_found': total_found,
'cves_with_kev': len(results),
'catalog_version': kev_catalog.get('catalogVersion'),
'source': 'cisa_kev'
})
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk CISA KEV sync job failed: {e}")
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'total_processed': total_processed,
'total_kev_found': total_found,
'cves_with_kev': len(results),
'source': 'cisa_kev'
}
async def get_kev_sync_status(self) -> dict:
"""Get CISA KEV synchronization status"""
from main import CVE
# Count CVEs with CISA KEV data
total_cves = self.db_session.query(CVE).count()
result = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"cisa_kev\"%'")
)
cves_with_kev = result.scalar()
# Get catalog information if available
catalog_info = {}
if self.kev_cache:
catalog_info = {
'catalog_version': self.kev_cache.get('catalogVersion'),
'date_released': self.kev_cache.get('dateReleased'),
'total_vulnerabilities': self.kev_cache.get('count', 0),
'cache_age_seconds': int(datetime.now().timestamp() - (self.cache_timestamp or 0))
}
return {
'total_cves': total_cves,
'cves_with_kev_data': cves_with_kev,
'kev_coverage': (cves_with_kev / total_cves * 100) if total_cves > 0 else 0,
'kev_sync_status': 'active' if cves_with_kev > 0 else 'pending',
'catalog_info': catalog_info,
'source': 'cisa_kev'
}
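For quick inspection outside the app, a minimal sketch that pulls the same catalog with plain aiohttp (no database or caching), using the hardcoded feed URL from the client above:

import asyncio
import aiohttp

KEV_URL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"

async def peek_kev():
    async with aiohttp.ClientSession() as session:
        async with session.get(KEV_URL, timeout=30) as resp:
            catalog = await resp.json()
    print(catalog.get("catalogVersion"), catalog.get("count"), "entries")
    # Tally entries flagged with known ransomware campaign use
    known = sum(1 for v in catalog.get("vulnerabilities", [])
                if v.get("knownRansomwareCampaignUse") == "Known")
    print("known ransomware use:", known)

asyncio.run(peek_kev())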

725
backend/exploitdb_client_local.py Normal file

@@ -0,0 +1,725 @@
"""
ExploitDB Local Filesystem Integration Client
Interfaces with the local ExploitDB submodule at exploit-db-mirror/
"""
import os
import re
import json
import logging
from datetime import datetime
from typing import Callable, Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ExploitDBLocalClient:
"""Client for interfacing with local ExploitDB mirror filesystem"""
def __init__(self, db_session: Session):
self.db_session = db_session
# Path to the local exploit-db-mirror submodule (in container: /app/exploit-db-mirror)
self.exploitdb_path = Path("/app/exploit-db-mirror")
self.exploits_path = self.exploitdb_path / "exploits"
# ExploitDB URL pattern for mapping
self.exploit_url_pattern = re.compile(r'https?://(?:www\.)?exploit-db\.com/exploits/(\d+)')
# Cache for file searches
self.file_cache = {}
# Build file index on initialization
self._build_file_index()
def _build_file_index(self):
"""Build an index of exploit ID to file path for fast lookups"""
logger.info("Building ExploitDB file index...")
self.file_index = {}
if not self.exploits_path.exists():
logger.error(f"ExploitDB path not found: {self.exploits_path}")
return
# Walk through all exploit files
for root, dirs, files in os.walk(self.exploits_path):
for file in files:
# Extract exploit ID from filename (e.g., "12345.py" -> "12345")
match = re.match(r'^(\d+)\.(\w+)$', file)
if match:
exploit_id = match.group(1)
file_extension = match.group(2)
file_path = Path(root) / file
# Store in index
self.file_index[exploit_id] = {
'path': file_path,
'filename': file,
'extension': file_extension,
'category': self._extract_category_from_path(file_path),
'subcategory': self._extract_subcategory_from_path(file_path)
}
logger.info(f"Built index with {len(self.file_index)} exploits")
def _extract_category_from_path(self, file_path: Path) -> str:
"""Extract category from file path (e.g., linux, windows, etc.)"""
parts = file_path.parts
exploits_index = None
for i, part in enumerate(parts):
if part == "exploits":
exploits_index = i
break
if exploits_index is not None and exploits_index + 1 < len(parts):
return parts[exploits_index + 1]
return "unknown"
def _extract_subcategory_from_path(self, file_path: Path) -> str:
"""Extract subcategory from file path (e.g., local, remote, webapps, etc.)"""
parts = file_path.parts
exploits_index = None
for i, part in enumerate(parts):
if part == "exploits":
exploits_index = i
break
if exploits_index is not None and exploits_index + 2 < len(parts):
return parts[exploits_index + 2]
return "unknown"
def extract_exploit_id_from_url(self, url: str) -> Optional[str]:
"""Extract exploit ID from ExploitDB URL"""
match = self.exploit_url_pattern.search(url)
if match:
return match.group(1)
return None
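# Example: "https://www.exploit-db.com/exploits/44887" -> "44887" (hypothetical ID);
# reference URLs that do not match the pattern return None and are skipped by callers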
def get_exploit_details(self, exploit_id: str) -> Optional[dict]:
"""Get exploit details from local filesystem"""
if exploit_id not in self.file_index:
logger.debug(f"Exploit {exploit_id} not found in local index")
return None
file_info = self.file_index[exploit_id]
file_path = file_info['path']
try:
# Read file content
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Get file stats
stat = file_path.stat()
return {
'id': exploit_id,
'filename': file_info['filename'],
'path': str(file_path.relative_to(self.exploitdb_path)),
'full_path': str(file_path),
'category': file_info['category'],
'subcategory': file_info['subcategory'],
'extension': file_info['extension'],
'content': content,
'size': stat.st_size,
'modified_time': datetime.fromtimestamp(stat.st_mtime),
'local_url': f"file://{file_path}"
}
except Exception as e:
logger.error(f"Error reading exploit file {file_path}: {e}")
return None
def analyze_exploit_content(self, exploit_data: dict) -> dict:
"""Analyze exploit content to extract indicators"""
if not exploit_data or not exploit_data.get('content'):
return {}
content = exploit_data['content']
indicators = {
'processes': [],
'files': [],
'network': [],
'registry': [],
'commands': [],
'urls': [],
'techniques': [],
'languages': [],
'platforms': [],
'syscalls': [],
'functions': []
}
# Determine programming language from extension
extension = exploit_data.get('extension', '').lower()
language_map = {
'py': 'python',
'rb': 'ruby',
'pl': 'perl',
'c': 'c',
'cpp': 'cpp',
'cc': 'cpp',
'cxx': 'cpp',
'sh': 'bash',
'ps1': 'powershell',
'java': 'java',
'js': 'javascript',
'php': 'php',
'asp': 'asp',
'aspx': 'aspx',
'jsp': 'jsp',
'go': 'go',
'rs': 'rust',
'asm': 'assembly',
's': 'assembly',
'nasm': 'assembly'
}
if extension in language_map:
indicators['languages'].append(language_map[extension])
# Extract platform from path
category = exploit_data.get('category', '').lower()
if category in ['linux', 'windows', 'osx', 'macos', 'android', 'freebsd', 'solaris']:
indicators['platforms'].append(category)
# Extract indicators from content
content_lower = content.lower()
# Process patterns - enhanced for different languages
process_patterns = [
r'\b(cmd\.exe|powershell\.exe|bash|sh|python|ruby|perl|java)\b',
r'\b(system|exec|popen|subprocess|shell_exec|eval|execve|execl|execlp)\b',
r'\b(createprocess|shellexecute|winexec|createthread)\b',
r'\b(mshta|rundll32|regsvr32|wscript|cscript|certutil|bitsadmin)\b',
r'\b(/bin/sh|/bin/bash|/usr/bin/python|/usr/bin/perl)\b'
]
for pattern in process_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['processes'].extend(matches)
# File patterns - enhanced
file_patterns = [
r'\b([a-zA-Z]:\\[^\\\s"\']+\\[^\\\s"\']+\.[a-zA-Z0-9]+)\b', # Windows paths
r'\b(/[^/\s"\']+/[^/\s"\']+\.[a-zA-Z0-9]+)\b', # Unix paths
r'\b(\w+\.(exe|dll|so|dylib|bat|ps1|py|sh|jar|war|php|jsp|asp|aspx|txt|log|conf))\b',
r'\b(/tmp/[^\s"\']+)\b', # Temp files
r'\b(/etc/[^\s"\']+)\b', # Config files
r'\b(/var/[^\s"\']+)\b', # Var files
r'\b(/proc/[^\s"\']+)\b' # Proc files
]
for pattern in file_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if matches and isinstance(matches[0], tuple):
indicators['files'].extend([m[0] for m in matches])
else:
indicators['files'].extend(matches)
# Network patterns - enhanced
network_patterns = [
r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses
r'\b(https?://[^\s<>"\']+)\b', # URLs
r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', # Domain names
r'\b(bind|connect|listen|socket|recv|send|accept)\b', # Network functions
r'\b(AF_INET|SOCK_STREAM|SOCK_DGRAM)\b', # Socket constants
r'\b(nc|netcat|ncat|telnet|ssh|ftp|wget|curl)\b' # Network tools
]
for pattern in network_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if 'http' in pattern:
indicators['urls'].extend(matches)
else:
indicators['network'].extend(matches)
# Command patterns - enhanced
command_patterns = [
r'\b(curl|wget|nc|netcat|telnet|ssh|ftp)\b',
r'\b(whoami|id|uname|systeminfo|ipconfig|ifconfig|netstat|ps|top|lsof)\b',
r'\b(cat|type|dir|ls|find|grep|awk|sed|sort|uniq)\b',
r'\b(echo|printf|print)\b',
r'\b(base64|decode|encode|openssl|gpg)\b',
r'\b(sudo|su|chmod|chown|mount|umount)\b',
r'\b(service|systemctl|chkconfig|update-rc\.d)\b'
]
for pattern in command_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['commands'].extend(matches)
# Registry patterns (Windows-specific)
registry_patterns = [
r'\b(HKEY_[A-Z_]+)\b',
r'\b(HKLM|HKCU|HKCR|HKU|HKCC)\b',
r'\b(reg\s+add|reg\s+query|reg\s+delete|regedit)\b',
r'\b(SOFTWARE\\\\[^\\\s"\']+)\b',
r'\b(SYSTEM\\\\[^\\\s"\']+)\b',
r'\b(CurrentVersion\\\\[^\\\s"\']+)\b'
]
for pattern in registry_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['registry'].extend(matches)
# System call patterns (Linux/Unix)
syscall_patterns = [
r'\b(open|close|read|write|lseek|stat|fstat|lstat)\b',
r'\b(fork|vfork|clone|execve|wait|waitpid)\b',
r'\b(socket|bind|listen|accept|connect|send|recv)\b',
r'\b(mmap|munmap|mprotect|brk|sbrk)\b',
r'\b(ptrace|kill|signal|alarm)\b'
]
for pattern in syscall_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['syscalls'].extend(matches)
# Function patterns
function_patterns = [
r'\b(main|printf|scanf|malloc|free|strcpy|strcat|strlen)\b',
r'\b(gets|puts|fgets|fputs|fopen|fclose|fread|fwrite)\b',
r'\b(sprintf|snprintf|memcpy|memset|strcmp|strncmp)\b'
]
for pattern in function_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['functions'].extend(matches)
# Clean up and deduplicate
for key in indicators:
# Remove empty strings and duplicates
indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 1]))
# Limit to reasonable number of indicators
indicators[key] = indicators[key][:25]
return indicators
def calculate_exploit_quality_score(self, exploit_data: dict) -> dict:
"""Calculate quality score for an exploit"""
quality_score = 0
factors = {}
# File size factor (0-25 points)
file_size = exploit_data.get('size', 0)
if file_size > 0:
if file_size > 10000: # Large files (10KB+)
size_score = 25
elif file_size > 5000: # Medium files (5KB+)
size_score = 20
elif file_size > 1000: # Small files (1KB+)
size_score = 15
elif file_size > 500: # Very small files (500B+)
size_score = 10
else: # Tiny files
size_score = 5
quality_score += size_score
factors['size_score'] = size_score
# Content analysis factor (0-30 points)
content = exploit_data.get('content', '')
if content:
content_score = 0
lines = content.split('\n')
# Check for comments and documentation
comment_lines = 0
for line in lines:
stripped = line.strip()
if (stripped.startswith('#') or stripped.startswith('//') or
stripped.startswith('/*') or stripped.startswith('*') or
stripped.startswith('"""') or stripped.startswith("'''")):
comment_lines += 1
comment_score = min(comment_lines, 10) # Up to 10 points
content_score += comment_score
# Check for function definitions
function_patterns = [
r'\bdef\s+\w+', # Python
r'\bfunction\s+\w+', # JavaScript
r'\bvoid\s+\w+', # C/C++
r'\bint\s+\w+', # C/C++
r'\bchar\s+\w+', # C/C++
r'\bsub\s+\w+', # Perl
r'^\w+\s*\(', # Generic function calls
]
function_count = 0
for pattern in function_patterns:
matches = re.findall(pattern, content, re.IGNORECASE | re.MULTILINE)
function_count += len(matches)
function_score = min(function_count, 15) # Up to 15 points
content_score += function_score
# Check for include/import statements
include_patterns = [
r'#include\s*[<"]', # C/C++
r'import\s+\w+', # Python/Java
r'require\s+\w+', # Ruby/Perl
r'use\s+\w+', # Perl
]
include_count = 0
for pattern in include_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
include_count += len(matches)
include_score = min(include_count, 5) # Up to 5 points
content_score += include_score
quality_score += content_score
factors['content_score'] = content_score
# Platform/category factor (0-20 points)
category = exploit_data.get('category', '').lower()
subcategory = exploit_data.get('subcategory', '').lower()
platform_score = 0
if category in ['linux', 'windows', 'osx', 'macos']:
platform_score += 10
elif category in ['android', 'freebsd', 'solaris']:
platform_score += 8
elif category in ['multiple', 'unix']:
platform_score += 6
if subcategory in ['local', 'remote']:
platform_score += 10
elif subcategory in ['webapps', 'dos']:
platform_score += 8
elif subcategory in ['shellcode']:
platform_score += 6
quality_score += platform_score
factors['platform_score'] = platform_score
# Language factor (0-15 points)
extension = exploit_data.get('extension', '').lower()
lang_score = 0
if extension in ['c', 'cpp', 'cc', 'cxx']: # Compiled languages
lang_score = 15
elif extension in ['py', 'rb', 'pl', 'java']: # High-level languages
lang_score = 12
elif extension in ['sh', 'ps1', 'bat']: # Scripting languages
lang_score = 8
elif extension in ['asm', 's', 'nasm']: # Assembly
lang_score = 10
elif extension in ['php', 'asp', 'aspx', 'jsp']: # Web languages
lang_score = 6
elif extension in ['txt', 'html']: # Text/docs
lang_score = 3
quality_score += lang_score
factors['language_score'] = lang_score
# File age factor (0-10 points) - newer exploits might be more relevant
modified_time = exploit_data.get('modified_time')
if modified_time:
days_old = (datetime.now() - modified_time).days
if days_old < 365: # Less than 1 year
age_score = 10
elif days_old < 365 * 3: # Less than 3 years
age_score = 8
elif days_old < 365 * 5: # Less than 5 years
age_score = 6
elif days_old < 365 * 10: # Less than 10 years
age_score = 4
else: # Very old
age_score = 2
quality_score += age_score
factors['age_score'] = age_score
# Determine quality tier
quality_tier = self._get_exploit_quality_tier(quality_score)
return {
'quality_score': quality_score,
'factors': factors,
'quality_tier': quality_tier
}
def _get_exploit_quality_tier(self, score: int) -> str:
"""Get quality tier based on score"""
if score >= 80:
return 'excellent'
elif score >= 65:
return 'good'
elif score >= 45:
return 'fair'
elif score >= 25:
return 'poor'
else:
return 'very_poor'
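# Worked example (illustrative): a 6 KB Python local exploit for Linux modified two
# years ago with moderate comments/functions/imports scores roughly
# 20 (size) + 20 (content) + 20 (linux + local) + 12 (python) + 8 (age) = 80 -> 'excellent'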
async def sync_cve_exploits(self, cve_id: str) -> dict:
"""Synchronize ExploitDB data for a specific CVE using local filesystem"""
from main import CVE, SigmaRule
# Get existing CVE
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
if not cve:
logger.warning(f"CVE {cve_id} not found in database")
return {"error": "CVE not found"}
# Extract ExploitDB URLs from reference URLs
exploit_urls = []
if cve.reference_urls:
for url in cve.reference_urls:
exploit_id = self.extract_exploit_id_from_url(url)
if exploit_id:
exploit_urls.append((url, exploit_id))
if not exploit_urls:
logger.info(f"No ExploitDB URLs found for {cve_id}")
return {"cve_id": cve_id, "exploits_found": 0}
# Fetch exploit details from local filesystem
exploit_data = []
total_quality_score = 0
for url, exploit_id in exploit_urls:
try:
details = self.get_exploit_details(exploit_id)
if details:
# Analyze exploit content
indicators = self.analyze_exploit_content(details)
quality_analysis = self.calculate_exploit_quality_score(details)
exploit_entry = {
'id': exploit_id,
'url': url,
'filename': details.get('filename'),
'path': details.get('path'),
'category': details.get('category'),
'subcategory': details.get('subcategory'),
'extension': details.get('extension'),
'size': details.get('size'),
'modified_time': details.get('modified_time').isoformat() if details.get('modified_time') else None,
'local_url': details.get('local_url'),
'indicators': indicators,
'quality_analysis': quality_analysis
}
exploit_data.append(exploit_entry)
total_quality_score += quality_analysis['quality_score']
logger.info(f"Successfully processed exploit {exploit_id} from local filesystem")
except Exception as e:
logger.error(f"Error processing exploit {exploit_id}: {e}")
# Update CVE with ExploitDB data
if exploit_data:
# Store in existing poc_data field
if not cve.poc_data:
cve.poc_data = {}
cve.poc_data['exploitdb'] = {
'exploits': exploit_data,
'total_exploits': len(exploit_data),
'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0,
'synced_at': datetime.utcnow().isoformat(),
'source': 'local_filesystem'
}
cve.updated_at = datetime.utcnow()
# Update SIGMA rule with ExploitDB data
sigma_rule = self.db_session.query(SigmaRule).filter(
SigmaRule.cve_id == cve_id
).first()
if sigma_rule:
# Combine indicators from all exploits
combined_indicators = {}
for exploit in exploit_data:
for key, values in exploit['indicators'].items():
if key not in combined_indicators:
combined_indicators[key] = []
combined_indicators[key].extend(values)
# Deduplicate
for key in combined_indicators:
combined_indicators[key] = list(set(combined_indicators[key]))
# Update rule with ExploitDB data
if not sigma_rule.nomi_sec_data:
sigma_rule.nomi_sec_data = {}
sigma_rule.nomi_sec_data['exploitdb'] = {
'total_exploits': len(exploit_data),
'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0,
'best_exploit': max(exploit_data, key=lambda x: x['quality_analysis']['quality_score']) if exploit_data else None,
'indicators': combined_indicators,
'source': 'local_filesystem'
}
# Update exploit indicators
existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {}
for key, values in combined_indicators.items():
if key not in existing_indicators:
existing_indicators[key] = []
existing_indicators[key].extend(values)
existing_indicators[key] = list(set(existing_indicators[key]))
sigma_rule.exploit_indicators = json.dumps(existing_indicators)
sigma_rule.updated_at = datetime.utcnow()
self.db_session.commit()
logger.info(f"Synchronized {len(exploit_data)} ExploitDB exploits for {cve_id} from local filesystem")
return {
"cve_id": cve_id,
"exploits_found": len(exploit_data),
"total_quality_score": total_quality_score,
"average_quality": total_quality_score // len(exploit_data) if exploit_data else 0,
"exploit_urls": [e['url'] for e in exploit_data],
"source": "local_filesystem"
}
async def bulk_sync_exploitdb(self, batch_size: int = 50, cancellation_flag: Optional[Callable[[], bool]] = None) -> dict:
"""Synchronize ExploitDB data for all CVEs with ExploitDB references using local filesystem"""
from main import CVE, BulkProcessingJob
from sqlalchemy import text
# Create bulk processing job
job = BulkProcessingJob(
job_type='exploitdb_sync_local',
status='running',
started_at=datetime.utcnow(),
job_metadata={'batch_size': batch_size, 'source': 'local_filesystem'}
)
self.db_session.add(job)
self.db_session.commit()
total_processed = 0
total_found = 0
results = []
try:
# Get all CVEs with ExploitDB references using text search
cves = self.db_session.query(CVE).filter(
text("reference_urls::text LIKE '%exploit-db%'")
).all()
job.total_items = len(cves)
self.db_session.commit()
logger.info(f"Found {len(cves)} CVEs with ExploitDB references for local sync")
# Process in batches
for i in range(0, len(cves), batch_size):
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("ExploitDB local sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
batch = cves[i:i + batch_size]
for cve in batch:
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("ExploitDB local sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
try:
result = await self.sync_cve_exploits(cve.cve_id)
total_processed += 1
if result.get("exploits_found", 0) > 0:
total_found += result["exploits_found"]
results.append(result)
job.processed_items += 1
# Very small delay for responsiveness
# No need for long delays with local filesystem
except Exception as e:
logger.error(f"Error syncing ExploitDB for {cve.cve_id}: {e}")
job.failed_items += 1
# Break out of outer loop if cancelled
if job.status == 'cancelled':
break
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed ExploitDB local batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
# Update job status
if job.status != 'cancelled':
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_exploits_found': total_found,
'cves_with_exploits': len(results),
'source': 'local_filesystem'
})
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk ExploitDB local sync job failed: {e}")
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'total_processed': total_processed,
'total_exploits_found': total_found,
'cves_with_exploits': len(results),
'source': 'local_filesystem'
}
async def get_exploitdb_sync_status(self) -> dict:
"""Get ExploitDB synchronization status for local filesystem"""
from main import CVE
from sqlalchemy import text
# Count CVEs with ExploitDB references
total_cves = self.db_session.query(CVE).count()
# Count CVEs with ExploitDB data
result = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"exploitdb\"%'")
)
cves_with_exploitdb = result.scalar()
# Count CVEs with ExploitDB URLs in references
result2 = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE reference_urls::text LIKE '%exploit-db%'")
)
cves_with_exploitdb_refs = result2.scalar()
return {
'total_cves': total_cves,
'cves_with_exploitdb_refs': cves_with_exploitdb_refs,
'cves_with_exploitdb_data': cves_with_exploitdb,
'exploitdb_coverage': (cves_with_exploitdb / cves_with_exploitdb_refs * 100) if cves_with_exploitdb_refs > 0 else 0,
'exploitdb_sync_status': 'active' if cves_with_exploitdb > 0 else 'pending',
'exploitdb_local_index_size': len(self.file_index),
'source': 'local_filesystem'
}
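A minimal lookup sketch; only sync_cve_exploits touches the database, so the read-only helpers work with any session (SessionLocal from main.py is an assumption):

from exploitdb_client_local import ExploitDBLocalClient
from main import SessionLocal   # assumption: session factory lives in main.py

db = SessionLocal()
client = ExploitDBLocalClient(db)              # builds the exploit-ID -> file index
details = client.get_exploit_details("44887")  # hypothetical exploit ID
if details:
    indicators = client.analyze_exploit_content(details)
    quality = client.calculate_exploit_quality_score(details)
    print(details['path'], quality['quality_tier'], indicators['commands'][:5])
db.close()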

backend/main.py

@@ -154,6 +154,14 @@ class GitHubPoCSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 50
class ExploitDBSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 30
class CISAKEVSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 100
class RuleRegenRequest(BaseModel):
force: bool = False
@@ -1224,6 +1232,238 @@ async def sync_github_pocs(background_tasks: BackgroundTasks,
"batch_size": request.batch_size
}
@app.post("/api/sync-exploitdb")
async def sync_exploitdb(background_tasks: BackgroundTasks,
request: ExploitDBSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize ExploitDB data from git mirror"""
# Create job record
job = BulkProcessingJob(
job_type='exploitdb_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from exploitdb_client_local import ExploitDBLocalClient
client = ExploitDBLocalClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_exploits(request.cve_id)
logger.info(f"ExploitDB sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_exploitdb(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"ExploitDB bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"ExploitDB sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"ExploitDB sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
@app.post("/api/sync-cisa-kev")
async def sync_cisa_kev(background_tasks: BackgroundTasks,
request: CISAKEVSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize CISA Known Exploited Vulnerabilities data"""
# Create job record
job = BulkProcessingJob(
job_type='cisa_kev_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from cisa_kev_client import CISAKEVClient
client = CISAKEVClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_kev_data(request.cve_id)
logger.info(f"CISA KEV sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_kev_data(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"CISA KEV bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"CISA KEV sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"CISA KEV sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
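# A quick smoke test of the two sync endpoints above, assuming the backend listens
# on localhost:8000 (per the uvicorn command in docker-compose):
import requests

BASE = "http://localhost:8000"  # assumed backend address

# Kick off a bulk ExploitDB sync from the local mirror (omit cve_id to sync all CVEs)
job = requests.post(f"{BASE}/api/sync-exploitdb", json={"batch_size": 30}).json()
print(job["job_id"], job["status"])

# Sync CISA KEV data for a single CVE (hypothetical example ID)
print(requests.post(f"{BASE}/api/sync-cisa-kev", json={"cve_id": "CVE-2021-44228"}).json())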
@app.get("/api/exploitdb-stats")
async def get_exploitdb_stats(db: Session = Depends(get_db)):
"""Get ExploitDB-related statistics"""
try:
from exploitdb_client_local import ExploitDBLocalClient
client = ExploitDBLocalClient(db)
# Get sync status
status = await client.get_exploitdb_sync_status()
# Get quality distribution from ExploitDB data
quality_distribution = {}
from sqlalchemy import text
cves_with_exploitdb = db.query(CVE).filter(
text("poc_data::text LIKE '%\"exploitdb\"%'")
).all()
for cve in cves_with_exploitdb:
if cve.poc_data and 'exploitdb' in cve.poc_data:
exploits = cve.poc_data['exploitdb'].get('exploits', [])
for exploit in exploits:
quality_tier = exploit.get('quality_analysis', {}).get('quality_tier', 'unknown')
quality_distribution[quality_tier] = quality_distribution.get(quality_tier, 0) + 1
# Get category distribution
category_distribution = {}
for cve in cves_with_exploitdb:
if cve.poc_data and 'exploitdb' in cve.poc_data:
exploits = cve.poc_data['exploitdb'].get('exploits', [])
for exploit in exploits:
category = exploit.get('category', 'unknown')
category_distribution[category] = category_distribution.get(category, 0) + 1
return {
"exploitdb_sync_status": status,
"quality_distribution": quality_distribution,
"category_distribution": category_distribution,
"total_exploitdb_cves": len(cves_with_exploitdb),
"total_exploits": sum(
len(cve.poc_data.get('exploitdb', {}).get('exploits', []))
for cve in cves_with_exploitdb
if cve.poc_data and 'exploitdb' in cve.poc_data
)
}
except Exception as e:
logger.error(f"Error getting ExploitDB stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/github-poc-stats") @app.get("/api/github-poc-stats")
async def get_github_poc_stats(db: Session = Depends(get_db)): async def get_github_poc_stats(db: Session = Depends(get_db)):
"""Get GitHub PoC-related statistics""" """Get GitHub PoC-related statistics"""
@@ -1298,6 +1538,74 @@ async def get_github_poc_status(db: Session = Depends(get_db)):
logger.error(f"Error checking GitHub PoC status: {e}")
return {"error": str(e)}
@app.get("/api/cisa-kev-stats")
async def get_cisa_kev_stats(db: Session = Depends(get_db)):
"""Get CISA KEV-related statistics"""
try:
from cisa_kev_client import CISAKEVClient
client = CISAKEVClient(db)
# Get sync status
status = await client.get_kev_sync_status()
# Get threat level distribution from CISA KEV data
threat_level_distribution = {}
from sqlalchemy import text
cves_with_kev = db.query(CVE).filter(
text("poc_data::text LIKE '%\"cisa_kev\"%'")
).all()
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
threat_level = vuln_data.get('threat_level', 'unknown')
threat_level_distribution[threat_level] = threat_level_distribution.get(threat_level, 0) + 1
# Get vulnerability category distribution
category_distribution = {}
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
category = vuln_data.get('vulnerability_category', 'unknown')
category_distribution[category] = category_distribution.get(category, 0) + 1
# Get ransomware usage statistics
ransomware_stats = {'known': 0, 'unknown': 0}
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
ransomware_use = vuln_data.get('known_ransomware_use', 'Unknown').lower()
if ransomware_use == 'known':
ransomware_stats['known'] += 1
else:
ransomware_stats['unknown'] += 1
# Calculate average threat score
threat_scores = []
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
threat_score = vuln_data.get('threat_score', 0)
if threat_score:
threat_scores.append(threat_score)
avg_threat_score = sum(threat_scores) / len(threat_scores) if threat_scores else 0
return {
"cisa_kev_sync_status": status,
"threat_level_distribution": threat_level_distribution,
"category_distribution": category_distribution,
"ransomware_stats": ransomware_stats,
"average_threat_score": round(avg_threat_score, 2),
"total_kev_cves": len(cves_with_kev),
"total_with_threat_scores": len(threat_scores)
}
except Exception as e:
logger.error(f"Error getting CISA KEV stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
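# The matching read-side checks for the two stats endpoints, same address
# assumption as the sketch above:
import requests

BASE = "http://localhost:8000"  # assumed backend address

exploitdb = requests.get(f"{BASE}/api/exploitdb-stats").json()
kev = requests.get(f"{BASE}/api/cisa-kev-stats").json()
print("ExploitDB coverage:", exploitdb["exploitdb_sync_status"]["exploitdb_coverage"])
print("KEV threat levels:", kev["threat_level_distribution"])
print("avg threat score:", kev["average_threat_score"])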
@app.get("/api/bulk-jobs") @app.get("/api/bulk-jobs")
async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)): async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
"""Get bulk processing job status""" """Get bulk processing job status"""

docker-compose.yml

@@ -24,12 +24,18 @@ services:
DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
NVD_API_KEY: ${NVD_API_KEY:-}
GITHUB_TOKEN: ${GITHUB_TOKEN}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434}
LLM_PROVIDER: ${LLM_PROVIDER:-ollama}
LLM_MODEL: ${LLM_MODEL:-llama3.2}
depends_on:
db:
condition: service_healthy
volumes:
- ./backend:/app
- ./github_poc_collector:/github_poc_collector
- ./exploit-db-mirror:/app/exploit-db-mirror
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
frontend:
@@ -51,6 +57,17 @@ services:
volumes:
- redis_data:/data
ollama:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ollama_data:/root/.ollama
environment:
- OLLAMA_HOST=0.0.0.0
restart: unless-stopped
volumes:
postgres_data:
redis_data:
ollama_data:

1
exploit-db-mirror Submodule

@@ -0,0 +1 @@
Subproject commit 99e10e9ba8fa6e0db140c33eb19ea76e9a144fb8


@@ -19,18 +19,73 @@ function App() {
const [bulkStatus, setBulkStatus] = useState({});
const [pocStats, setPocStats] = useState({});
const [gitHubPocStats, setGitHubPocStats] = useState({});
const [exploitdbStats, setExploitdbStats] = useState({});
const [cisaKevStats, setCisaKevStats] = useState({});
const [bulkProcessing, setBulkProcessing] = useState(false);
const [hasRunningJobs, setHasRunningJobs] = useState(false);
const [runningJobTypes, setRunningJobTypes] = useState(new Set());
const [llmStatus, setLlmStatus] = useState({});
const [exploitSyncDropdownOpen, setExploitSyncDropdownOpen] = useState(false);
useEffect(() => {
fetchData();
}, []);
// Close dropdown when clicking outside
useEffect(() => {
const handleClickOutside = (event) => {
if (exploitSyncDropdownOpen && !event.target.closest('.relative')) {
setExploitSyncDropdownOpen(false);
}
};
document.addEventListener('mousedown', handleClickOutside);
return () => {
document.removeEventListener('mousedown', handleClickOutside);
};
}, [exploitSyncDropdownOpen]);
  // Helper functions to check if specific job types are running
  const isJobTypeRunning = (jobType) => {
    return runningJobTypes.has(jobType);
  };

  const isBulkSeedRunning = () => {
    return isJobTypeRunning('nvd_bulk_seed') || isJobTypeRunning('bulk_seed');
  };

  const isIncrementalUpdateRunning = () => {
    return isJobTypeRunning('incremental_update');
  };

  const isNomiSecSyncRunning = () => {
    return isJobTypeRunning('nomi_sec_sync');
  };

  const isGitHubPocSyncRunning = () => {
    return isJobTypeRunning('github_poc_sync');
  };

  const isExploitDBSyncRunning = () => {
    return isJobTypeRunning('exploitdb_sync') || isJobTypeRunning('exploitdb_sync_local');
  };

  const isCISAKEVSyncRunning = () => {
    return isJobTypeRunning('cisa_kev_sync');
  };

  const isRuleGenerationRunning = () => {
    return isJobTypeRunning('rule_regeneration') || isJobTypeRunning('llm_rule_generation');
  };

  const areAnyExploitSyncsRunning = () => {
    return isNomiSecSyncRunning() || isGitHubPocSyncRunning() || isExploitDBSyncRunning() || isCISAKEVSyncRunning();
  };
  const fetchData = async () => {
    try {
      setLoading(true);
      const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes, exploitdbStatsRes, cisaKevStatsRes, llmStatusRes] = await Promise.all([
        axios.get(`${API_BASE_URL}/api/cves`),
        axios.get(`${API_BASE_URL}/api/sigma-rules`),
        axios.get(`${API_BASE_URL}/api/stats`),
@@ -38,6 +93,8 @@ function App() {
        axios.get(`${API_BASE_URL}/api/bulk-status`),
        axios.get(`${API_BASE_URL}/api/poc-stats`),
        axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/exploitdb-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/cisa-kev-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/llm-status`).catch(err => ({ data: {} }))
      ]);
@@ -48,11 +105,17 @@ function App() {
      setBulkStatus(bulkStatusRes.data);
      setPocStats(pocStatsRes.data);
      setGitHubPocStats(githubPocStatsRes.data);
      setExploitdbStats(exploitdbStatsRes.data);
      setCisaKevStats(cisaKevStatsRes.data);
      setLlmStatus(llmStatusRes.data);

      // Update running jobs state
      const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending');
      setHasRunningJobs(runningJobs.length > 0);

      // Update specific job types that are running
      const activeJobTypes = new Set(runningJobs.map(job => job.job_type));
      setRunningJobTypes(activeJobTypes);
    } catch (error) {
      console.error('Error fetching data:', error);
    } finally {
@@ -157,6 +220,42 @@
    }
  };
  const syncExploitDB = async (cveId = null) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/sync-exploitdb`, {
        cve_id: cveId,
        batch_size: 30
      });
      console.log('ExploitDB sync response:', response.data);
      fetchData();
    } catch (error) {
      console.error('Error syncing ExploitDB:', error);
    }
  };

  const syncCISAKEV = async (cveId = null) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/sync-cisa-kev`, {
        cve_id: cveId,
        batch_size: 100
      });
      console.log('CISA KEV sync response:', response.data);
      fetchData();
    } catch (error) {
      console.error('Error syncing CISA KEV:', error);
    }
  };
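Both handlers simply POST to the backend, so the same syncs can be triggered from a script; a minimal sketch mirroring the payloads above (endpoints and batch sizes are taken from the frontend calls; the base URL is an assumption):

from typing import Optional

import requests

BASE_URL = "http://localhost:8000"

def trigger_sync(path: str, batch_size: int, cve_id: Optional[str] = None) -> dict:
    """Kick off a background sync job; pass cve_id to target a single CVE."""
    resp = requests.post(f"{BASE_URL}{path}", json={"cve_id": cve_id, "batch_size": batch_size}, timeout=30)
    resp.raise_for_status()
    return resp.json()

# Same parameters the UI uses: smaller batches for ExploitDB, larger for KEV.
print(trigger_sync("/api/sync-exploitdb", batch_size=30))
print(trigger_sync("/api/sync-cisa-kev", batch_size=100))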
  const syncReferences = async () => {
    try {
      // Placeholder for future implementation
      console.log('Sync References - Not implemented yet');
      alert('Sync References functionality will be implemented in a future update');
    } catch (error) {
      console.error('Error syncing references:', error);
    }
  };
  const regenerateRules = async (force = false) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
@@ -235,6 +334,42 @@ function App() {
            <p className="text-3xl font-bold text-purple-600">{stats.cves_with_pocs || 0}</p>
            <p className="text-sm text-gray-500">{(stats.poc_coverage || 0).toFixed(1)}% coverage</p>
            <p className="text-sm text-gray-500">GitHub PoCs: {gitHubPocStats.cves_with_github_pocs || 0}</p>
            <p className="text-sm text-gray-500">ExploitDB: {exploitdbStats.total_exploitdb_cves || 0}</p>
            <p className="text-sm text-gray-500">CISA KEV: {cisaKevStats.total_kev_cves || 0}</p>
          </div>
          <div className="bg-white p-6 rounded-lg shadow">
            <h3 className="text-lg font-medium text-gray-900">Recent CVEs (7d)</h3>
@@ -244,82 +345,193 @@ function App() {
            <h3 className="text-lg font-medium text-gray-900">High Quality PoCs</h3>
            <p className="text-3xl font-bold text-indigo-600">{pocStats.high_quality_cves || 0}</p>
            <p className="text-sm text-gray-500">Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}</p>
            <p className="text-sm text-gray-500">GitHub: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}</p>
            <p className="text-sm text-gray-500">ExploitDB: {exploitdbStats.total_exploits || 0} exploits</p>
            <p className="text-sm text-gray-500">CISA KEV: {(cisaKevStats.average_threat_score || 0).toFixed(1)} threat</p>
          </div>
        </div>
        {/* Data Synchronization Controls */}
        <div className="bg-white rounded-lg shadow p-6">
          <h2 className="text-xl font-bold text-gray-900 mb-6">Data Synchronization</h2>
          {/* Phase 1: CVE Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 1: CVE Data Syncing</h3>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
              <button
                onClick={() => startBulkSeed(2002)}
                disabled={isBulkSeedRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isBulkSeedRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-blue-600 hover:bg-blue-700'
                }`}
              >
                {isBulkSeedRunning() ? 'Processing...' : 'Sync NVD CVEs'}
              </button>
              <button
                onClick={startIncrementalUpdate}
                disabled={isIncrementalUpdateRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isIncrementalUpdateRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-green-600 hover:bg-green-700'
                }`}
              >
                {isIncrementalUpdateRunning() ? 'Processing...' : 'Incremental Update'}
              </button>
            </div>
          </div>
          {/* Phase 2: Exploit Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 2: Exploit Data Syncing</h3>
            <div className="relative inline-block text-left">
              <button
                onClick={() => setExploitSyncDropdownOpen(!exploitSyncDropdownOpen)}
                className={`inline-flex items-center justify-center w-full rounded-md border shadow-sm px-4 py-2 text-sm font-medium ${
                  areAnyExploitSyncsRunning()
                    ? 'border-blue-300 bg-blue-50 text-blue-700'
                    : 'border-gray-300 bg-white text-gray-700 hover:bg-gray-50'
                }`}
              >
                {areAnyExploitSyncsRunning() ? 'Exploit Syncs Running...' : 'Sync Exploit Data'}
                <svg className="-mr-1 ml-2 h-5 w-5" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
                  <path fillRule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clipRule="evenodd" />
                </svg>
              </button>

              {exploitSyncDropdownOpen && (
                <div className="origin-top-right absolute right-0 mt-2 w-56 rounded-md shadow-lg bg-white ring-1 ring-black ring-opacity-5 z-10">
                  <div className="py-1">
                    <button
                      onClick={() => {
                        if (!isNomiSecSyncRunning()) {
                          syncNomiSec();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isNomiSecSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isNomiSecSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-purple-50 hover:text-purple-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isNomiSecSyncRunning() ? 'bg-gray-400' : 'bg-purple-600'
                      }`}></span>
                      {isNomiSecSyncRunning() ? 'Syncing nomi-sec PoCs...' : 'Sync nomi-sec PoCs'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isGitHubPocSyncRunning()) {
                          syncGitHubPocs();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isGitHubPocSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isGitHubPocSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-green-50 hover:text-green-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isGitHubPocSyncRunning() ? 'bg-gray-400' : 'bg-green-600'
                      }`}></span>
                      {isGitHubPocSyncRunning() ? 'Syncing GitHub PoCs...' : 'Sync GitHub PoCs'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isExploitDBSyncRunning()) {
                          syncExploitDB();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isExploitDBSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isExploitDBSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-red-50 hover:text-red-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isExploitDBSyncRunning() ? 'bg-gray-400' : 'bg-red-600'
                      }`}></span>
                      {isExploitDBSyncRunning() ? 'Syncing ExploitDB...' : 'Sync ExploitDB'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isCISAKEVSyncRunning()) {
                          syncCISAKEV();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isCISAKEVSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isCISAKEVSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-yellow-50 hover:text-yellow-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isCISAKEVSyncRunning() ? 'bg-gray-400' : 'bg-yellow-600'
                      }`}></span>
                      {isCISAKEVSyncRunning() ? 'Syncing CISA KEV...' : 'Sync CISA KEV'}
                    </button>
                  </div>
                </div>
              )}
            </div>
          </div>
          {/* Phase 3: Reference Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 3: Reference Data Syncing</h3>
            <button
              onClick={syncReferences}
              disabled={hasRunningJobs}
              className={`px-4 py-2 rounded-md text-white ${
                hasRunningJobs
                  ? 'bg-gray-400 cursor-not-allowed'
                  : 'bg-orange-600 hover:bg-orange-700'
              }`}
            >
              {hasRunningJobs ? 'Processing...' : 'Sync References (Coming Soon)'}
            </button>
          </div>
          {/* Phase 4: Rule Generation */}
          <div>
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 4: Rule Generation</h3>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
              <button
                onClick={() => regenerateRules()}
                disabled={isRuleGenerationRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isRuleGenerationRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-indigo-600 hover:bg-indigo-700'
                }`}
              >
                {isRuleGenerationRunning() ? 'Processing...' : 'Regenerate Rules'}
              </button>
              <button
                onClick={() => generateLlmRules()}
                disabled={isRuleGenerationRunning() || llmStatus.status !== 'ready'}
                className={`px-4 py-2 rounded-md text-white ${
                  isRuleGenerationRunning() || llmStatus.status !== 'ready'
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-violet-600 hover:bg-violet-700'
                }`}
                title={llmStatus.status !== 'ready' ? 'LLM not configured' : ''}
              >
                {isRuleGenerationRunning() ? 'Processing...' : 'Generate LLM Rules'}
              </button>
            </div>
          </div>
        </div>

        {/* LLM Configuration */}