From 696a1a346248f9052eb736976ca602a40bb86d92 Mon Sep 17 00:00:00 2001 From: bpmcdevitt Date: Thu, 10 Jul 2025 16:19:43 -0500 Subject: [PATCH] add kev support, exploitDB mirror support --- .gitmodules | 3 + backend/bulk_seeder.py | 104 ++++- backend/cisa_kev_client.py | 516 +++++++++++++++++++++ backend/exploitdb_client_local.py | 725 ++++++++++++++++++++++++++++++ backend/main.py | 308 +++++++++++++ docker-compose.yml | 17 + exploit-db-mirror | 1 + frontend/src/App.js | 340 +++++++++++--- 8 files changed, 1945 insertions(+), 69 deletions(-) create mode 100644 backend/cisa_kev_client.py create mode 100644 backend/exploitdb_client_local.py create mode 160000 exploit-db-mirror diff --git a/.gitmodules b/.gitmodules index 218e4ae..77533a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "github_poc_collector"] path = github_poc_collector url = https://git.mcdevitt.tech/bpmcdevitt/github_poc_collector +[submodule "exploit-db-mirror"] + path = exploit-db-mirror + url = https://git.mcdevitt.tech/bpmcdevitt/exploit-db-mirror.git diff --git a/backend/bulk_seeder.py b/backend/bulk_seeder.py index 89e78ed..c860d36 100644 --- a/backend/bulk_seeder.py +++ b/backend/bulk_seeder.py @@ -10,6 +10,8 @@ from typing import Optional from sqlalchemy.orm import Session from nvd_bulk_processor import NVDBulkProcessor from nomi_sec_client import NomiSecClient +from exploitdb_client_local import ExploitDBLocalClient +from cisa_kev_client import CISAKEVClient # Configure logging logging.basicConfig(level=logging.INFO) @@ -22,11 +24,15 @@ class BulkSeeder: self.db_session = db_session self.nvd_processor = NVDBulkProcessor(db_session) self.nomi_sec_client = NomiSecClient(db_session) + self.exploitdb_client = ExploitDBLocalClient(db_session) + self.cisa_kev_client = CISAKEVClient(db_session) async def full_bulk_seed(self, start_year: int = 2002, end_year: Optional[int] = None, skip_nvd: bool = False, - skip_nomi_sec: bool = False) -> dict: + skip_nomi_sec: bool = False, + skip_exploitdb: bool = False, + skip_cisa_kev: bool = False) -> dict: """ Perform complete bulk seeding operation @@ -35,6 +41,8 @@ class BulkSeeder: end_year: Ending year for NVD data (default: current year) skip_nvd: Skip NVD bulk processing (default: False) skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False) + skip_exploitdb: Skip ExploitDB synchronization (default: False) + skip_cisa_kev: Skip CISA KEV synchronization (default: False) Returns: Dictionary containing operation results @@ -46,6 +54,9 @@ class BulkSeeder: 'start_time': datetime.utcnow(), 'nvd_results': None, 'nomi_sec_results': None, + 'exploitdb_results': None, + 'cisa_kev_results': None, + 'sigma_results': None, 'total_time': None, 'status': 'running' } @@ -76,11 +87,33 @@ class BulkSeeder: else: logger.info("Phase 2: Skipping nomi-sec PoC synchronization") - # Phase 3: Generate Enhanced SIGMA Rules - logger.info("Phase 3: Generating enhanced SIGMA rules...") + # Phase 3: ExploitDB Synchronization + if not skip_exploitdb: + logger.info("Phase 3: Starting ExploitDB synchronization...") + exploitdb_results = await self.exploitdb_client.bulk_sync_exploitdb( + batch_size=30 # Smaller batches for git API stability + ) + results['exploitdb_results'] = exploitdb_results + logger.info(f"Phase 3 complete: {exploitdb_results['total_exploits_found']} exploits found") + else: + logger.info("Phase 3: Skipping ExploitDB synchronization") + + # Phase 4: CISA KEV Synchronization + if not skip_cisa_kev: + logger.info("Phase 4: Starting CISA KEV 
synchronization...") + cisa_kev_results = await self.cisa_kev_client.bulk_sync_kev_data( + batch_size=100 # Can handle larger batches since data is already filtered + ) + results['cisa_kev_results'] = cisa_kev_results + logger.info(f"Phase 4 complete: {cisa_kev_results['total_kev_found']} KEV entries found") + else: + logger.info("Phase 4: Skipping CISA KEV synchronization") + + # Phase 5: Generate Enhanced SIGMA Rules + logger.info("Phase 5: Generating enhanced SIGMA rules...") sigma_results = await self.generate_enhanced_sigma_rules() results['sigma_results'] = sigma_results - logger.info(f"Phase 3 complete: {sigma_results['rules_generated']} rules generated") + logger.info(f"Phase 5 complete: {sigma_results['rules_generated']} rules generated") results['status'] = 'completed' results['end_time'] = datetime.utcnow() @@ -107,6 +140,8 @@ class BulkSeeder: 'start_time': datetime.utcnow(), 'nvd_update': None, 'nomi_sec_update': None, + 'exploitdb_update': None, + 'cisa_kev_update': None, 'status': 'running' } @@ -125,6 +160,16 @@ class BulkSeeder: recent_cves = await self._get_recently_modified_cves() nomi_sec_update = await self._sync_specific_cves(recent_cves) results['nomi_sec_update'] = nomi_sec_update + + # Update ExploitDB data for modified CVEs + logger.info("Updating ExploitDB data for modified CVEs...") + exploitdb_update = await self._sync_specific_cves_exploitdb(recent_cves) + results['exploitdb_update'] = exploitdb_update + + # Update CISA KEV data for modified CVEs + logger.info("Updating CISA KEV data for modified CVEs...") + cisa_kev_update = await self._sync_specific_cves_cisa_kev(recent_cves) + results['cisa_kev_update'] = cisa_kev_update results['status'] = 'completed' results['end_time'] = datetime.utcnow() @@ -221,6 +266,53 @@ class BulkSeeder: 'total_pocs_found': total_pocs_found } + async def _sync_specific_cves_exploitdb(self, cve_ids: list) -> dict: + """Sync ExploitDB data for specific CVEs""" + total_processed = 0 + total_exploits_found = 0 + + for cve_id in cve_ids: + try: + result = await self.exploitdb_client.sync_cve_exploits(cve_id) + total_processed += 1 + total_exploits_found += result.get('exploits_found', 0) + + # Small delay to avoid overwhelming the API + await asyncio.sleep(0.5) + + except Exception as e: + logger.error(f"Error syncing ExploitDB for {cve_id}: {e}") + continue + + return { + 'total_processed': total_processed, + 'total_exploits_found': total_exploits_found + } + + async def _sync_specific_cves_cisa_kev(self, cve_ids: list) -> dict: + """Sync CISA KEV data for specific CVEs""" + total_processed = 0 + total_kev_found = 0 + + for cve_id in cve_ids: + try: + result = await self.cisa_kev_client.sync_cve_kev_data(cve_id) + total_processed += 1 + if result.get('kev_found', False): + total_kev_found += 1 + + # Small delay to be respectful to CISA + await asyncio.sleep(0.2) + + except Exception as e: + logger.error(f"Error syncing CISA KEV for {cve_id}: {e}") + continue + + return { + 'total_processed': total_processed, + 'total_kev_found': total_kev_found + } + async def get_seeding_status(self) -> dict: """Get current seeding status and statistics""" from main import CVE, SigmaRule, BulkProcessingJob @@ -270,7 +362,9 @@ class BulkSeeder: }, 'recent_jobs': job_status, 'nvd_data_status': await self._get_nvd_data_status(), - 'nomi_sec_status': await self.nomi_sec_client.get_sync_status() + 'nomi_sec_status': await self.nomi_sec_client.get_sync_status(), + 'exploitdb_status': await self.exploitdb_client.get_exploitdb_sync_status(), + 
'cisa_kev_status': await self.cisa_kev_client.get_kev_sync_status() } async def _get_nvd_data_status(self) -> dict: diff --git a/backend/cisa_kev_client.py b/backend/cisa_kev_client.py new file mode 100644 index 0000000..204bde9 --- /dev/null +++ b/backend/cisa_kev_client.py @@ -0,0 +1,516 @@ +""" +CISA Known Exploited Vulnerabilities (KEV) Integration Client +Interfaces with the CISA KEV catalog at https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json +""" + +import aiohttp +import asyncio +import json +import logging +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +from sqlalchemy import text +import re + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class CISAKEVClient: + """Client for interfacing with CISA Known Exploited Vulnerabilities catalog""" + + def __init__(self, db_session: Session): + self.db_session = db_session + + # CISA KEV URLs + self.kev_catalog_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" + self.kev_schema_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities_schema.json" + + # Cache for KEV data + self.kev_cache = None + self.cache_timestamp = None + self.cache_ttl = 3600 # 1 hour cache + + # Rate limiting + self.rate_limit_delay = 1.0 # 1 second between requests + self.last_request_time = 0 + + async def _make_request(self, session: aiohttp.ClientSession, url: str) -> Optional[dict]: + """Make a rate-limited request to CISA""" + try: + # Rate limiting + current_time = asyncio.get_event_loop().time() + time_since_last = current_time - self.last_request_time + if time_since_last < self.rate_limit_delay: + await asyncio.sleep(self.rate_limit_delay - time_since_last) + + async with session.get(url, timeout=30) as response: + self.last_request_time = asyncio.get_event_loop().time() + + if response.status == 200: + return await response.json() + else: + logger.warning(f"CISA KEV request failed: {response.status} for {url}") + return None + + except Exception as e: + logger.error(f"Error making request to {url}: {e}") + return None + + async def fetch_kev_catalog(self, force_refresh: bool = False) -> Optional[dict]: + """Fetch the CISA KEV catalog with caching""" + # Check cache first + if not force_refresh and self.kev_cache and self.cache_timestamp: + cache_age = datetime.now().timestamp() - self.cache_timestamp + if cache_age < self.cache_ttl: + logger.info("Using cached CISA KEV data") + return self.kev_cache + + logger.info("Fetching fresh CISA KEV catalog...") + + async with aiohttp.ClientSession() as session: + data = await self._make_request(session, self.kev_catalog_url) + + if data: + self.kev_cache = data + self.cache_timestamp = datetime.now().timestamp() + logger.info(f"Fetched CISA KEV catalog with {data.get('count', 0)} vulnerabilities") + return data + else: + logger.error("Failed to fetch CISA KEV catalog") + return None + + def analyze_kev_vulnerability(self, vuln: dict) -> dict: + """Analyze a CISA KEV vulnerability entry to extract threat intelligence""" + analysis = { + 'cve_id': vuln.get('cveID'), + 'vendor_project': vuln.get('vendorProject'), + 'product': vuln.get('product'), + 'vulnerability_name': vuln.get('vulnerabilityName'), + 'date_added': vuln.get('dateAdded'), + 'short_description': vuln.get('shortDescription'), + 'required_action': vuln.get('requiredAction'), + 'due_date': vuln.get('dueDate'), + 
'known_ransomware_use': vuln.get('knownRansomwareCampaignUse', 'Unknown'), + 'notes': vuln.get('notes', ''), + 'cwes': vuln.get('cwes', []) + } + + # Calculate threat priority score based on various factors + threat_score = self._calculate_threat_score(vuln) + analysis['threat_score'] = threat_score + analysis['threat_level'] = self._get_threat_level(threat_score) + + # Extract indicators from descriptions and required actions + indicators = self._extract_kev_indicators(vuln) + analysis['indicators'] = indicators + + # Categorize the vulnerability type + analysis['vulnerability_category'] = self._categorize_vulnerability(vuln) + + return analysis + + def _calculate_threat_score(self, vuln: dict) -> int: + """Calculate threat priority score (0-100) based on KEV data""" + score = 0 + + # Base score for being in CISA KEV + score += 50 # High base score since it's actively exploited + + # Ransomware usage factor (0-25 points) + ransomware_use = vuln.get('knownRansomwareCampaignUse', 'Unknown').lower() + if ransomware_use == 'known': + score += 25 + elif ransomware_use == 'unknown': + score += 10 # Still concerning but less certain + + # Recency factor (0-15 points) + try: + date_added = datetime.strptime(vuln.get('dateAdded', ''), '%Y-%m-%d') + days_since_added = (datetime.now() - date_added).days + if days_since_added <= 30: # Very recent + score += 15 + elif days_since_added <= 90: # Recent + score += 10 + elif days_since_added <= 365: # This year + score += 5 + except: + pass + + # Urgency based on due date (0-10 points) + try: + due_date = datetime.strptime(vuln.get('dueDate', ''), '%Y-%m-%d') + days_to_due = (due_date - datetime.now()).days + if days_to_due <= 0: # Overdue + score += 10 + elif days_to_due <= 7: # Due soon + score += 8 + elif days_to_due <= 30: # Due this month + score += 5 + except: + pass + + return min(score, 100) # Cap at 100 + + def _get_threat_level(self, score: int) -> str: + """Get threat level based on score""" + if score >= 85: + return 'critical' + elif score >= 70: + return 'high' + elif score >= 55: + return 'medium' + else: + return 'low' + + def _extract_kev_indicators(self, vuln: dict) -> dict: + """Extract threat indicators from CISA KEV vulnerability data""" + indicators = { + 'attack_vectors': [], + 'affected_products': [], + 'required_actions': [], + 'ransomware_indicators': [], + 'vulnerability_types': [], + 'mitigation_techniques': [] + } + + # Extract from vulnerability name and description + text_sources = [ + vuln.get('vulnerabilityName', ''), + vuln.get('shortDescription', ''), + vuln.get('requiredAction', ''), + vuln.get('notes', '') + ] + + full_text = ' '.join(text_sources).lower() + + # Attack vector patterns + attack_vector_patterns = [ + r'\b(remote code execution|rce)\b', + r'\b(sql injection|sqli)\b', + r'\b(cross.?site scripting|xss)\b', + r'\b(buffer overflow)\b', + r'\b(privilege escalation)\b', + r'\b(authentication bypass)\b', + r'\b(directory traversal|path traversal)\b', + r'\b(file upload)\b', + r'\b(command injection)\b', + r'\b(deserialization)\b', + r'\b(memory corruption)\b', + r'\b(use.?after.?free)\b', + r'\b(heap overflow)\b', + r'\b(stack overflow)\b' + ] + + for pattern in attack_vector_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators['attack_vectors'].extend(matches) + + # Product and vendor information + vendor = vuln.get('vendorProject', '') + product = vuln.get('product', '') + if vendor and product: + indicators['affected_products'].append(f"{vendor} {product}") + + # Required action 
keywords + required_action = vuln.get('requiredAction', '') + action_patterns = [ + r'\b(patch|update|upgrade)\b', + r'\b(disable|remove|uninstall)\b', + r'\b(configure|reconfigure)\b', + r'\b(firewall|network segmentation)\b', + r'\b(monitor|logging)\b', + r'\b(backup|restore)\b' + ] + + for pattern in action_patterns: + matches = re.findall(pattern, required_action, re.IGNORECASE) + indicators['required_actions'].extend(matches) + + # Ransomware indicators + if vuln.get('knownRansomwareCampaignUse') == 'Known': + indicators['ransomware_indicators'].append('known_ransomware_use') + + # Vulnerability type classification + vuln_type_patterns = [ + r'\b(zero.?day|0.?day)\b', + r'\b(denial.?of.?service|dos)\b', + r'\b(information disclosure)\b', + r'\b(data breach)\b', + r'\b(backdoor)\b', + r'\b(trojan)\b', + r'\b(webshell)\b' + ] + + for pattern in vuln_type_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators['vulnerability_types'].extend(matches) + + # Clean up and deduplicate + for key in indicators: + indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2])) + indicators[key] = indicators[key][:10] # Limit to 10 items per category + + return indicators + + def _categorize_vulnerability(self, vuln: dict) -> str: + """Categorize the vulnerability based on CISA KEV data""" + vuln_name = vuln.get('vulnerabilityName', '').lower() + description = vuln.get('shortDescription', '').lower() + full_text = f"{vuln_name} {description}" + + # Check for specific vulnerability categories + if re.search(r'\b(remote code execution|rce)\b', full_text): + return 'remote_code_execution' + elif re.search(r'\b(sql injection|sqli)\b', full_text): + return 'injection' + elif re.search(r'\b(cross.?site scripting|xss)\b', full_text): + return 'cross_site_scripting' + elif re.search(r'\b(privilege escalation)\b', full_text): + return 'privilege_escalation' + elif re.search(r'\b(authentication bypass)\b', full_text): + return 'authentication_bypass' + elif re.search(r'\b(buffer overflow|memory corruption)\b', full_text): + return 'memory_corruption' + elif re.search(r'\b(directory traversal|path traversal)\b', full_text): + return 'path_traversal' + elif re.search(r'\b(denial.?of.?service|dos)\b', full_text): + return 'denial_of_service' + elif re.search(r'\b(information disclosure)\b', full_text): + return 'information_disclosure' + else: + return 'other' + + async def sync_cve_kev_data(self, cve_id: str) -> dict: + """Synchronize CISA KEV data for a specific CVE""" + from main import CVE, SigmaRule + + # Get existing CVE + cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first() + if not cve: + logger.warning(f"CVE {cve_id} not found in database") + return {"error": "CVE not found"} + + # Fetch CISA KEV catalog + kev_catalog = await self.fetch_kev_catalog() + if not kev_catalog: + return {"error": "Failed to fetch CISA KEV catalog"} + + # Search for the CVE in the KEV catalog + kev_entry = None + for vuln in kev_catalog.get('vulnerabilities', []): + if vuln.get('cveID') == cve_id: + kev_entry = vuln + break + + if not kev_entry: + logger.info(f"CVE {cve_id} not found in CISA KEV catalog") + return {"cve_id": cve_id, "kev_found": False} + + # Analyze the KEV entry + kev_analysis = self.analyze_kev_vulnerability(kev_entry) + + # Update CVE with CISA KEV data + if not cve.poc_data: + cve.poc_data = {} + + cve.poc_data['cisa_kev'] = { + 'vulnerability_data': kev_analysis, + 'catalog_version': kev_catalog.get('catalogVersion'), + 
'date_released': kev_catalog.get('dateReleased'), + 'synced_at': datetime.utcnow().isoformat(), + 'source': 'cisa_kev' + } + + cve.updated_at = datetime.utcnow() + + # Update SIGMA rule with CISA KEV data + sigma_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve_id + ).first() + + if sigma_rule: + # Update rule with CISA KEV data + if not sigma_rule.nomi_sec_data: + sigma_rule.nomi_sec_data = {} + + sigma_rule.nomi_sec_data['cisa_kev'] = { + 'threat_level': kev_analysis['threat_level'], + 'threat_score': kev_analysis['threat_score'], + 'known_ransomware_use': kev_analysis['known_ransomware_use'], + 'vulnerability_category': kev_analysis['vulnerability_category'], + 'due_date': kev_analysis['due_date'], + 'indicators': kev_analysis['indicators'], + 'source': 'cisa_kev' + } + + # Update exploit indicators with CISA KEV data + existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {} + kev_indicators = kev_analysis['indicators'] + + for key, values in kev_indicators.items(): + if key not in existing_indicators: + existing_indicators[key] = [] + existing_indicators[key].extend(values) + existing_indicators[key] = list(set(existing_indicators[key])) + + sigma_rule.exploit_indicators = json.dumps(existing_indicators) + sigma_rule.updated_at = datetime.utcnow() + + self.db_session.commit() + + logger.info(f"Synchronized CISA KEV data for {cve_id} (Threat Level: {kev_analysis['threat_level']})") + + return { + "cve_id": cve_id, + "kev_found": True, + "threat_level": kev_analysis['threat_level'], + "threat_score": kev_analysis['threat_score'], + "known_ransomware_use": kev_analysis['known_ransomware_use'], + "vulnerability_category": kev_analysis['vulnerability_category'], + "source": "cisa_kev" + } + + async def bulk_sync_kev_data(self, batch_size: int = 100, cancellation_flag: Optional[callable] = None) -> dict: + """Synchronize CISA KEV data for all matching CVEs""" + from main import CVE, BulkProcessingJob + + # Create bulk processing job + job = BulkProcessingJob( + job_type='cisa_kev_sync', + status='running', + started_at=datetime.utcnow(), + job_metadata={'batch_size': batch_size} + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_found = 0 + results = [] + + try: + # Fetch CISA KEV catalog first + kev_catalog = await self.fetch_kev_catalog(force_refresh=True) + if not kev_catalog: + raise Exception("Failed to fetch CISA KEV catalog") + + # Extract all CVE IDs from the KEV catalog + kev_cve_ids = [vuln.get('cveID') for vuln in kev_catalog.get('vulnerabilities', []) if vuln.get('cveID')] + + # Get CVEs that exist in our database and match KEV catalog + cves = self.db_session.query(CVE).filter(CVE.cve_id.in_(kev_cve_ids)).all() + + job.total_items = len(cves) + self.db_session.commit() + + logger.info(f"Found {len(cves)} CVEs matching CISA KEV catalog") + + # Process in batches + for i in range(0, len(cves), batch_size): + # Check for cancellation + if cancellation_flag and cancellation_flag(): + logger.info("CISA KEV sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + batch = cves[i:i + batch_size] + + for cve in batch: + # Check for cancellation + if cancellation_flag and cancellation_flag(): + logger.info("CISA KEV sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + try: + result = await 
self.sync_cve_kev_data(cve.cve_id) + total_processed += 1 + + if result.get("kev_found", False): + total_found += 1 + results.append(result) + + job.processed_items += 1 + + # Small delay to be respectful to CISA + await asyncio.sleep(0.1) + + except Exception as e: + logger.error(f"Error syncing CISA KEV for {cve.cve_id}: {e}") + job.failed_items += 1 + + # Break out of outer loop if cancelled + if job.status == 'cancelled': + break + + # Commit after each batch + self.db_session.commit() + logger.info(f"Processed CISA KEV batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}") + + # Update job status + if job.status != 'cancelled': + job.status = 'completed' + job.completed_at = datetime.utcnow() + + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_kev_found': total_found, + 'cves_with_kev': len(results), + 'catalog_version': kev_catalog.get('catalogVersion'), + 'source': 'cisa_kev' + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Bulk CISA KEV sync job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_kev_found': total_found, + 'cves_with_kev': len(results), + 'source': 'cisa_kev' + } + + async def get_kev_sync_status(self) -> dict: + """Get CISA KEV synchronization status""" + from main import CVE + + # Count CVEs with CISA KEV data + total_cves = self.db_session.query(CVE).count() + + result = self.db_session.execute( + text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"cisa_kev\"%'") + ) + cves_with_kev = result.scalar() + + # Get catalog information if available + catalog_info = {} + if self.kev_cache: + catalog_info = { + 'catalog_version': self.kev_cache.get('catalogVersion'), + 'date_released': self.kev_cache.get('dateReleased'), + 'total_vulnerabilities': self.kev_cache.get('count', 0), + 'cache_age_seconds': int(datetime.now().timestamp() - (self.cache_timestamp or 0)) + } + + return { + 'total_cves': total_cves, + 'cves_with_kev_data': cves_with_kev, + 'kev_coverage': (cves_with_kev / total_cves * 100) if total_cves > 0 else 0, + 'kev_sync_status': 'active' if cves_with_kev > 0 else 'pending', + 'catalog_info': catalog_info, + 'source': 'cisa_kev' + } \ No newline at end of file diff --git a/backend/exploitdb_client_local.py b/backend/exploitdb_client_local.py new file mode 100644 index 0000000..91ef50f --- /dev/null +++ b/backend/exploitdb_client_local.py @@ -0,0 +1,725 @@ +""" +ExploitDB Local Filesystem Integration Client +Interfaces with the local ExploitDB submodule at exploit-db-mirror/ +""" + +import os +import re +import json +import logging +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +from pathlib import Path + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class ExploitDBLocalClient: + """Client for interfacing with local ExploitDB mirror filesystem""" + + def __init__(self, db_session: Session): + self.db_session = db_session + + # Path to the local exploit-db-mirror submodule (in container: /app/exploit-db-mirror) + self.exploitdb_path = Path("/app/exploit-db-mirror") + self.exploits_path = self.exploitdb_path / "exploits" + + # ExploitDB URL pattern for mapping + self.exploit_url_pattern = re.compile(r'https?://(?:www\.)?exploit-db\.com/exploits/(\d+)') + + # Cache for file searches + 
self.file_cache = {} + + # Build file index on initialization + self._build_file_index() + + def _build_file_index(self): + """Build an index of exploit ID to file path for fast lookups""" + logger.info("Building ExploitDB file index...") + self.file_index = {} + + if not self.exploits_path.exists(): + logger.error(f"ExploitDB path not found: {self.exploits_path}") + return + + # Walk through all exploit files + for root, dirs, files in os.walk(self.exploits_path): + for file in files: + # Extract exploit ID from filename (e.g., "12345.py" -> "12345") + match = re.match(r'^(\d+)\.(\w+)$', file) + if match: + exploit_id = match.group(1) + file_extension = match.group(2) + file_path = Path(root) / file + + # Store in index + self.file_index[exploit_id] = { + 'path': file_path, + 'filename': file, + 'extension': file_extension, + 'category': self._extract_category_from_path(file_path), + 'subcategory': self._extract_subcategory_from_path(file_path) + } + + logger.info(f"Built index with {len(self.file_index)} exploits") + + def _extract_category_from_path(self, file_path: Path) -> str: + """Extract category from file path (e.g., linux, windows, etc.)""" + parts = file_path.parts + exploits_index = None + + for i, part in enumerate(parts): + if part == "exploits": + exploits_index = i + break + + if exploits_index and exploits_index + 1 < len(parts): + return parts[exploits_index + 1] + + return "unknown" + + def _extract_subcategory_from_path(self, file_path: Path) -> str: + """Extract subcategory from file path (e.g., local, remote, webapps, etc.)""" + parts = file_path.parts + exploits_index = None + + for i, part in enumerate(parts): + if part == "exploits": + exploits_index = i + break + + if exploits_index and exploits_index + 2 < len(parts): + return parts[exploits_index + 2] + + return "unknown" + + def extract_exploit_id_from_url(self, url: str) -> Optional[str]: + """Extract exploit ID from ExploitDB URL""" + match = self.exploit_url_pattern.search(url) + if match: + return match.group(1) + return None + + def get_exploit_details(self, exploit_id: str) -> Optional[dict]: + """Get exploit details from local filesystem""" + if exploit_id not in self.file_index: + logger.debug(f"Exploit {exploit_id} not found in local index") + return None + + file_info = self.file_index[exploit_id] + file_path = file_info['path'] + + try: + # Read file content + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + + # Get file stats + stat = file_path.stat() + + return { + 'id': exploit_id, + 'filename': file_info['filename'], + 'path': str(file_path.relative_to(self.exploitdb_path)), + 'full_path': str(file_path), + 'category': file_info['category'], + 'subcategory': file_info['subcategory'], + 'extension': file_info['extension'], + 'content': content, + 'size': stat.st_size, + 'modified_time': datetime.fromtimestamp(stat.st_mtime), + 'local_url': f"file://{file_path}" + } + + except Exception as e: + logger.error(f"Error reading exploit file {file_path}: {e}") + return None + + def analyze_exploit_content(self, exploit_data: dict) -> dict: + """Analyze exploit content to extract indicators""" + if not exploit_data or not exploit_data.get('content'): + return {} + + content = exploit_data['content'] + indicators = { + 'processes': [], + 'files': [], + 'network': [], + 'registry': [], + 'commands': [], + 'urls': [], + 'techniques': [], + 'languages': [], + 'platforms': [], + 'syscalls': [], + 'functions': [] + } + + # Determine programming language from extension + 
extension = exploit_data.get('extension', '').lower() + language_map = { + 'py': 'python', + 'rb': 'ruby', + 'pl': 'perl', + 'c': 'c', + 'cpp': 'cpp', + 'cc': 'cpp', + 'cxx': 'cpp', + 'sh': 'bash', + 'ps1': 'powershell', + 'java': 'java', + 'js': 'javascript', + 'php': 'php', + 'asp': 'asp', + 'aspx': 'aspx', + 'jsp': 'jsp', + 'go': 'go', + 'rs': 'rust', + 'asm': 'assembly', + 's': 'assembly', + 'nasm': 'assembly' + } + + if extension in language_map: + indicators['languages'].append(language_map[extension]) + + # Extract platform from path + category = exploit_data.get('category', '').lower() + if category in ['linux', 'windows', 'osx', 'macos', 'android', 'freebsd', 'solaris']: + indicators['platforms'].append(category) + + # Extract indicators from content + content_lower = content.lower() + + # Process patterns - enhanced for different languages + process_patterns = [ + r'\b(cmd\.exe|powershell\.exe|bash|sh|python|ruby|perl|java)\b', + r'\b(system|exec|popen|subprocess|shell_exec|eval|execve|execl|execlp)\b', + r'\b(createprocess|shellexecute|winexec|createthread)\b', + r'\b(mshta|rundll32|regsvr32|wscript|cscript|certutil|bitsadmin)\b', + r'\b(/bin/sh|/bin/bash|/usr/bin/python|/usr/bin/perl)\b' + ] + + for pattern in process_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + indicators['processes'].extend(matches) + + # File patterns - enhanced + file_patterns = [ + r'\b([a-zA-Z]:\\[^\\\s"\']+\\[^\\\s"\']+\.[a-zA-Z0-9]+)\b', # Windows paths + r'\b(/[^/\s"\']+/[^/\s"\']+\.[a-zA-Z0-9]+)\b', # Unix paths + r'\b(\w+\.(exe|dll|so|dylib|bat|ps1|py|sh|jar|war|php|jsp|asp|aspx|txt|log|conf))\b', + r'\b(/tmp/[^\s"\']+)\b', # Temp files + r'\b(/etc/[^\s"\']+)\b', # Config files + r'\b(/var/[^\s"\']+)\b', # Var files + r'\b(/proc/[^\s"\']+)\b' # Proc files + ] + + for pattern in file_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + if matches and isinstance(matches[0], tuple): + indicators['files'].extend([m[0] for m in matches]) + else: + indicators['files'].extend(matches) + + # Network patterns - enhanced + network_patterns = [ + r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses + r'\b(https?://[^\s<>"\']+)\b', # URLs + r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', # Domain names + r'\b(bind|connect|listen|socket|recv|send|accept)\b', # Network functions + r'\b(AF_INET|SOCK_STREAM|SOCK_DGRAM)\b', # Socket constants + r'\b(nc|netcat|ncat|telnet|ssh|ftp|wget|curl)\b' # Network tools + ] + + for pattern in network_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + if 'http' in pattern: + indicators['urls'].extend(matches) + else: + indicators['network'].extend(matches) + + # Command patterns - enhanced + command_patterns = [ + r'\b(curl|wget|nc|netcat|telnet|ssh|ftp)\b', + r'\b(whoami|id|uname|systeminfo|ipconfig|ifconfig|netstat|ps|top|lsof)\b', + r'\b(cat|type|dir|ls|find|grep|awk|sed|sort|uniq)\b', + r'\b(echo|printf|print)\b', + r'\b(base64|decode|encode|openssl|gpg)\b', + r'\b(sudo|su|chmod|chown|mount|umount)\b', + r'\b(service|systemctl|chkconfig|update-rc\.d)\b' + ] + + for pattern in command_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + indicators['commands'].extend(matches) + + # Registry patterns (Windows-specific) + registry_patterns = [ + r'\b(HKEY_[A-Z_]+)\b', + r'\b(HKLM|HKCU|HKCR|HKU|HKCC)\b', + r'\b(reg\s+add|reg\s+query|reg\s+delete|regedit)\b', + r'\b(SOFTWARE\\\\[^\\\s"\']+)\b', + r'\b(SYSTEM\\\\[^\\\s"\']+)\b', + r'\b(CurrentVersion\\\\[^\\\s"\']+)\b' + ] + + for pattern in registry_patterns: + 
matches = re.findall(pattern, content, re.IGNORECASE) + indicators['registry'].extend(matches) + + # System call patterns (Linux/Unix) + syscall_patterns = [ + r'\b(open|close|read|write|lseek|stat|fstat|lstat)\b', + r'\b(fork|vfork|clone|execve|wait|waitpid)\b', + r'\b(socket|bind|listen|accept|connect|send|recv)\b', + r'\b(mmap|munmap|mprotect|brk|sbrk)\b', + r'\b(ptrace|kill|signal|alarm)\b' + ] + + for pattern in syscall_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + indicators['syscalls'].extend(matches) + + # Function patterns + function_patterns = [ + r'\b(main|printf|scanf|malloc|free|strcpy|strcat|strlen)\b', + r'\b(gets|puts|fgets|fputs|fopen|fclose|fread|fwrite)\b', + r'\b(sprintf|snprintf|memcpy|memset|strcmp|strncmp)\b' + ] + + for pattern in function_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + indicators['functions'].extend(matches) + + # Clean up and deduplicate + for key in indicators: + # Remove empty strings and duplicates + indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 1])) + # Limit to reasonable number of indicators + indicators[key] = indicators[key][:25] + + return indicators + + def calculate_exploit_quality_score(self, exploit_data: dict) -> dict: + """Calculate quality score for an exploit""" + quality_score = 0 + factors = {} + + # File size factor (0-25 points) + file_size = exploit_data.get('size', 0) + if file_size > 0: + if file_size > 10000: # Large files (10KB+) + size_score = 25 + elif file_size > 5000: # Medium files (5KB+) + size_score = 20 + elif file_size > 1000: # Small files (1KB+) + size_score = 15 + elif file_size > 500: # Very small files (500B+) + size_score = 10 + else: # Tiny files + size_score = 5 + quality_score += size_score + factors['size_score'] = size_score + + # Content analysis factor (0-30 points) + content = exploit_data.get('content', '') + if content: + content_score = 0 + lines = content.split('\n') + + # Check for comments and documentation + comment_lines = 0 + for line in lines: + stripped = line.strip() + if (stripped.startswith('#') or stripped.startswith('//') or + stripped.startswith('/*') or stripped.startswith('*') or + stripped.startswith('"""') or stripped.startswith("'''")): + comment_lines += 1 + + comment_score = min(comment_lines, 10) # Up to 10 points + content_score += comment_score + + # Check for function definitions + function_patterns = [ + r'\bdef\s+\w+', # Python + r'\bfunction\s+\w+', # JavaScript + r'\bvoid\s+\w+', # C/C++ + r'\bint\s+\w+', # C/C++ + r'\bchar\s+\w+', # C/C++ + r'\bsub\s+\w+', # Perl + r'^\w+\s*\(', # Generic function calls + ] + + function_count = 0 + for pattern in function_patterns: + matches = re.findall(pattern, content, re.IGNORECASE | re.MULTILINE) + function_count += len(matches) + + function_score = min(function_count, 15) # Up to 15 points + content_score += function_score + + # Check for include/import statements + include_patterns = [ + r'#include\s*[<"]', # C/C++ + r'import\s+\w+', # Python/Java + r'require\s+\w+', # Ruby/Perl + r'use\s+\w+', # Perl + ] + + include_count = 0 + for pattern in include_patterns: + matches = re.findall(pattern, content, re.IGNORECASE) + include_count += len(matches) + + include_score = min(include_count, 5) # Up to 5 points + content_score += include_score + + quality_score += content_score + factors['content_score'] = content_score + + # Platform/category factor (0-20 points) + category = exploit_data.get('category', '').lower() + subcategory = 
exploit_data.get('subcategory', '').lower() + + platform_score = 0 + if category in ['linux', 'windows', 'osx', 'macos']: + platform_score += 10 + elif category in ['android', 'freebsd', 'solaris']: + platform_score += 8 + elif category in ['multiple', 'unix']: + platform_score += 6 + + if subcategory in ['local', 'remote']: + platform_score += 10 + elif subcategory in ['webapps', 'dos']: + platform_score += 8 + elif subcategory in ['shellcode']: + platform_score += 6 + + quality_score += platform_score + factors['platform_score'] = platform_score + + # Language factor (0-15 points) + extension = exploit_data.get('extension', '').lower() + + lang_score = 0 + if extension in ['c', 'cpp', 'cc', 'cxx']: # Compiled languages + lang_score = 15 + elif extension in ['py', 'rb', 'pl', 'java']: # High-level languages + lang_score = 12 + elif extension in ['sh', 'ps1', 'bat']: # Scripting languages + lang_score = 8 + elif extension in ['asm', 's', 'nasm']: # Assembly + lang_score = 10 + elif extension in ['php', 'asp', 'aspx', 'jsp']: # Web languages + lang_score = 6 + elif extension in ['txt', 'html']: # Text/docs + lang_score = 3 + + quality_score += lang_score + factors['language_score'] = lang_score + + # File age factor (0-10 points) - newer exploits might be more relevant + modified_time = exploit_data.get('modified_time') + if modified_time: + days_old = (datetime.now() - modified_time).days + if days_old < 365: # Less than 1 year + age_score = 10 + elif days_old < 365 * 3: # Less than 3 years + age_score = 8 + elif days_old < 365 * 5: # Less than 5 years + age_score = 6 + elif days_old < 365 * 10: # Less than 10 years + age_score = 4 + else: # Very old + age_score = 2 + + quality_score += age_score + factors['age_score'] = age_score + + # Determine quality tier + quality_tier = self._get_exploit_quality_tier(quality_score) + + return { + 'quality_score': quality_score, + 'factors': factors, + 'quality_tier': quality_tier + } + + def _get_exploit_quality_tier(self, score: int) -> str: + """Get quality tier based on score""" + if score >= 80: + return 'excellent' + elif score >= 65: + return 'good' + elif score >= 45: + return 'fair' + elif score >= 25: + return 'poor' + else: + return 'very_poor' + + async def sync_cve_exploits(self, cve_id: str) -> dict: + """Synchronize ExploitDB data for a specific CVE using local filesystem""" + from main import CVE, SigmaRule + + # Get existing CVE + cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first() + if not cve: + logger.warning(f"CVE {cve_id} not found in database") + return {"error": "CVE not found"} + + # Extract ExploitDB URLs from reference URLs + exploit_urls = [] + if cve.reference_urls: + for url in cve.reference_urls: + exploit_id = self.extract_exploit_id_from_url(url) + if exploit_id: + exploit_urls.append((url, exploit_id)) + + if not exploit_urls: + logger.info(f"No ExploitDB URLs found for {cve_id}") + return {"cve_id": cve_id, "exploits_found": 0} + + # Fetch exploit details from local filesystem + exploit_data = [] + total_quality_score = 0 + + for url, exploit_id in exploit_urls: + try: + details = self.get_exploit_details(exploit_id) + if details: + # Analyze exploit content + indicators = self.analyze_exploit_content(details) + quality_analysis = self.calculate_exploit_quality_score(details) + + exploit_entry = { + 'id': exploit_id, + 'url': url, + 'filename': details.get('filename'), + 'path': details.get('path'), + 'category': details.get('category'), + 'subcategory': details.get('subcategory'), + 'extension': 
details.get('extension'), + 'size': details.get('size'), + 'modified_time': details.get('modified_time').isoformat() if details.get('modified_time') else None, + 'local_url': details.get('local_url'), + 'indicators': indicators, + 'quality_analysis': quality_analysis + } + + exploit_data.append(exploit_entry) + total_quality_score += quality_analysis['quality_score'] + logger.info(f"Successfully processed exploit {exploit_id} from local filesystem") + + except Exception as e: + logger.error(f"Error processing exploit {exploit_id}: {e}") + + # Update CVE with ExploitDB data + if exploit_data: + # Store in existing poc_data field + if not cve.poc_data: + cve.poc_data = {} + + cve.poc_data['exploitdb'] = { + 'exploits': exploit_data, + 'total_exploits': len(exploit_data), + 'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0, + 'synced_at': datetime.utcnow().isoformat(), + 'source': 'local_filesystem' + } + + cve.updated_at = datetime.utcnow() + + # Update SIGMA rule with ExploitDB data + sigma_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve_id + ).first() + + if sigma_rule: + # Combine indicators from all exploits + combined_indicators = {} + for exploit in exploit_data: + for key, values in exploit['indicators'].items(): + if key not in combined_indicators: + combined_indicators[key] = [] + combined_indicators[key].extend(values) + + # Deduplicate + for key in combined_indicators: + combined_indicators[key] = list(set(combined_indicators[key])) + + # Update rule with ExploitDB data + if not sigma_rule.nomi_sec_data: + sigma_rule.nomi_sec_data = {} + + sigma_rule.nomi_sec_data['exploitdb'] = { + 'total_exploits': len(exploit_data), + 'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0, + 'best_exploit': max(exploit_data, key=lambda x: x['quality_analysis']['quality_score']) if exploit_data else None, + 'indicators': combined_indicators, + 'source': 'local_filesystem' + } + + # Update exploit indicators + existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {} + for key, values in combined_indicators.items(): + if key not in existing_indicators: + existing_indicators[key] = [] + existing_indicators[key].extend(values) + existing_indicators[key] = list(set(existing_indicators[key])) + + sigma_rule.exploit_indicators = json.dumps(existing_indicators) + sigma_rule.updated_at = datetime.utcnow() + + self.db_session.commit() + + logger.info(f"Synchronized {len(exploit_data)} ExploitDB exploits for {cve_id} from local filesystem") + + return { + "cve_id": cve_id, + "exploits_found": len(exploit_data), + "total_quality_score": total_quality_score, + "average_quality": total_quality_score // len(exploit_data) if exploit_data else 0, + "exploit_urls": [e['url'] for e in exploit_data], + "source": "local_filesystem" + } + + async def bulk_sync_exploitdb(self, batch_size: int = 50, cancellation_flag: Optional[callable] = None) -> dict: + """Synchronize ExploitDB data for all CVEs with ExploitDB references using local filesystem""" + from main import CVE, BulkProcessingJob + from sqlalchemy import text + + # Create bulk processing job + job = BulkProcessingJob( + job_type='exploitdb_sync_local', + status='running', + started_at=datetime.utcnow(), + job_metadata={'batch_size': batch_size, 'source': 'local_filesystem'} + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_found = 0 + results = [] + + try: + # Get all CVEs with ExploitDB 
references using text search + cves = self.db_session.query(CVE).filter( + text("reference_urls::text LIKE '%exploit-db%'") + ).all() + + job.total_items = len(cves) + self.db_session.commit() + + logger.info(f"Found {len(cves)} CVEs with ExploitDB references for local sync") + + # Process in batches + for i in range(0, len(cves), batch_size): + # Check for cancellation + if cancellation_flag and cancellation_flag(): + logger.info("ExploitDB local sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + batch = cves[i:i + batch_size] + + for cve in batch: + # Check for cancellation + if cancellation_flag and cancellation_flag(): + logger.info("ExploitDB local sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + try: + result = await self.sync_cve_exploits(cve.cve_id) + total_processed += 1 + + if result.get("exploits_found", 0) > 0: + total_found += result["exploits_found"] + results.append(result) + + job.processed_items += 1 + + # Very small delay for responsiveness + # No need for long delays with local filesystem + + except Exception as e: + logger.error(f"Error syncing ExploitDB for {cve.cve_id}: {e}") + job.failed_items += 1 + + # Break out of outer loop if cancelled + if job.status == 'cancelled': + break + + # Commit after each batch + self.db_session.commit() + logger.info(f"Processed ExploitDB local batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}") + + # Update job status + if job.status != 'cancelled': + job.status = 'completed' + job.completed_at = datetime.utcnow() + + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_exploits_found': total_found, + 'cves_with_exploits': len(results), + 'source': 'local_filesystem' + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Bulk ExploitDB local sync job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_exploits_found': total_found, + 'cves_with_exploits': len(results), + 'source': 'local_filesystem' + } + + async def get_exploitdb_sync_status(self) -> dict: + """Get ExploitDB synchronization status for local filesystem""" + from main import CVE + from sqlalchemy import text + + # Count CVEs with ExploitDB references + total_cves = self.db_session.query(CVE).count() + + # Count CVEs with ExploitDB data + result = self.db_session.execute( + text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"exploitdb\"%'") + ) + cves_with_exploitdb = result.scalar() + + # Count CVEs with ExploitDB URLs in references + result2 = self.db_session.execute( + text("SELECT COUNT(*) FROM cves WHERE reference_urls::text LIKE '%exploit-db%'") + ) + cves_with_exploitdb_refs = result2.scalar() + + return { + 'total_cves': total_cves, + 'cves_with_exploitdb_refs': cves_with_exploitdb_refs, + 'cves_with_exploitdb_data': cves_with_exploitdb, + 'exploitdb_coverage': (cves_with_exploitdb / cves_with_exploitdb_refs * 100) if cves_with_exploitdb_refs > 0 else 0, + 'exploitdb_sync_status': 'active' if cves_with_exploitdb > 0 else 'pending', + 'exploitdb_local_index_size': len(self.file_index), + 'source': 'local_filesystem' + } \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index e5a29d1..876b3da 100644 --- 
a/backend/main.py +++ b/backend/main.py @@ -154,6 +154,14 @@ class GitHubPoCSyncRequest(BaseModel): cve_id: Optional[str] = None batch_size: int = 50 +class ExploitDBSyncRequest(BaseModel): + cve_id: Optional[str] = None + batch_size: int = 30 + +class CISAKEVSyncRequest(BaseModel): + cve_id: Optional[str] = None + batch_size: int = 100 + class RuleRegenRequest(BaseModel): force: bool = False @@ -1224,6 +1232,238 @@ async def sync_github_pocs(background_tasks: BackgroundTasks, "batch_size": request.batch_size } +@app.post("/api/sync-exploitdb") +async def sync_exploitdb(background_tasks: BackgroundTasks, + request: ExploitDBSyncRequest, + db: Session = Depends(get_db)): + """Synchronize ExploitDB data from git mirror""" + + # Create job record + job = BulkProcessingJob( + job_type='exploitdb_sync', + status='pending', + job_metadata={ + 'cve_id': request.cve_id, + 'batch_size': request.batch_size + } + ) + db.add(job) + db.commit() + db.refresh(job) + + job_id = str(job.id) + running_jobs[job_id] = job + job_cancellation_flags[job_id] = False + + async def sync_task(): + # Create a new database session for the background task + task_db = SessionLocal() + try: + # Get the job in the new session + task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first() + if not task_job: + logger.error(f"Job {job_id} not found in task session") + return + + task_job.status = 'running' + task_job.started_at = datetime.utcnow() + task_db.commit() + + from exploitdb_client_local import ExploitDBLocalClient + client = ExploitDBLocalClient(task_db) + + if request.cve_id: + # Sync specific CVE + if job_cancellation_flags.get(job_id, False): + logger.info(f"Job {job_id} cancelled before starting") + return + + result = await client.sync_cve_exploits(request.cve_id) + logger.info(f"ExploitDB sync for {request.cve_id}: {result}") + else: + # Sync all CVEs with cancellation support + result = await client.bulk_sync_exploitdb( + batch_size=request.batch_size, + cancellation_flag=lambda: job_cancellation_flags.get(job_id, False) + ) + logger.info(f"ExploitDB bulk sync completed: {result}") + + # Update job status if not cancelled + if not job_cancellation_flags.get(job_id, False): + task_job.status = 'completed' + task_job.completed_at = datetime.utcnow() + task_db.commit() + + except Exception as e: + if not job_cancellation_flags.get(job_id, False): + # Get the job again in case it was modified + task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first() + if task_job: + task_job.status = 'failed' + task_job.error_message = str(e) + task_job.completed_at = datetime.utcnow() + task_db.commit() + + logger.error(f"ExploitDB sync failed: {e}") + import traceback + traceback.print_exc() + finally: + # Clean up tracking and close the task session + running_jobs.pop(job_id, None) + job_cancellation_flags.pop(job_id, None) + task_db.close() + + background_tasks.add_task(sync_task) + + return { + "message": f"ExploitDB sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"), + "status": "started", + "job_id": job_id, + "cve_id": request.cve_id, + "batch_size": request.batch_size + } + +@app.post("/api/sync-cisa-kev") +async def sync_cisa_kev(background_tasks: BackgroundTasks, + request: CISAKEVSyncRequest, + db: Session = Depends(get_db)): + """Synchronize CISA Known Exploited Vulnerabilities data""" + + # Create job record + job = BulkProcessingJob( + job_type='cisa_kev_sync', + status='pending', + job_metadata={ + 'cve_id': 
request.cve_id, + 'batch_size': request.batch_size + } + ) + db.add(job) + db.commit() + db.refresh(job) + + job_id = str(job.id) + running_jobs[job_id] = job + job_cancellation_flags[job_id] = False + + async def sync_task(): + # Create a new database session for the background task + task_db = SessionLocal() + try: + # Get the job in the new session + task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first() + if not task_job: + logger.error(f"Job {job_id} not found in task session") + return + + task_job.status = 'running' + task_job.started_at = datetime.utcnow() + task_db.commit() + + from cisa_kev_client import CISAKEVClient + client = CISAKEVClient(task_db) + + if request.cve_id: + # Sync specific CVE + if job_cancellation_flags.get(job_id, False): + logger.info(f"Job {job_id} cancelled before starting") + return + + result = await client.sync_cve_kev_data(request.cve_id) + logger.info(f"CISA KEV sync for {request.cve_id}: {result}") + else: + # Sync all CVEs with cancellation support + result = await client.bulk_sync_kev_data( + batch_size=request.batch_size, + cancellation_flag=lambda: job_cancellation_flags.get(job_id, False) + ) + logger.info(f"CISA KEV bulk sync completed: {result}") + + # Update job status if not cancelled + if not job_cancellation_flags.get(job_id, False): + task_job.status = 'completed' + task_job.completed_at = datetime.utcnow() + task_db.commit() + + except Exception as e: + if not job_cancellation_flags.get(job_id, False): + # Get the job again in case it was modified + task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first() + if task_job: + task_job.status = 'failed' + task_job.error_message = str(e) + task_job.completed_at = datetime.utcnow() + task_db.commit() + + logger.error(f"CISA KEV sync failed: {e}") + import traceback + traceback.print_exc() + finally: + # Clean up tracking and close the task session + running_jobs.pop(job_id, None) + job_cancellation_flags.pop(job_id, None) + task_db.close() + + background_tasks.add_task(sync_task) + + return { + "message": f"CISA KEV sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"), + "status": "started", + "job_id": job_id, + "cve_id": request.cve_id, + "batch_size": request.batch_size + } + +@app.get("/api/exploitdb-stats") +async def get_exploitdb_stats(db: Session = Depends(get_db)): + """Get ExploitDB-related statistics""" + + try: + from exploitdb_client_local import ExploitDBLocalClient + client = ExploitDBLocalClient(db) + + # Get sync status + status = await client.get_exploitdb_sync_status() + + # Get quality distribution from ExploitDB data + quality_distribution = {} + from sqlalchemy import text + cves_with_exploitdb = db.query(CVE).filter( + text("poc_data::text LIKE '%\"exploitdb\"%'") + ).all() + + for cve in cves_with_exploitdb: + if cve.poc_data and 'exploitdb' in cve.poc_data: + exploits = cve.poc_data['exploitdb'].get('exploits', []) + for exploit in exploits: + quality_tier = exploit.get('quality_analysis', {}).get('quality_tier', 'unknown') + quality_distribution[quality_tier] = quality_distribution.get(quality_tier, 0) + 1 + + # Get category distribution + category_distribution = {} + for cve in cves_with_exploitdb: + if cve.poc_data and 'exploitdb' in cve.poc_data: + exploits = cve.poc_data['exploitdb'].get('exploits', []) + for exploit in exploits: + category = exploit.get('category', 'unknown') + category_distribution[category] = category_distribution.get(category, 0) + 1 + + return { 
+ "exploitdb_sync_status": status, + "quality_distribution": quality_distribution, + "category_distribution": category_distribution, + "total_exploitdb_cves": len(cves_with_exploitdb), + "total_exploits": sum( + len(cve.poc_data.get('exploitdb', {}).get('exploits', [])) + for cve in cves_with_exploitdb + if cve.poc_data and 'exploitdb' in cve.poc_data + ) + } + + except Exception as e: + logger.error(f"Error getting ExploitDB stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) + @app.get("/api/github-poc-stats") async def get_github_poc_stats(db: Session = Depends(get_db)): """Get GitHub PoC-related statistics""" @@ -1298,6 +1538,74 @@ async def get_github_poc_status(db: Session = Depends(get_db)): logger.error(f"Error checking GitHub PoC status: {e}") return {"error": str(e)} +@app.get("/api/cisa-kev-stats") +async def get_cisa_kev_stats(db: Session = Depends(get_db)): + """Get CISA KEV-related statistics""" + + try: + from cisa_kev_client import CISAKEVClient + client = CISAKEVClient(db) + + # Get sync status + status = await client.get_kev_sync_status() + + # Get threat level distribution from CISA KEV data + threat_level_distribution = {} + from sqlalchemy import text + cves_with_kev = db.query(CVE).filter( + text("poc_data::text LIKE '%\"cisa_kev\"%'") + ).all() + + for cve in cves_with_kev: + if cve.poc_data and 'cisa_kev' in cve.poc_data: + vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {}) + threat_level = vuln_data.get('threat_level', 'unknown') + threat_level_distribution[threat_level] = threat_level_distribution.get(threat_level, 0) + 1 + + # Get vulnerability category distribution + category_distribution = {} + for cve in cves_with_kev: + if cve.poc_data and 'cisa_kev' in cve.poc_data: + vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {}) + category = vuln_data.get('vulnerability_category', 'unknown') + category_distribution[category] = category_distribution.get(category, 0) + 1 + + # Get ransomware usage statistics + ransomware_stats = {'known': 0, 'unknown': 0} + for cve in cves_with_kev: + if cve.poc_data and 'cisa_kev' in cve.poc_data: + vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {}) + ransomware_use = vuln_data.get('known_ransomware_use', 'Unknown').lower() + if ransomware_use == 'known': + ransomware_stats['known'] += 1 + else: + ransomware_stats['unknown'] += 1 + + # Calculate average threat score + threat_scores = [] + for cve in cves_with_kev: + if cve.poc_data and 'cisa_kev' in cve.poc_data: + vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {}) + threat_score = vuln_data.get('threat_score', 0) + if threat_score: + threat_scores.append(threat_score) + + avg_threat_score = sum(threat_scores) / len(threat_scores) if threat_scores else 0 + + return { + "cisa_kev_sync_status": status, + "threat_level_distribution": threat_level_distribution, + "category_distribution": category_distribution, + "ransomware_stats": ransomware_stats, + "average_threat_score": round(avg_threat_score, 2), + "total_kev_cves": len(cves_with_kev), + "total_with_threat_scores": len(threat_scores) + } + + except Exception as e: + logger.error(f"Error getting CISA KEV stats: {e}") + raise HTTPException(status_code=500, detail=str(e)) + @app.get("/api/bulk-jobs") async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)): """Get bulk processing job status""" diff --git a/docker-compose.yml b/docker-compose.yml index af060de..8758c71 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,12 +24,18 @@ 
services: DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db NVD_API_KEY: ${NVD_API_KEY:-} GITHUB_TOKEN: ${GITHUB_TOKEN} + OPENAI_API_KEY: ${OPENAI_API_KEY:-} + ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} + OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434} + LLM_PROVIDER: ${LLM_PROVIDER:-ollama} + LLM_MODEL: ${LLM_MODEL:-llama3.2} depends_on: db: condition: service_healthy volumes: - ./backend:/app - ./github_poc_collector:/github_poc_collector + - ./exploit-db-mirror:/app/exploit-db-mirror command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload frontend: @@ -51,6 +57,17 @@ services: volumes: - redis_data:/data + ollama: + image: ollama/ollama:latest + ports: + - "11434:11434" + volumes: + - ollama_data:/root/.ollama + environment: + - OLLAMA_HOST=0.0.0.0 + restart: unless-stopped + volumes: postgres_data: redis_data: + ollama_data: diff --git a/exploit-db-mirror b/exploit-db-mirror new file mode 160000 index 0000000..99e10e9 --- /dev/null +++ b/exploit-db-mirror @@ -0,0 +1 @@ +Subproject commit 99e10e9ba8fa6e0db140c33eb19ea76e9a144fb8 diff --git a/frontend/src/App.js b/frontend/src/App.js index 9a815d4..4480cd1 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -19,18 +19,73 @@ function App() { const [bulkStatus, setBulkStatus] = useState({}); const [pocStats, setPocStats] = useState({}); const [gitHubPocStats, setGitHubPocStats] = useState({}); + const [exploitdbStats, setExploitdbStats] = useState({}); + const [cisaKevStats, setCisaKevStats] = useState({}); const [bulkProcessing, setBulkProcessing] = useState(false); const [hasRunningJobs, setHasRunningJobs] = useState(false); + const [runningJobTypes, setRunningJobTypes] = useState(new Set()); const [llmStatus, setLlmStatus] = useState({}); + const [exploitSyncDropdownOpen, setExploitSyncDropdownOpen] = useState(false); useEffect(() => { fetchData(); }, []); + // Close dropdown when clicking outside + useEffect(() => { + const handleClickOutside = (event) => { + if (exploitSyncDropdownOpen && !event.target.closest('.relative')) { + setExploitSyncDropdownOpen(false); + } + }; + + document.addEventListener('mousedown', handleClickOutside); + return () => { + document.removeEventListener('mousedown', handleClickOutside); + }; + }, [exploitSyncDropdownOpen]); + + // Helper functions to check if specific job types are running + const isJobTypeRunning = (jobType) => { + return runningJobTypes.has(jobType); + }; + + const isBulkSeedRunning = () => { + return isJobTypeRunning('nvd_bulk_seed') || isJobTypeRunning('bulk_seed'); + }; + + const isIncrementalUpdateRunning = () => { + return isJobTypeRunning('incremental_update'); + }; + + const isNomiSecSyncRunning = () => { + return isJobTypeRunning('nomi_sec_sync'); + }; + + const isGitHubPocSyncRunning = () => { + return isJobTypeRunning('github_poc_sync'); + }; + + const isExploitDBSyncRunning = () => { + return isJobTypeRunning('exploitdb_sync') || isJobTypeRunning('exploitdb_sync_local'); + }; + + const isCISAKEVSyncRunning = () => { + return isJobTypeRunning('cisa_kev_sync'); + }; + + const isRuleGenerationRunning = () => { + return isJobTypeRunning('rule_regeneration') || isJobTypeRunning('llm_rule_generation'); + }; + + const areAnyExploitSyncsRunning = () => { + return isNomiSecSyncRunning() || isGitHubPocSyncRunning() || isExploitDBSyncRunning() || isCISAKEVSyncRunning(); + }; + const fetchData = async () => { try { setLoading(true); - const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes, llmStatusRes] = 
+      const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes, exploitdbStatsRes, cisaKevStatsRes, llmStatusRes] = await Promise.all([
         axios.get(`${API_BASE_URL}/api/cves`),
         axios.get(`${API_BASE_URL}/api/sigma-rules`),
         axios.get(`${API_BASE_URL}/api/stats`),
@@ -38,6 +93,8 @@ function App() {
         axios.get(`${API_BASE_URL}/api/bulk-status`),
         axios.get(`${API_BASE_URL}/api/poc-stats`),
         axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} })),
+        axios.get(`${API_BASE_URL}/api/exploitdb-stats`).catch(err => ({ data: {} })),
+        axios.get(`${API_BASE_URL}/api/cisa-kev-stats`).catch(err => ({ data: {} })),
         axios.get(`${API_BASE_URL}/api/llm-status`).catch(err => ({ data: {} }))
       ]);
@@ -48,11 +105,17 @@ function App() {
       setBulkStatus(bulkStatusRes.data);
       setPocStats(pocStatsRes.data);
       setGitHubPocStats(githubPocStatsRes.data);
+      setExploitdbStats(exploitdbStatsRes.data);
+      setCisaKevStats(cisaKevStatsRes.data);
       setLlmStatus(llmStatusRes.data);

       // Update running jobs state
       const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending');
       setHasRunningJobs(runningJobs.length > 0);
+
+      // Update specific job types that are running
+      const activeJobTypes = new Set(runningJobs.map(job => job.job_type));
+      setRunningJobTypes(activeJobTypes);
     } catch (error) {
       console.error('Error fetching data:', error);
     } finally {
@@ -157,6 +220,42 @@ function App() {
     }
   };
 
+  const syncExploitDB = async (cveId = null) => {
+    try {
+      const response = await axios.post(`${API_BASE_URL}/api/sync-exploitdb`, {
+        cve_id: cveId,
+        batch_size: 30
+      });
+      console.log('ExploitDB sync response:', response.data);
+      fetchData();
+    } catch (error) {
+      console.error('Error syncing ExploitDB:', error);
+    }
+  };
+
+  const syncCISAKEV = async (cveId = null) => {
+    try {
+      const response = await axios.post(`${API_BASE_URL}/api/sync-cisa-kev`, {
+        cve_id: cveId,
+        batch_size: 100
+      });
+      console.log('CISA KEV sync response:', response.data);
+      fetchData();
+    } catch (error) {
+      console.error('Error syncing CISA KEV:', error);
+    }
+  };
+
+  const syncReferences = async () => {
+    try {
+      // Placeholder for future implementation
+      console.log('Sync References - Not implemented yet');
+      alert('Sync References functionality will be implemented in a future update');
+    } catch (error) {
+      console.error('Error syncing references:', error);
+    }
+  };
+
   const regenerateRules = async (force = false) => {
     try {
       const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
@@ -235,6 +334,8 @@ function App() {

             {stats.cves_with_pocs || 0}
             {(stats.poc_coverage || 0).toFixed(1)}% coverage
             GitHub PoCs: {gitHubPocStats.cves_with_github_pocs || 0}
+            ExploitDB: {exploitdbStats.total_exploitdb_cves || 0}
+            CISA KEV: {cisaKevStats.total_kev_cves || 0}
             Recent CVEs (7d)

@@ -244,82 +345,193 @@ function App() {

             High Quality PoCs
             {pocStats.high_quality_cves || 0}
             Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}
-            GitHub PoCs Avg: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}
+            GitHub: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}
+            ExploitDB: {exploitdbStats.total_exploits || 0} exploits
+            CISA KEV: {(cisaKevStats.average_threat_score || 0).toFixed(1)} threat

-          {/* Bulk Processing Controls */}
-          Bulk Processing
+          {/* Data Synchronization Controls */}
+          Data Synchronization
+
+          {/* Phase 1: CVE Data Syncing */}
+          Phase 1: CVE Data Syncing
+
+          {/* Phase 2: Exploit Data Syncing */}
+          Phase 2: Exploit Data Syncing
+          {exploitSyncDropdownOpen && (
+          )}
+
+          {/* Phase 3: Reference Data Syncing */}
+          Phase 3: Reference Data Syncing
+
+          {/* Phase 4: Rule Generation */}
+          Phase 4: Rule Generation
{/* LLM Configuration */}