add kev support, exploitDB mirror support

Brendan McDevitt 2025-07-10 16:19:43 -05:00
parent 20b3a63c78
commit 696a1a3462
8 changed files with 1945 additions and 69 deletions

3
.gitmodules vendored

@@ -1,3 +1,6 @@
[submodule "github_poc_collector"]
path = github_poc_collector
url = https://git.mcdevitt.tech/bpmcdevitt/github_poc_collector
[submodule "exploit-db-mirror"]
path = exploit-db-mirror
url = https://git.mcdevitt.tech/bpmcdevitt/exploit-db-mirror.git


@@ -10,6 +10,8 @@ from typing import Optional
from sqlalchemy.orm import Session
from nvd_bulk_processor import NVDBulkProcessor
from nomi_sec_client import NomiSecClient
from exploitdb_client_local import ExploitDBLocalClient
from cisa_kev_client import CISAKEVClient
# Configure logging
logging.basicConfig(level=logging.INFO)
@@ -22,11 +24,15 @@ class BulkSeeder:
self.db_session = db_session
self.nvd_processor = NVDBulkProcessor(db_session)
self.nomi_sec_client = NomiSecClient(db_session)
self.exploitdb_client = ExploitDBLocalClient(db_session)
self.cisa_kev_client = CISAKEVClient(db_session)
async def full_bulk_seed(self, start_year: int = 2002,
end_year: Optional[int] = None,
skip_nvd: bool = False,
skip_nomi_sec: bool = False,
skip_exploitdb: bool = False,
skip_cisa_kev: bool = False) -> dict:
""" """
Perform complete bulk seeding operation Perform complete bulk seeding operation
@@ -35,6 +41,8 @@ class BulkSeeder:
end_year: Ending year for NVD data (default: current year)
skip_nvd: Skip NVD bulk processing (default: False)
skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False)
skip_exploitdb: Skip ExploitDB synchronization (default: False)
skip_cisa_kev: Skip CISA KEV synchronization (default: False)
Returns:
Dictionary containing operation results
@@ -46,6 +54,9 @@ class BulkSeeder:
'start_time': datetime.utcnow(),
'nvd_results': None,
'nomi_sec_results': None,
'exploitdb_results': None,
'cisa_kev_results': None,
'sigma_results': None,
'total_time': None,
'status': 'running'
}
@@ -76,11 +87,33 @@ class BulkSeeder:
else:
logger.info("Phase 2: Skipping nomi-sec PoC synchronization")
# Phase 3: ExploitDB Synchronization
if not skip_exploitdb:
logger.info("Phase 3: Starting ExploitDB synchronization...")
exploitdb_results = await self.exploitdb_client.bulk_sync_exploitdb(
batch_size=30 # Modest batches; the local client commits to the DB after each batch
)
results['exploitdb_results'] = exploitdb_results
logger.info(f"Phase 3 complete: {exploitdb_results['total_exploits_found']} exploits found")
else:
logger.info("Phase 3: Skipping ExploitDB synchronization")
# Phase 4: CISA KEV Synchronization
if not skip_cisa_kev:
logger.info("Phase 4: Starting CISA KEV synchronization...")
cisa_kev_results = await self.cisa_kev_client.bulk_sync_kev_data(
batch_size=100 # Can handle larger batches since data is already filtered
)
results['cisa_kev_results'] = cisa_kev_results
logger.info(f"Phase 4 complete: {cisa_kev_results['total_kev_found']} KEV entries found")
else:
logger.info("Phase 4: Skipping CISA KEV synchronization")
# Phase 5: Generate Enhanced SIGMA Rules
logger.info("Phase 5: Generating enhanced SIGMA rules...")
sigma_results = await self.generate_enhanced_sigma_rules()
results['sigma_results'] = sigma_results
logger.info(f"Phase 5 complete: {sigma_results['rules_generated']} rules generated")
results['status'] = 'completed'
results['end_time'] = datetime.utcnow()
@@ -107,6 +140,8 @@ class BulkSeeder:
'start_time': datetime.utcnow(),
'nvd_update': None,
'nomi_sec_update': None,
'exploitdb_update': None,
'cisa_kev_update': None,
'status': 'running'
}
@@ -125,6 +160,16 @@ class BulkSeeder:
recent_cves = await self._get_recently_modified_cves()
nomi_sec_update = await self._sync_specific_cves(recent_cves)
results['nomi_sec_update'] = nomi_sec_update
# Update ExploitDB data for modified CVEs
logger.info("Updating ExploitDB data for modified CVEs...")
exploitdb_update = await self._sync_specific_cves_exploitdb(recent_cves)
results['exploitdb_update'] = exploitdb_update
# Update CISA KEV data for modified CVEs
logger.info("Updating CISA KEV data for modified CVEs...")
cisa_kev_update = await self._sync_specific_cves_cisa_kev(recent_cves)
results['cisa_kev_update'] = cisa_kev_update
results['status'] = 'completed'
results['end_time'] = datetime.utcnow()
@@ -221,6 +266,53 @@ class BulkSeeder:
'total_pocs_found': total_pocs_found
}
async def _sync_specific_cves_exploitdb(self, cve_ids: list) -> dict:
"""Sync ExploitDB data for specific CVEs"""
total_processed = 0
total_exploits_found = 0
for cve_id in cve_ids:
try:
result = await self.exploitdb_client.sync_cve_exploits(cve_id)
total_processed += 1
total_exploits_found += result.get('exploits_found', 0)
# Small delay to avoid overwhelming the API
await asyncio.sleep(0.5)
except Exception as e:
logger.error(f"Error syncing ExploitDB for {cve_id}: {e}")
continue
return {
'total_processed': total_processed,
'total_exploits_found': total_exploits_found
}
async def _sync_specific_cves_cisa_kev(self, cve_ids: list) -> dict:
"""Sync CISA KEV data for specific CVEs"""
total_processed = 0
total_kev_found = 0
for cve_id in cve_ids:
try:
result = await self.cisa_kev_client.sync_cve_kev_data(cve_id)
total_processed += 1
if result.get('kev_found', False):
total_kev_found += 1
# Small delay to be respectful to CISA
await asyncio.sleep(0.2)
except Exception as e:
logger.error(f"Error syncing CISA KEV for {cve_id}: {e}")
continue
return {
'total_processed': total_processed,
'total_kev_found': total_kev_found
}
async def get_seeding_status(self) -> dict:
"""Get current seeding status and statistics"""
from main import CVE, SigmaRule, BulkProcessingJob
@@ -270,7 +362,9 @@ class BulkSeeder:
},
'recent_jobs': job_status,
'nvd_data_status': await self._get_nvd_data_status(),
'nomi_sec_status': await self.nomi_sec_client.get_sync_status(),
'exploitdb_status': await self.exploitdb_client.get_exploitdb_sync_status(),
'cisa_kev_status': await self.cisa_kev_client.get_kev_sync_status()
}
async def _get_nvd_data_status(self) -> dict:
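For orientation, a minimal sketch of driving the extended seeder end to end; the bulk_seeder module name and the SessionLocal factory are assumptions based on the imports shown in this diff:

import asyncio
from bulk_seeder import BulkSeeder   # assumption: this file is backend/bulk_seeder.py
from main import SessionLocal        # assumption: session factory lives in main.py

async def seed_all():
    db = SessionLocal()
    try:
        seeder = BulkSeeder(db)
        # Phases run in order: NVD, nomi-sec, ExploitDB, CISA KEV, SIGMA rules;
        # each phase can be skipped via its flag (e.g. skip_exploitdb=True).
        results = await seeder.full_bulk_seed(start_year=2020)
        print(results['status'], results['total_time'])
    finally:
        db.close()

asyncio.run(seed_all())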

516
backend/cisa_kev_client.py Normal file

@@ -0,0 +1,516 @@
"""
CISA Known Exploited Vulnerabilities (KEV) Integration Client
Interfaces with the CISA KEV catalog at https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json
"""
import aiohttp
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import Callable, Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import text
import re
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class CISAKEVClient:
"""Client for interfacing with CISA Known Exploited Vulnerabilities catalog"""
def __init__(self, db_session: Session):
self.db_session = db_session
# CISA KEV URLs
self.kev_catalog_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"
self.kev_schema_url = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities_schema.json"
# Cache for KEV data
self.kev_cache = None
self.cache_timestamp = None
self.cache_ttl = 3600 # 1 hour cache
# Rate limiting
self.rate_limit_delay = 1.0 # 1 second between requests
self.last_request_time = 0
async def _make_request(self, session: aiohttp.ClientSession, url: str) -> Optional[dict]:
"""Make a rate-limited request to CISA"""
try:
# Rate limiting
current_time = asyncio.get_event_loop().time()
time_since_last = current_time - self.last_request_time
if time_since_last < self.rate_limit_delay:
await asyncio.sleep(self.rate_limit_delay - time_since_last)
async with session.get(url, timeout=30) as response:
self.last_request_time = asyncio.get_event_loop().time()
if response.status == 200:
return await response.json()
else:
logger.warning(f"CISA KEV request failed: {response.status} for {url}")
return None
except Exception as e:
logger.error(f"Error making request to {url}: {e}")
return None
async def fetch_kev_catalog(self, force_refresh: bool = False) -> Optional[dict]:
"""Fetch the CISA KEV catalog with caching"""
# Check cache first
if not force_refresh and self.kev_cache and self.cache_timestamp:
cache_age = datetime.now().timestamp() - self.cache_timestamp
if cache_age < self.cache_ttl:
logger.info("Using cached CISA KEV data")
return self.kev_cache
logger.info("Fetching fresh CISA KEV catalog...")
async with aiohttp.ClientSession() as session:
data = await self._make_request(session, self.kev_catalog_url)
if data:
self.kev_cache = data
self.cache_timestamp = datetime.now().timestamp()
logger.info(f"Fetched CISA KEV catalog with {data.get('count', 0)} vulnerabilities")
return data
else:
logger.error("Failed to fetch CISA KEV catalog")
return None
def analyze_kev_vulnerability(self, vuln: dict) -> dict:
"""Analyze a CISA KEV vulnerability entry to extract threat intelligence"""
analysis = {
'cve_id': vuln.get('cveID'),
'vendor_project': vuln.get('vendorProject'),
'product': vuln.get('product'),
'vulnerability_name': vuln.get('vulnerabilityName'),
'date_added': vuln.get('dateAdded'),
'short_description': vuln.get('shortDescription'),
'required_action': vuln.get('requiredAction'),
'due_date': vuln.get('dueDate'),
'known_ransomware_use': vuln.get('knownRansomwareCampaignUse', 'Unknown'),
'notes': vuln.get('notes', ''),
'cwes': vuln.get('cwes', [])
}
# Calculate threat priority score based on various factors
threat_score = self._calculate_threat_score(vuln)
analysis['threat_score'] = threat_score
analysis['threat_level'] = self._get_threat_level(threat_score)
# Extract indicators from descriptions and required actions
indicators = self._extract_kev_indicators(vuln)
analysis['indicators'] = indicators
# Categorize the vulnerability type
analysis['vulnerability_category'] = self._categorize_vulnerability(vuln)
return analysis
def _calculate_threat_score(self, vuln: dict) -> int:
"""Calculate threat priority score (0-100) based on KEV data"""
score = 0
# Base score for being in CISA KEV
score += 50 # High base score since it's actively exploited
# Ransomware usage factor (0-25 points)
ransomware_use = vuln.get('knownRansomwareCampaignUse', 'Unknown').lower()
if ransomware_use == 'known':
score += 25
elif ransomware_use == 'unknown':
score += 10 # Still concerning but less certain
# Recency factor (0-15 points)
try:
date_added = datetime.strptime(vuln.get('dateAdded', ''), '%Y-%m-%d')
days_since_added = (datetime.now() - date_added).days
if days_since_added <= 30: # Very recent
score += 15
elif days_since_added <= 90: # Recent
score += 10
elif days_since_added <= 365: # This year
score += 5
except ValueError:
pass
# Urgency based on due date (0-10 points)
try:
due_date = datetime.strptime(vuln.get('dueDate', ''), '%Y-%m-%d')
days_to_due = (due_date - datetime.now()).days
if days_to_due <= 0: # Overdue
score += 10
elif days_to_due <= 7: # Due soon
score += 8
elif days_to_due <= 30: # Due this month
score += 5
except ValueError:
pass
return min(score, 100) # Cap at 100
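# Worked example (illustrative, not from the live catalog): an entry added 20 days
# ago with knownRansomwareCampaignUse == 'Known' and a due date 5 days out scores
# 50 (KEV base) + 25 (ransomware) + 15 (recency) + 8 (due soon) = 98 -> 'critical'.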
def _get_threat_level(self, score: int) -> str:
"""Get threat level based on score"""
if score >= 85:
return 'critical'
elif score >= 70:
return 'high'
elif score >= 55:
return 'medium'
else:
return 'low'
def _extract_kev_indicators(self, vuln: dict) -> dict:
"""Extract threat indicators from CISA KEV vulnerability data"""
indicators = {
'attack_vectors': [],
'affected_products': [],
'required_actions': [],
'ransomware_indicators': [],
'vulnerability_types': [],
'mitigation_techniques': []
}
# Extract from vulnerability name and description
text_sources = [
vuln.get('vulnerabilityName', ''),
vuln.get('shortDescription', ''),
vuln.get('requiredAction', ''),
vuln.get('notes', '')
]
full_text = ' '.join(text_sources).lower()
# Attack vector patterns
attack_vector_patterns = [
r'\b(remote code execution|rce)\b',
r'\b(sql injection|sqli)\b',
r'\b(cross.?site scripting|xss)\b',
r'\b(buffer overflow)\b',
r'\b(privilege escalation)\b',
r'\b(authentication bypass)\b',
r'\b(directory traversal|path traversal)\b',
r'\b(file upload)\b',
r'\b(command injection)\b',
r'\b(deserialization)\b',
r'\b(memory corruption)\b',
r'\b(use.?after.?free)\b',
r'\b(heap overflow)\b',
r'\b(stack overflow)\b'
]
for pattern in attack_vector_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators['attack_vectors'].extend(matches)
# Product and vendor information
vendor = vuln.get('vendorProject', '')
product = vuln.get('product', '')
if vendor and product:
indicators['affected_products'].append(f"{vendor} {product}")
# Required action keywords
required_action = vuln.get('requiredAction', '')
action_patterns = [
r'\b(patch|update|upgrade)\b',
r'\b(disable|remove|uninstall)\b',
r'\b(configure|reconfigure)\b',
r'\b(firewall|network segmentation)\b',
r'\b(monitor|logging)\b',
r'\b(backup|restore)\b'
]
for pattern in action_patterns:
matches = re.findall(pattern, required_action, re.IGNORECASE)
indicators['required_actions'].extend(matches)
# Ransomware indicators
if vuln.get('knownRansomwareCampaignUse') == 'Known':
indicators['ransomware_indicators'].append('known_ransomware_use')
# Vulnerability type classification
vuln_type_patterns = [
r'\b(zero.?day|0.?day)\b',
r'\b(denial.?of.?service|dos)\b',
r'\b(information disclosure)\b',
r'\b(data breach)\b',
r'\b(backdoor)\b',
r'\b(trojan)\b',
r'\b(webshell)\b'
]
for pattern in vuln_type_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators['vulnerability_types'].extend(matches)
# Clean up and deduplicate
for key in indicators:
indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2]))
indicators[key] = indicators[key][:10] # Limit to 10 items per category
return indicators
def _categorize_vulnerability(self, vuln: dict) -> str:
"""Categorize the vulnerability based on CISA KEV data"""
vuln_name = vuln.get('vulnerabilityName', '').lower()
description = vuln.get('shortDescription', '').lower()
full_text = f"{vuln_name} {description}"
# Check for specific vulnerability categories
if re.search(r'\b(remote code execution|rce)\b', full_text):
return 'remote_code_execution'
elif re.search(r'\b(sql injection|sqli)\b', full_text):
return 'injection'
elif re.search(r'\b(cross.?site scripting|xss)\b', full_text):
return 'cross_site_scripting'
elif re.search(r'\b(privilege escalation)\b', full_text):
return 'privilege_escalation'
elif re.search(r'\b(authentication bypass)\b', full_text):
return 'authentication_bypass'
elif re.search(r'\b(buffer overflow|memory corruption)\b', full_text):
return 'memory_corruption'
elif re.search(r'\b(directory traversal|path traversal)\b', full_text):
return 'path_traversal'
elif re.search(r'\b(denial.?of.?service|dos)\b', full_text):
return 'denial_of_service'
elif re.search(r'\b(information disclosure)\b', full_text):
return 'information_disclosure'
else:
return 'other'
async def sync_cve_kev_data(self, cve_id: str) -> dict:
"""Synchronize CISA KEV data for a specific CVE"""
from main import CVE, SigmaRule
# Get existing CVE
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
if not cve:
logger.warning(f"CVE {cve_id} not found in database")
return {"error": "CVE not found"}
# Fetch CISA KEV catalog
kev_catalog = await self.fetch_kev_catalog()
if not kev_catalog:
return {"error": "Failed to fetch CISA KEV catalog"}
# Search for the CVE in the KEV catalog
kev_entry = None
for vuln in kev_catalog.get('vulnerabilities', []):
if vuln.get('cveID') == cve_id:
kev_entry = vuln
break
if not kev_entry:
logger.info(f"CVE {cve_id} not found in CISA KEV catalog")
return {"cve_id": cve_id, "kev_found": False}
# Analyze the KEV entry
kev_analysis = self.analyze_kev_vulnerability(kev_entry)
# Update CVE with CISA KEV data
if not cve.poc_data:
cve.poc_data = {}
cve.poc_data['cisa_kev'] = {
'vulnerability_data': kev_analysis,
'catalog_version': kev_catalog.get('catalogVersion'),
'date_released': kev_catalog.get('dateReleased'),
'synced_at': datetime.utcnow().isoformat(),
'source': 'cisa_kev'
}
cve.updated_at = datetime.utcnow()
# Update SIGMA rule with CISA KEV data
sigma_rule = self.db_session.query(SigmaRule).filter(
SigmaRule.cve_id == cve_id
).first()
if sigma_rule:
# Update rule with CISA KEV data
if not sigma_rule.nomi_sec_data:
sigma_rule.nomi_sec_data = {}
sigma_rule.nomi_sec_data['cisa_kev'] = {
'threat_level': kev_analysis['threat_level'],
'threat_score': kev_analysis['threat_score'],
'known_ransomware_use': kev_analysis['known_ransomware_use'],
'vulnerability_category': kev_analysis['vulnerability_category'],
'due_date': kev_analysis['due_date'],
'indicators': kev_analysis['indicators'],
'source': 'cisa_kev'
}
# Update exploit indicators with CISA KEV data
existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {}
kev_indicators = kev_analysis['indicators']
for key, values in kev_indicators.items():
if key not in existing_indicators:
existing_indicators[key] = []
existing_indicators[key].extend(values)
existing_indicators[key] = list(set(existing_indicators[key]))
sigma_rule.exploit_indicators = json.dumps(existing_indicators)
sigma_rule.updated_at = datetime.utcnow()
self.db_session.commit()
logger.info(f"Synchronized CISA KEV data for {cve_id} (Threat Level: {kev_analysis['threat_level']})")
return {
"cve_id": cve_id,
"kev_found": True,
"threat_level": kev_analysis['threat_level'],
"threat_score": kev_analysis['threat_score'],
"known_ransomware_use": kev_analysis['known_ransomware_use'],
"vulnerability_category": kev_analysis['vulnerability_category'],
"source": "cisa_kev"
}
async def bulk_sync_kev_data(self, batch_size: int = 100, cancellation_flag: Optional[Callable[[], bool]] = None) -> dict:
"""Synchronize CISA KEV data for all matching CVEs"""
from main import CVE, BulkProcessingJob
# Create bulk processing job
job = BulkProcessingJob(
job_type='cisa_kev_sync',
status='running',
started_at=datetime.utcnow(),
job_metadata={'batch_size': batch_size}
)
self.db_session.add(job)
self.db_session.commit()
total_processed = 0
total_found = 0
results = []
try:
# Fetch CISA KEV catalog first
kev_catalog = await self.fetch_kev_catalog(force_refresh=True)
if not kev_catalog:
raise Exception("Failed to fetch CISA KEV catalog")
# Extract all CVE IDs from the KEV catalog
kev_cve_ids = [vuln.get('cveID') for vuln in kev_catalog.get('vulnerabilities', []) if vuln.get('cveID')]
# Get CVEs that exist in our database and match KEV catalog
cves = self.db_session.query(CVE).filter(CVE.cve_id.in_(kev_cve_ids)).all()
job.total_items = len(cves)
self.db_session.commit()
logger.info(f"Found {len(cves)} CVEs matching CISA KEV catalog")
# Process in batches
for i in range(0, len(cves), batch_size):
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("CISA KEV sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
batch = cves[i:i + batch_size]
for cve in batch:
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("CISA KEV sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
try:
result = await self.sync_cve_kev_data(cve.cve_id)
total_processed += 1
if result.get("kev_found", False):
total_found += 1
results.append(result)
job.processed_items += 1
# Small delay to be respectful to CISA
await asyncio.sleep(0.1)
except Exception as e:
logger.error(f"Error syncing CISA KEV for {cve.cve_id}: {e}")
job.failed_items += 1
# Break out of outer loop if cancelled
if job.status == 'cancelled':
break
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed CISA KEV batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
# Update job status
if job.status != 'cancelled':
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_kev_found': total_found,
'cves_with_kev': len(results),
'catalog_version': kev_catalog.get('catalogVersion'),
'source': 'cisa_kev'
})
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk CISA KEV sync job failed: {e}")
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'total_processed': total_processed,
'total_kev_found': total_found,
'cves_with_kev': len(results),
'source': 'cisa_kev'
}
async def get_kev_sync_status(self) -> dict:
"""Get CISA KEV synchronization status"""
from main import CVE
# Count CVEs with CISA KEV data
total_cves = self.db_session.query(CVE).count()
result = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"cisa_kev\"%'")
)
cves_with_kev = result.scalar()
# Get catalog information if available
catalog_info = {}
if self.kev_cache:
catalog_info = {
'catalog_version': self.kev_cache.get('catalogVersion'),
'date_released': self.kev_cache.get('dateReleased'),
'total_vulnerabilities': self.kev_cache.get('count', 0),
'cache_age_seconds': int(datetime.now().timestamp() - (self.cache_timestamp or 0))
}
return {
'total_cves': total_cves,
'cves_with_kev_data': cves_with_kev,
'kev_coverage': (cves_with_kev / total_cves * 100) if total_cves > 0 else 0,
'kev_sync_status': 'active' if cves_with_kev > 0 else 'pending',
'catalog_info': catalog_info,
'source': 'cisa_kev'
}
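For quick inspection outside the app, a minimal sketch that pulls the same catalog with plain aiohttp (no database or caching), using the hardcoded feed URL from the client above:

import asyncio
import aiohttp

KEV_URL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"

async def peek_kev():
    async with aiohttp.ClientSession() as session:
        async with session.get(KEV_URL, timeout=30) as resp:
            catalog = await resp.json()
    print(catalog.get("catalogVersion"), catalog.get("count"), "entries")
    # Tally entries flagged with known ransomware campaign use
    known = sum(1 for v in catalog.get("vulnerabilities", [])
                if v.get("knownRansomwareCampaignUse") == "Known")
    print("known ransomware use:", known)

asyncio.run(peek_kev())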

725
backend/exploitdb_client_local.py Normal file

@@ -0,0 +1,725 @@
"""
ExploitDB Local Filesystem Integration Client
Interfaces with the local ExploitDB submodule at exploit-db-mirror/
"""
import os
import re
import json
import logging
from datetime import datetime
from typing import Callable, Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class ExploitDBLocalClient:
"""Client for interfacing with local ExploitDB mirror filesystem"""
def __init__(self, db_session: Session):
self.db_session = db_session
# Path to the local exploit-db-mirror submodule (in container: /app/exploit-db-mirror)
self.exploitdb_path = Path("/app/exploit-db-mirror")
self.exploits_path = self.exploitdb_path / "exploits"
# ExploitDB URL pattern for mapping
self.exploit_url_pattern = re.compile(r'https?://(?:www\.)?exploit-db\.com/exploits/(\d+)')
# Cache for file searches
self.file_cache = {}
# Build file index on initialization
self._build_file_index()
def _build_file_index(self):
"""Build an index of exploit ID to file path for fast lookups"""
logger.info("Building ExploitDB file index...")
self.file_index = {}
if not self.exploits_path.exists():
logger.error(f"ExploitDB path not found: {self.exploits_path}")
return
# Walk through all exploit files
for root, dirs, files in os.walk(self.exploits_path):
for file in files:
# Extract exploit ID from filename (e.g., "12345.py" -> "12345")
match = re.match(r'^(\d+)\.(\w+)$', file)
if match:
exploit_id = match.group(1)
file_extension = match.group(2)
file_path = Path(root) / file
# Store in index
self.file_index[exploit_id] = {
'path': file_path,
'filename': file,
'extension': file_extension,
'category': self._extract_category_from_path(file_path),
'subcategory': self._extract_subcategory_from_path(file_path)
}
logger.info(f"Built index with {len(self.file_index)} exploits")
def _extract_category_from_path(self, file_path: Path) -> str:
"""Extract category from file path (e.g., linux, windows, etc.)"""
parts = file_path.parts
exploits_index = None
for i, part in enumerate(parts):
if part == "exploits":
exploits_index = i
break
if exploits_index is not None and exploits_index + 1 < len(parts):
return parts[exploits_index + 1]
return "unknown"
def _extract_subcategory_from_path(self, file_path: Path) -> str:
"""Extract subcategory from file path (e.g., local, remote, webapps, etc.)"""
parts = file_path.parts
exploits_index = None
for i, part in enumerate(parts):
if part == "exploits":
exploits_index = i
break
if exploits_index is not None and exploits_index + 2 < len(parts):
return parts[exploits_index + 2]
return "unknown"
def extract_exploit_id_from_url(self, url: str) -> Optional[str]:
"""Extract exploit ID from ExploitDB URL"""
match = self.exploit_url_pattern.search(url)
if match:
return match.group(1)
return None
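# Example: "https://www.exploit-db.com/exploits/44887" -> "44887" (hypothetical ID);
# reference URLs that do not match the pattern return None and are skipped by callers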
def get_exploit_details(self, exploit_id: str) -> Optional[dict]:
"""Get exploit details from local filesystem"""
if exploit_id not in self.file_index:
logger.debug(f"Exploit {exploit_id} not found in local index")
return None
file_info = self.file_index[exploit_id]
file_path = file_info['path']
try:
# Read file content
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Get file stats
stat = file_path.stat()
return {
'id': exploit_id,
'filename': file_info['filename'],
'path': str(file_path.relative_to(self.exploitdb_path)),
'full_path': str(file_path),
'category': file_info['category'],
'subcategory': file_info['subcategory'],
'extension': file_info['extension'],
'content': content,
'size': stat.st_size,
'modified_time': datetime.fromtimestamp(stat.st_mtime),
'local_url': f"file://{file_path}"
}
except Exception as e:
logger.error(f"Error reading exploit file {file_path}: {e}")
return None
def analyze_exploit_content(self, exploit_data: dict) -> dict:
"""Analyze exploit content to extract indicators"""
if not exploit_data or not exploit_data.get('content'):
return {}
content = exploit_data['content']
indicators = {
'processes': [],
'files': [],
'network': [],
'registry': [],
'commands': [],
'urls': [],
'techniques': [],
'languages': [],
'platforms': [],
'syscalls': [],
'functions': []
}
# Determine programming language from extension
extension = exploit_data.get('extension', '').lower()
language_map = {
'py': 'python',
'rb': 'ruby',
'pl': 'perl',
'c': 'c',
'cpp': 'cpp',
'cc': 'cpp',
'cxx': 'cpp',
'sh': 'bash',
'ps1': 'powershell',
'java': 'java',
'js': 'javascript',
'php': 'php',
'asp': 'asp',
'aspx': 'aspx',
'jsp': 'jsp',
'go': 'go',
'rs': 'rust',
'asm': 'assembly',
's': 'assembly',
'nasm': 'assembly'
}
if extension in language_map:
indicators['languages'].append(language_map[extension])
# Extract platform from path
category = exploit_data.get('category', '').lower()
if category in ['linux', 'windows', 'osx', 'macos', 'android', 'freebsd', 'solaris']:
indicators['platforms'].append(category)
# Extract indicators from content
content_lower = content.lower()
# Process patterns - enhanced for different languages
process_patterns = [
r'\b(cmd\.exe|powershell\.exe|bash|sh|python|ruby|perl|java)\b',
r'\b(system|exec|popen|subprocess|shell_exec|eval|execve|execl|execlp)\b',
r'\b(createprocess|shellexecute|winexec|createthread)\b',
r'\b(mshta|rundll32|regsvr32|wscript|cscript|certutil|bitsadmin)\b',
r'\b(/bin/sh|/bin/bash|/usr/bin/python|/usr/bin/perl)\b'
]
for pattern in process_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['processes'].extend(matches)
# File patterns - enhanced
file_patterns = [
r'\b([a-zA-Z]:\\[^\\\s"\']+\\[^\\\s"\']+\.[a-zA-Z0-9]+)\b', # Windows paths
r'\b(/[^/\s"\']+/[^/\s"\']+\.[a-zA-Z0-9]+)\b', # Unix paths
r'\b(\w+\.(exe|dll|so|dylib|bat|ps1|py|sh|jar|war|php|jsp|asp|aspx|txt|log|conf))\b',
r'\b(/tmp/[^\s"\']+)\b', # Temp files
r'\b(/etc/[^\s"\']+)\b', # Config files
r'\b(/var/[^\s"\']+)\b', # Var files
r'\b(/proc/[^\s"\']+)\b' # Proc files
]
for pattern in file_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if matches and isinstance(matches[0], tuple):
indicators['files'].extend([m[0] for m in matches])
else:
indicators['files'].extend(matches)
# Network patterns - enhanced
network_patterns = [
r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses
r'\b(https?://[^\s<>"\']+)\b', # URLs
r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', # Domain names
r'\b(bind|connect|listen|socket|recv|send|accept)\b', # Network functions
r'\b(AF_INET|SOCK_STREAM|SOCK_DGRAM)\b', # Socket constants
r'\b(nc|netcat|ncat|telnet|ssh|ftp|wget|curl)\b' # Network tools
]
for pattern in network_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
if 'http' in pattern:
indicators['urls'].extend(matches)
else:
indicators['network'].extend(matches)
# Command patterns - enhanced
command_patterns = [
r'\b(curl|wget|nc|netcat|telnet|ssh|ftp)\b',
r'\b(whoami|id|uname|systeminfo|ipconfig|ifconfig|netstat|ps|top|lsof)\b',
r'\b(cat|type|dir|ls|find|grep|awk|sed|sort|uniq)\b',
r'\b(echo|printf|print)\b',
r'\b(base64|decode|encode|openssl|gpg)\b',
r'\b(sudo|su|chmod|chown|mount|umount)\b',
r'\b(service|systemctl|chkconfig|update-rc\.d)\b'
]
for pattern in command_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['commands'].extend(matches)
# Registry patterns (Windows-specific)
registry_patterns = [
r'\b(HKEY_[A-Z_]+)\b',
r'\b(HKLM|HKCU|HKCR|HKU|HKCC)\b',
r'\b(reg\s+add|reg\s+query|reg\s+delete|regedit)\b',
r'\b(SOFTWARE\\\\[^\\\s"\']+)\b',
r'\b(SYSTEM\\\\[^\\\s"\']+)\b',
r'\b(CurrentVersion\\\\[^\\\s"\']+)\b'
]
for pattern in registry_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['registry'].extend(matches)
# System call patterns (Linux/Unix)
syscall_patterns = [
r'\b(open|close|read|write|lseek|stat|fstat|lstat)\b',
r'\b(fork|vfork|clone|execve|wait|waitpid)\b',
r'\b(socket|bind|listen|accept|connect|send|recv)\b',
r'\b(mmap|munmap|mprotect|brk|sbrk)\b',
r'\b(ptrace|kill|signal|alarm)\b'
]
for pattern in syscall_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['syscalls'].extend(matches)
# Function patterns
function_patterns = [
r'\b(main|printf|scanf|malloc|free|strcpy|strcat|strlen)\b',
r'\b(gets|puts|fgets|fputs|fopen|fclose|fread|fwrite)\b',
r'\b(sprintf|snprintf|memcpy|memset|strcmp|strncmp)\b'
]
for pattern in function_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
indicators['functions'].extend(matches)
# Clean up and deduplicate
for key in indicators:
# Remove empty strings and duplicates
indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 1]))
# Limit to reasonable number of indicators
indicators[key] = indicators[key][:25]
return indicators
def calculate_exploit_quality_score(self, exploit_data: dict) -> dict:
"""Calculate quality score for an exploit"""
quality_score = 0
factors = {}
# File size factor (0-25 points)
file_size = exploit_data.get('size', 0)
if file_size > 0:
if file_size > 10000: # Large files (10KB+)
size_score = 25
elif file_size > 5000: # Medium files (5KB+)
size_score = 20
elif file_size > 1000: # Small files (1KB+)
size_score = 15
elif file_size > 500: # Very small files (500B+)
size_score = 10
else: # Tiny files
size_score = 5
quality_score += size_score
factors['size_score'] = size_score
# Content analysis factor (0-30 points)
content = exploit_data.get('content', '')
if content:
content_score = 0
lines = content.split('\n')
# Check for comments and documentation
comment_lines = 0
for line in lines:
stripped = line.strip()
if (stripped.startswith('#') or stripped.startswith('//') or
stripped.startswith('/*') or stripped.startswith('*') or
stripped.startswith('"""') or stripped.startswith("'''")):
comment_lines += 1
comment_score = min(comment_lines, 10) # Up to 10 points
content_score += comment_score
# Check for function definitions
function_patterns = [
r'\bdef\s+\w+', # Python
r'\bfunction\s+\w+', # JavaScript
r'\bvoid\s+\w+', # C/C++
r'\bint\s+\w+', # C/C++
r'\bchar\s+\w+', # C/C++
r'\bsub\s+\w+', # Perl
r'^\w+\s*\(', # Generic function calls
]
function_count = 0
for pattern in function_patterns:
matches = re.findall(pattern, content, re.IGNORECASE | re.MULTILINE)
function_count += len(matches)
function_score = min(function_count, 15) # Up to 15 points
content_score += function_score
# Check for include/import statements
include_patterns = [
r'#include\s*[<"]', # C/C++
r'import\s+\w+', # Python/Java
r'require\s+\w+', # Ruby/Perl
r'use\s+\w+', # Perl
]
include_count = 0
for pattern in include_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
include_count += len(matches)
include_score = min(include_count, 5) # Up to 5 points
content_score += include_score
quality_score += content_score
factors['content_score'] = content_score
# Platform/category factor (0-20 points)
category = exploit_data.get('category', '').lower()
subcategory = exploit_data.get('subcategory', '').lower()
platform_score = 0
if category in ['linux', 'windows', 'osx', 'macos']:
platform_score += 10
elif category in ['android', 'freebsd', 'solaris']:
platform_score += 8
elif category in ['multiple', 'unix']:
platform_score += 6
if subcategory in ['local', 'remote']:
platform_score += 10
elif subcategory in ['webapps', 'dos']:
platform_score += 8
elif subcategory in ['shellcode']:
platform_score += 6
quality_score += platform_score
factors['platform_score'] = platform_score
# Language factor (0-15 points)
extension = exploit_data.get('extension', '').lower()
lang_score = 0
if extension in ['c', 'cpp', 'cc', 'cxx']: # Compiled languages
lang_score = 15
elif extension in ['py', 'rb', 'pl', 'java']: # High-level languages
lang_score = 12
elif extension in ['sh', 'ps1', 'bat']: # Scripting languages
lang_score = 8
elif extension in ['asm', 's', 'nasm']: # Assembly
lang_score = 10
elif extension in ['php', 'asp', 'aspx', 'jsp']: # Web languages
lang_score = 6
elif extension in ['txt', 'html']: # Text/docs
lang_score = 3
quality_score += lang_score
factors['language_score'] = lang_score
# File age factor (0-10 points) - newer exploits might be more relevant
modified_time = exploit_data.get('modified_time')
if modified_time:
days_old = (datetime.now() - modified_time).days
if days_old < 365: # Less than 1 year
age_score = 10
elif days_old < 365 * 3: # Less than 3 years
age_score = 8
elif days_old < 365 * 5: # Less than 5 years
age_score = 6
elif days_old < 365 * 10: # Less than 10 years
age_score = 4
else: # Very old
age_score = 2
quality_score += age_score
factors['age_score'] = age_score
# Determine quality tier
quality_tier = self._get_exploit_quality_tier(quality_score)
return {
'quality_score': quality_score,
'factors': factors,
'quality_tier': quality_tier
}
def _get_exploit_quality_tier(self, score: int) -> str:
"""Get quality tier based on score"""
if score >= 80:
return 'excellent'
elif score >= 65:
return 'good'
elif score >= 45:
return 'fair'
elif score >= 25:
return 'poor'
else:
return 'very_poor'
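# Worked example (illustrative): a 6 KB Python local exploit for Linux modified two
# years ago with moderate comments/functions/imports scores roughly
# 20 (size) + 20 (content) + 20 (linux + local) + 12 (python) + 8 (age) = 80 -> 'excellent'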
async def sync_cve_exploits(self, cve_id: str) -> dict:
"""Synchronize ExploitDB data for a specific CVE using local filesystem"""
from main import CVE, SigmaRule
# Get existing CVE
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
if not cve:
logger.warning(f"CVE {cve_id} not found in database")
return {"error": "CVE not found"}
# Extract ExploitDB URLs from reference URLs
exploit_urls = []
if cve.reference_urls:
for url in cve.reference_urls:
exploit_id = self.extract_exploit_id_from_url(url)
if exploit_id:
exploit_urls.append((url, exploit_id))
if not exploit_urls:
logger.info(f"No ExploitDB URLs found for {cve_id}")
return {"cve_id": cve_id, "exploits_found": 0}
# Fetch exploit details from local filesystem
exploit_data = []
total_quality_score = 0
for url, exploit_id in exploit_urls:
try:
details = self.get_exploit_details(exploit_id)
if details:
# Analyze exploit content
indicators = self.analyze_exploit_content(details)
quality_analysis = self.calculate_exploit_quality_score(details)
exploit_entry = {
'id': exploit_id,
'url': url,
'filename': details.get('filename'),
'path': details.get('path'),
'category': details.get('category'),
'subcategory': details.get('subcategory'),
'extension': details.get('extension'),
'size': details.get('size'),
'modified_time': details.get('modified_time').isoformat() if details.get('modified_time') else None,
'local_url': details.get('local_url'),
'indicators': indicators,
'quality_analysis': quality_analysis
}
exploit_data.append(exploit_entry)
total_quality_score += quality_analysis['quality_score']
logger.info(f"Successfully processed exploit {exploit_id} from local filesystem")
except Exception as e:
logger.error(f"Error processing exploit {exploit_id}: {e}")
# Update CVE with ExploitDB data
if exploit_data:
# Store in existing poc_data field
if not cve.poc_data:
cve.poc_data = {}
cve.poc_data['exploitdb'] = {
'exploits': exploit_data,
'total_exploits': len(exploit_data),
'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0,
'synced_at': datetime.utcnow().isoformat(),
'source': 'local_filesystem'
}
cve.updated_at = datetime.utcnow()
# Update SIGMA rule with ExploitDB data
sigma_rule = self.db_session.query(SigmaRule).filter(
SigmaRule.cve_id == cve_id
).first()
if sigma_rule:
# Combine indicators from all exploits
combined_indicators = {}
for exploit in exploit_data:
for key, values in exploit['indicators'].items():
if key not in combined_indicators:
combined_indicators[key] = []
combined_indicators[key].extend(values)
# Deduplicate
for key in combined_indicators:
combined_indicators[key] = list(set(combined_indicators[key]))
# Update rule with ExploitDB data
if not sigma_rule.nomi_sec_data:
sigma_rule.nomi_sec_data = {}
sigma_rule.nomi_sec_data['exploitdb'] = {
'total_exploits': len(exploit_data),
'average_quality': total_quality_score // len(exploit_data) if exploit_data else 0,
'best_exploit': max(exploit_data, key=lambda x: x['quality_analysis']['quality_score']) if exploit_data else None,
'indicators': combined_indicators,
'source': 'local_filesystem'
}
# Update exploit indicators
existing_indicators = json.loads(sigma_rule.exploit_indicators) if sigma_rule.exploit_indicators else {}
for key, values in combined_indicators.items():
if key not in existing_indicators:
existing_indicators[key] = []
existing_indicators[key].extend(values)
existing_indicators[key] = list(set(existing_indicators[key]))
sigma_rule.exploit_indicators = json.dumps(existing_indicators)
sigma_rule.updated_at = datetime.utcnow()
self.db_session.commit()
logger.info(f"Synchronized {len(exploit_data)} ExploitDB exploits for {cve_id} from local filesystem")
return {
"cve_id": cve_id,
"exploits_found": len(exploit_data),
"total_quality_score": total_quality_score,
"average_quality": total_quality_score // len(exploit_data) if exploit_data else 0,
"exploit_urls": [e['url'] for e in exploit_data],
"source": "local_filesystem"
}
async def bulk_sync_exploitdb(self, batch_size: int = 50, cancellation_flag: Optional[Callable[[], bool]] = None) -> dict:
"""Synchronize ExploitDB data for all CVEs with ExploitDB references using local filesystem"""
from main import CVE, BulkProcessingJob
from sqlalchemy import text
# Create bulk processing job
job = BulkProcessingJob(
job_type='exploitdb_sync_local',
status='running',
started_at=datetime.utcnow(),
job_metadata={'batch_size': batch_size, 'source': 'local_filesystem'}
)
self.db_session.add(job)
self.db_session.commit()
total_processed = 0
total_found = 0
results = []
try:
# Get all CVEs with ExploitDB references using text search
cves = self.db_session.query(CVE).filter(
text("reference_urls::text LIKE '%exploit-db%'")
).all()
job.total_items = len(cves)
self.db_session.commit()
logger.info(f"Found {len(cves)} CVEs with ExploitDB references for local sync")
# Process in batches
for i in range(0, len(cves), batch_size):
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("ExploitDB local sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
batch = cves[i:i + batch_size]
for cve in batch:
# Check for cancellation
if cancellation_flag and cancellation_flag():
logger.info("ExploitDB local sync cancelled by user")
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
break
try:
result = await self.sync_cve_exploits(cve.cve_id)
total_processed += 1
if result.get("exploits_found", 0) > 0:
total_found += result["exploits_found"]
results.append(result)
job.processed_items += 1
# Very small delay for responsiveness
# No need for long delays with local filesystem
except Exception as e:
logger.error(f"Error syncing ExploitDB for {cve.cve_id}: {e}")
job.failed_items += 1
# Break out of outer loop if cancelled
if job.status == 'cancelled':
break
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed ExploitDB local batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
# Update job status
if job.status != 'cancelled':
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_exploits_found': total_found,
'cves_with_exploits': len(results),
'source': 'local_filesystem'
})
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk ExploitDB local sync job failed: {e}")
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'total_processed': total_processed,
'total_exploits_found': total_found,
'cves_with_exploits': len(results),
'source': 'local_filesystem'
}
async def get_exploitdb_sync_status(self) -> dict:
"""Get ExploitDB synchronization status for local filesystem"""
from main import CVE
from sqlalchemy import text
# Count CVEs with ExploitDB references
total_cves = self.db_session.query(CVE).count()
# Count CVEs with ExploitDB data
result = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE poc_data::text LIKE '%\"exploitdb\"%'")
)
cves_with_exploitdb = result.scalar()
# Count CVEs with ExploitDB URLs in references
result2 = self.db_session.execute(
text("SELECT COUNT(*) FROM cves WHERE reference_urls::text LIKE '%exploit-db%'")
)
cves_with_exploitdb_refs = result2.scalar()
return {
'total_cves': total_cves,
'cves_with_exploitdb_refs': cves_with_exploitdb_refs,
'cves_with_exploitdb_data': cves_with_exploitdb,
'exploitdb_coverage': (cves_with_exploitdb / cves_with_exploitdb_refs * 100) if cves_with_exploitdb_refs > 0 else 0,
'exploitdb_sync_status': 'active' if cves_with_exploitdb > 0 else 'pending',
'exploitdb_local_index_size': len(self.file_index),
'source': 'local_filesystem'
}
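A minimal lookup sketch; only sync_cve_exploits touches the database, so the read-only helpers work with any session (SessionLocal from main.py is an assumption):

from exploitdb_client_local import ExploitDBLocalClient
from main import SessionLocal   # assumption: session factory lives in main.py

db = SessionLocal()
client = ExploitDBLocalClient(db)              # builds the exploit-ID -> file index
details = client.get_exploit_details("44887")  # hypothetical exploit ID
if details:
    indicators = client.analyze_exploit_content(details)
    quality = client.calculate_exploit_quality_score(details)
    print(details['path'], quality['quality_tier'], indicators['commands'][:5])
db.close()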

backend/main.py

@@ -154,6 +154,14 @@ class GitHubPoCSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 50
class ExploitDBSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 30
class CISAKEVSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 100
class RuleRegenRequest(BaseModel):
force: bool = False
@@ -1224,6 +1232,238 @@ async def sync_github_pocs(background_tasks: BackgroundTasks,
"batch_size": request.batch_size
}
@app.post("/api/sync-exploitdb")
async def sync_exploitdb(background_tasks: BackgroundTasks,
request: ExploitDBSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize ExploitDB data from git mirror"""
# Create job record
job = BulkProcessingJob(
job_type='exploitdb_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from exploitdb_client_local import ExploitDBLocalClient
client = ExploitDBLocalClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_exploits(request.cve_id)
logger.info(f"ExploitDB sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_exploitdb(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"ExploitDB bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"ExploitDB sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"ExploitDB sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
@app.post("/api/sync-cisa-kev")
async def sync_cisa_kev(background_tasks: BackgroundTasks,
request: CISAKEVSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize CISA Known Exploited Vulnerabilities data"""
# Create job record
job = BulkProcessingJob(
job_type='cisa_kev_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from cisa_kev_client import CISAKEVClient
client = CISAKEVClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_kev_data(request.cve_id)
logger.info(f"CISA KEV sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_kev_data(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"CISA KEV bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"CISA KEV sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"CISA KEV sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
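# A quick smoke test of the two sync endpoints above, assuming the backend listens
# on localhost:8000 (per the uvicorn command in docker-compose):
import requests

BASE = "http://localhost:8000"  # assumed backend address

# Kick off a bulk ExploitDB sync from the local mirror (omit cve_id to sync all CVEs)
job = requests.post(f"{BASE}/api/sync-exploitdb", json={"batch_size": 30}).json()
print(job["job_id"], job["status"])

# Sync CISA KEV data for a single CVE (hypothetical example ID)
print(requests.post(f"{BASE}/api/sync-cisa-kev", json={"cve_id": "CVE-2021-44228"}).json())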
@app.get("/api/exploitdb-stats")
async def get_exploitdb_stats(db: Session = Depends(get_db)):
"""Get ExploitDB-related statistics"""
try:
from exploitdb_client_local import ExploitDBLocalClient
client = ExploitDBLocalClient(db)
# Get sync status
status = await client.get_exploitdb_sync_status()
# Get quality distribution from ExploitDB data
quality_distribution = {}
from sqlalchemy import text
cves_with_exploitdb = db.query(CVE).filter(
text("poc_data::text LIKE '%\"exploitdb\"%'")
).all()
for cve in cves_with_exploitdb:
if cve.poc_data and 'exploitdb' in cve.poc_data:
exploits = cve.poc_data['exploitdb'].get('exploits', [])
for exploit in exploits:
quality_tier = exploit.get('quality_analysis', {}).get('quality_tier', 'unknown')
quality_distribution[quality_tier] = quality_distribution.get(quality_tier, 0) + 1
# Get category distribution
category_distribution = {}
for cve in cves_with_exploitdb:
if cve.poc_data and 'exploitdb' in cve.poc_data:
exploits = cve.poc_data['exploitdb'].get('exploits', [])
for exploit in exploits:
category = exploit.get('category', 'unknown')
category_distribution[category] = category_distribution.get(category, 0) + 1
return {
"exploitdb_sync_status": status,
"quality_distribution": quality_distribution,
"category_distribution": category_distribution,
"total_exploitdb_cves": len(cves_with_exploitdb),
"total_exploits": sum(
len(cve.poc_data.get('exploitdb', {}).get('exploits', []))
for cve in cves_with_exploitdb
if cve.poc_data and 'exploitdb' in cve.poc_data
)
}
except Exception as e:
logger.error(f"Error getting ExploitDB stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/github-poc-stats") @app.get("/api/github-poc-stats")
async def get_github_poc_stats(db: Session = Depends(get_db)): async def get_github_poc_stats(db: Session = Depends(get_db)):
"""Get GitHub PoC-related statistics""" """Get GitHub PoC-related statistics"""
@@ -1298,6 +1538,74 @@ async def get_github_poc_status(db: Session = Depends(get_db)):
logger.error(f"Error checking GitHub PoC status: {e}")
return {"error": str(e)}
@app.get("/api/cisa-kev-stats")
async def get_cisa_kev_stats(db: Session = Depends(get_db)):
"""Get CISA KEV-related statistics"""
try:
from cisa_kev_client import CISAKEVClient
client = CISAKEVClient(db)
# Get sync status
status = await client.get_kev_sync_status()
# Get threat level distribution from CISA KEV data
threat_level_distribution = {}
from sqlalchemy import text
cves_with_kev = db.query(CVE).filter(
text("poc_data::text LIKE '%\"cisa_kev\"%'")
).all()
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
threat_level = vuln_data.get('threat_level', 'unknown')
threat_level_distribution[threat_level] = threat_level_distribution.get(threat_level, 0) + 1
# Get vulnerability category distribution
category_distribution = {}
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
category = vuln_data.get('vulnerability_category', 'unknown')
category_distribution[category] = category_distribution.get(category, 0) + 1
# Get ransomware usage statistics
ransomware_stats = {'known': 0, 'unknown': 0}
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
ransomware_use = vuln_data.get('known_ransomware_use', 'Unknown').lower()
if ransomware_use == 'known':
ransomware_stats['known'] += 1
else:
ransomware_stats['unknown'] += 1
# Calculate average threat score
threat_scores = []
for cve in cves_with_kev:
if cve.poc_data and 'cisa_kev' in cve.poc_data:
vuln_data = cve.poc_data['cisa_kev'].get('vulnerability_data', {})
threat_score = vuln_data.get('threat_score', 0)
if threat_score:
threat_scores.append(threat_score)
avg_threat_score = sum(threat_scores) / len(threat_scores) if threat_scores else 0
return {
"cisa_kev_sync_status": status,
"threat_level_distribution": threat_level_distribution,
"category_distribution": category_distribution,
"ransomware_stats": ransomware_stats,
"average_threat_score": round(avg_threat_score, 2),
"total_kev_cves": len(cves_with_kev),
"total_with_threat_scores": len(threat_scores)
}
except Exception as e:
logger.error(f"Error getting CISA KEV stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
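# The matching read-side checks for the two stats endpoints, same address
# assumption as the sketch above:
import requests

BASE = "http://localhost:8000"  # assumed backend address

exploitdb = requests.get(f"{BASE}/api/exploitdb-stats").json()
kev = requests.get(f"{BASE}/api/cisa-kev-stats").json()
print("ExploitDB coverage:", exploitdb["exploitdb_sync_status"]["exploitdb_coverage"])
print("KEV threat levels:", kev["threat_level_distribution"])
print("avg threat score:", kev["average_threat_score"])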
@app.get("/api/bulk-jobs") @app.get("/api/bulk-jobs")
async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)): async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
"""Get bulk processing job status""" """Get bulk processing job status"""

docker-compose.yml

@@ -24,12 +24,18 @@ services:
DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
NVD_API_KEY: ${NVD_API_KEY:-}
GITHUB_TOKEN: ${GITHUB_TOKEN}
OPENAI_API_KEY: ${OPENAI_API_KEY:-}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434}
LLM_PROVIDER: ${LLM_PROVIDER:-ollama}
LLM_MODEL: ${LLM_MODEL:-llama3.2}
depends_on:
db:
condition: service_healthy
volumes:
- ./backend:/app
- ./github_poc_collector:/github_poc_collector
- ./exploit-db-mirror:/app/exploit-db-mirror
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
frontend:
@@ -51,6 +57,17 @@ services:
volumes:
- redis_data:/data
ollama:
image: ollama/ollama:latest
ports:
- "11434:11434"
volumes:
- ollama_data:/root/.ollama
environment:
- OLLAMA_HOST=0.0.0.0
restart: unless-stopped
volumes:
postgres_data:
redis_data:
ollama_data:

1
exploit-db-mirror Submodule

@@ -0,0 +1 @@
Subproject commit 99e10e9ba8fa6e0db140c33eb19ea76e9a144fb8


@@ -19,18 +19,73 @@ function App() {
const [bulkStatus, setBulkStatus] = useState({});
const [pocStats, setPocStats] = useState({});
const [gitHubPocStats, setGitHubPocStats] = useState({});
const [exploitdbStats, setExploitdbStats] = useState({});
const [cisaKevStats, setCisaKevStats] = useState({});
const [bulkProcessing, setBulkProcessing] = useState(false);
const [hasRunningJobs, setHasRunningJobs] = useState(false);
const [runningJobTypes, setRunningJobTypes] = useState(new Set());
const [llmStatus, setLlmStatus] = useState({});
const [exploitSyncDropdownOpen, setExploitSyncDropdownOpen] = useState(false);
useEffect(() => {
fetchData();
}, []);
// Close dropdown when clicking outside
useEffect(() => {
const handleClickOutside = (event) => {
if (exploitSyncDropdownOpen && !event.target.closest('.relative')) {
setExploitSyncDropdownOpen(false);
}
};
document.addEventListener('mousedown', handleClickOutside);
return () => {
document.removeEventListener('mousedown', handleClickOutside);
};
}, [exploitSyncDropdownOpen]);
  // Helper functions to check if specific job types are running
  const isJobTypeRunning = (jobType) => {
    return runningJobTypes.has(jobType);
  };

  const isBulkSeedRunning = () => {
    return isJobTypeRunning('nvd_bulk_seed') || isJobTypeRunning('bulk_seed');
  };

  const isIncrementalUpdateRunning = () => {
    return isJobTypeRunning('incremental_update');
  };

  const isNomiSecSyncRunning = () => {
    return isJobTypeRunning('nomi_sec_sync');
  };

  const isGitHubPocSyncRunning = () => {
    return isJobTypeRunning('github_poc_sync');
  };

  const isExploitDBSyncRunning = () => {
    return isJobTypeRunning('exploitdb_sync') || isJobTypeRunning('exploitdb_sync_local');
  };

  const isCISAKEVSyncRunning = () => {
    return isJobTypeRunning('cisa_kev_sync');
  };

  const isRuleGenerationRunning = () => {
    return isJobTypeRunning('rule_regeneration') || isJobTypeRunning('llm_rule_generation');
  };

  const areAnyExploitSyncsRunning = () => {
    return isNomiSecSyncRunning() || isGitHubPocSyncRunning() || isExploitDBSyncRunning() || isCISAKEVSyncRunning();
  };
  const fetchData = async () => {
    try {
      setLoading(true);
      const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes, exploitdbStatsRes, cisaKevStatsRes, llmStatusRes] = await Promise.all([
        axios.get(`${API_BASE_URL}/api/cves`),
        axios.get(`${API_BASE_URL}/api/sigma-rules`),
        axios.get(`${API_BASE_URL}/api/stats`),
@@ -38,6 +93,8 @@ function App() {
        axios.get(`${API_BASE_URL}/api/bulk-status`),
        axios.get(`${API_BASE_URL}/api/poc-stats`),
        axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/exploitdb-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/cisa-kev-stats`).catch(err => ({ data: {} })),
        axios.get(`${API_BASE_URL}/api/llm-status`).catch(err => ({ data: {} }))
      ]);
@@ -48,11 +105,17 @@ function App() {
      setBulkStatus(bulkStatusRes.data);
      setPocStats(pocStatsRes.data);
      setGitHubPocStats(githubPocStatsRes.data);
      setExploitdbStats(exploitdbStatsRes.data);
      setCisaKevStats(cisaKevStatsRes.data);
      setLlmStatus(llmStatusRes.data);

      // Update running jobs state
      const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending');
      setHasRunningJobs(runningJobs.length > 0);

      // Update specific job types that are running
      const activeJobTypes = new Set(runningJobs.map(job => job.job_type));
      setRunningJobTypes(activeJobTypes);
    } catch (error) {
      console.error('Error fetching data:', error);
    } finally {
@@ -157,6 +220,42 @@
    }
  };
  const syncExploitDB = async (cveId = null) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/sync-exploitdb`, {
        cve_id: cveId,
        batch_size: 30
      });
      console.log('ExploitDB sync response:', response.data);
      fetchData();
    } catch (error) {
      console.error('Error syncing ExploitDB:', error);
    }
  };

  const syncCISAKEV = async (cveId = null) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/sync-cisa-kev`, {
        cve_id: cveId,
        batch_size: 100
      });
      console.log('CISA KEV sync response:', response.data);
      fetchData();
    } catch (error) {
      console.error('Error syncing CISA KEV:', error);
    }
  };
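Both handlers simply POST to the backend, so the same syncs can be triggered from a script; a minimal sketch mirroring the payloads above (endpoints and batch sizes are taken from the frontend calls; the base URL is an assumption):

from typing import Optional

import requests

BASE_URL = "http://localhost:8000"

def trigger_sync(path: str, batch_size: int, cve_id: Optional[str] = None) -> dict:
    """Kick off a background sync job; pass cve_id to target a single CVE."""
    resp = requests.post(f"{BASE_URL}{path}", json={"cve_id": cve_id, "batch_size": batch_size}, timeout=30)
    resp.raise_for_status()
    return resp.json()

# Same parameters the UI uses: smaller batches for ExploitDB, larger for KEV.
print(trigger_sync("/api/sync-exploitdb", batch_size=30))
print(trigger_sync("/api/sync-cisa-kev", batch_size=100))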
  const syncReferences = async () => {
    try {
      // Placeholder for future implementation
      console.log('Sync References - Not implemented yet');
      alert('Sync References functionality will be implemented in a future update');
    } catch (error) {
      console.error('Error syncing references:', error);
    }
  };
  const regenerateRules = async (force = false) => {
    try {
      const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
@@ -235,6 +334,42 @@ function App() {
            <p className="text-3xl font-bold text-purple-600">{stats.cves_with_pocs || 0}</p>
            <p className="text-sm text-gray-500">{(stats.poc_coverage || 0).toFixed(1)}% coverage</p>
            <p className="text-sm text-gray-500">GitHub PoCs: {gitHubPocStats.cves_with_github_pocs || 0}</p>
            <p className="text-sm text-gray-500">ExploitDB: {exploitdbStats.total_exploitdb_cves || 0}</p>
            <p className="text-sm text-gray-500">CISA KEV: {cisaKevStats.total_kev_cves || 0}</p>
          </div>
          <div className="bg-white p-6 rounded-lg shadow">
            <h3 className="text-lg font-medium text-gray-900">Recent CVEs (7d)</h3>
@@ -244,82 +345,193 @@ function App() {
            <h3 className="text-lg font-medium text-gray-900">High Quality PoCs</h3>
            <p className="text-3xl font-bold text-indigo-600">{pocStats.high_quality_cves || 0}</p>
            <p className="text-sm text-gray-500">Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}</p>
            <p className="text-sm text-gray-500">GitHub: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}</p>
            <p className="text-sm text-gray-500">ExploitDB: {exploitdbStats.total_exploits || 0} exploits</p>
            <p className="text-sm text-gray-500">CISA KEV: {(cisaKevStats.average_threat_score || 0).toFixed(1)} threat</p>
          </div>
        </div>
        {/* Data Synchronization Controls */}
        <div className="bg-white rounded-lg shadow p-6">
          <h2 className="text-xl font-bold text-gray-900 mb-6">Data Synchronization</h2>
          {/* Phase 1: CVE Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 1: CVE Data Syncing</h3>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
              <button
                onClick={() => startBulkSeed(2002)}
                disabled={isBulkSeedRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isBulkSeedRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-blue-600 hover:bg-blue-700'
                }`}
              >
                {isBulkSeedRunning() ? 'Processing...' : 'Sync NVD CVEs'}
              </button>
              <button
                onClick={startIncrementalUpdate}
                disabled={isIncrementalUpdateRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isIncrementalUpdateRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-green-600 hover:bg-green-700'
                }`}
              >
                {isIncrementalUpdateRunning() ? 'Processing...' : 'Incremental Update'}
              </button>
            </div>
          </div>
          {/* Phase 2: Exploit Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 2: Exploit Data Syncing</h3>
            <div className="relative inline-block text-left">
              <button
                onClick={() => setExploitSyncDropdownOpen(!exploitSyncDropdownOpen)}
                className={`inline-flex items-center justify-center w-full rounded-md border shadow-sm px-4 py-2 text-sm font-medium ${
                  areAnyExploitSyncsRunning()
                    ? 'border-blue-300 bg-blue-50 text-blue-700'
                    : 'border-gray-300 bg-white text-gray-700 hover:bg-gray-50'
                }`}
              >
                {areAnyExploitSyncsRunning() ? 'Exploit Syncs Running...' : 'Sync Exploit Data'}
                <svg className="-mr-1 ml-2 h-5 w-5" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
                  <path fillRule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clipRule="evenodd" />
                </svg>
              </button>

              {exploitSyncDropdownOpen && (
                <div className="origin-top-right absolute right-0 mt-2 w-56 rounded-md shadow-lg bg-white ring-1 ring-black ring-opacity-5 z-10">
                  <div className="py-1">
                    <button
                      onClick={() => {
                        if (!isNomiSecSyncRunning()) {
                          syncNomiSec();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isNomiSecSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isNomiSecSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-purple-50 hover:text-purple-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isNomiSecSyncRunning() ? 'bg-gray-400' : 'bg-purple-600'
                      }`}></span>
                      {isNomiSecSyncRunning() ? 'Syncing nomi-sec PoCs...' : 'Sync nomi-sec PoCs'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isGitHubPocSyncRunning()) {
                          syncGitHubPocs();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isGitHubPocSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isGitHubPocSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-green-50 hover:text-green-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isGitHubPocSyncRunning() ? 'bg-gray-400' : 'bg-green-600'
                      }`}></span>
                      {isGitHubPocSyncRunning() ? 'Syncing GitHub PoCs...' : 'Sync GitHub PoCs'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isExploitDBSyncRunning()) {
                          syncExploitDB();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isExploitDBSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isExploitDBSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-red-50 hover:text-red-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isExploitDBSyncRunning() ? 'bg-gray-400' : 'bg-red-600'
                      }`}></span>
                      {isExploitDBSyncRunning() ? 'Syncing ExploitDB...' : 'Sync ExploitDB'}
                    </button>
                    <button
                      onClick={() => {
                        if (!isCISAKEVSyncRunning()) {
                          syncCISAKEV();
                          setExploitSyncDropdownOpen(false);
                        }
                      }}
                      disabled={isCISAKEVSyncRunning()}
                      className={`block w-full text-left px-4 py-2 text-sm ${
                        isCISAKEVSyncRunning()
                          ? 'text-gray-400 cursor-not-allowed bg-gray-50'
                          : 'text-gray-700 hover:bg-yellow-50 hover:text-yellow-900'
                      }`}
                    >
                      <span className={`inline-block w-3 h-3 rounded-full mr-2 ${
                        isCISAKEVSyncRunning() ? 'bg-gray-400' : 'bg-yellow-600'
                      }`}></span>
                      {isCISAKEVSyncRunning() ? 'Syncing CISA KEV...' : 'Sync CISA KEV'}
                    </button>
                  </div>
                </div>
              )}
            </div>
          </div>
          {/* Phase 3: Reference Data Syncing */}
          <div className="mb-6">
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 3: Reference Data Syncing</h3>
            <button
              onClick={syncReferences}
              disabled={hasRunningJobs}
              className={`px-4 py-2 rounded-md text-white ${
                hasRunningJobs
                  ? 'bg-gray-400 cursor-not-allowed'
                  : 'bg-orange-600 hover:bg-orange-700'
              }`}
            >
              {hasRunningJobs ? 'Processing...' : 'Sync References (Coming Soon)'}
            </button>
          </div>
          {/* Phase 4: Rule Generation */}
          <div>
            <h3 className="text-lg font-medium text-gray-900 mb-3">Phase 4: Rule Generation</h3>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
              <button
                onClick={() => regenerateRules()}
                disabled={isRuleGenerationRunning()}
                className={`px-4 py-2 rounded-md text-white ${
                  isRuleGenerationRunning()
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-indigo-600 hover:bg-indigo-700'
                }`}
              >
                {isRuleGenerationRunning() ? 'Processing...' : 'Regenerate Rules'}
              </button>
              <button
                onClick={() => generateLlmRules()}
                disabled={isRuleGenerationRunning() || llmStatus.status !== 'ready'}
                className={`px-4 py-2 rounded-md text-white ${
                  isRuleGenerationRunning() || llmStatus.status !== 'ready'
                    ? 'bg-gray-400 cursor-not-allowed'
                    : 'bg-violet-600 hover:bg-violet-700'
                }`}
                title={llmStatus.status !== 'ready' ? 'LLM not configured' : ''}
              >
                {isRuleGenerationRunning() ? 'Processing...' : 'Generate LLM Rules'}
              </button>
            </div>
          </div>
        </div>

        {/* LLM Configuration */}