From 3c120462ac9118d3ffeb117358e6147f9035a184 Mon Sep 17 00:00:00 2001
From: bpmcdevitt
Date: Thu, 10 Jul 2025 17:30:12 -0500
Subject: [PATCH] add reference data gathering

---
Review notes (free-form text placed after the "---" marker, so it is not
part of the commit message):

 * cisa_kev_client.py: impact keywords are matched against a lower-cased
   copy of full_text, so the literal lower-case keywords match regardless of
   the input's case, in line with the re.IGNORECASE extractors beside them.
 * requirements.txt: certifi carries a lower bound instead of an exact pin,
   so CA bundle updates are not blocked.
 * main.py: force_resync is stored in running_jobs and echoed in the
   response, but sync_task never passes it to ReferenceClient - TODO confirm
   whether the client is meant to receive it.
 * main.py: sync_task uses a ReferenceClient built from the request-scoped
   db session - TODO confirm the session is still open when the background
   task runs.
 * App.js: the final hunk is emitted without trailing context and its
   header counts are adjusted to match (-499,4 +504,4).
 * Line breaks and indentation in this patch were reconstructed from syntax
   and hunk counts; whitespace in context lines should be checked against
   the target files (or applied with --ignore-whitespace).

 backend/cisa_kev_client.py |  45 +++++++++++-
 backend/main.py            | 137 +++++++++++++++++++++++++++++++++++++
 backend/requirements.txt   |   1 +
 frontend/src/App.js        |  13 ++--
 init.sql                   |   6 ++
 5 files changed, 197 insertions(+), 5 deletions(-)

diff --git a/backend/cisa_kev_client.py b/backend/cisa_kev_client.py
index 204bde9..cb51228 100644
--- a/backend/cisa_kev_client.py
+++ b/backend/cisa_kev_client.py
@@ -172,7 +172,10 @@ class CISAKEVClient:
             'required_actions': [],
             'ransomware_indicators': [],
             'vulnerability_types': [],
-            'mitigation_techniques': []
+            'mitigation_techniques': [],
+            'technical_details': [],
+            'impact_analysis': [],
+            'urgency_indicators': []
         }
 
         # Extract from vulnerability name and description
@@ -247,6 +250,46 @@ class CISAKEVClient:
             matches = re.findall(pattern, full_text, re.IGNORECASE)
             indicators['vulnerability_types'].extend(matches)
 
+        # Technical details extraction
+        technical_patterns = [
+            r'\b(port|service)\s+(\d+)\b',
+            r'\b(protocol)\s+(\w+)\b',
+            r'\b(version)\s+([\d\.]+)\b',
+            r'\b(cve-\d{4}-\d{4,7})\b',
+            r'\b(application|software|system)\s+(\w+)\b'
+        ]
+
+        for pattern in technical_patterns:
+            matches = re.findall(pattern, full_text, re.IGNORECASE)
+            for match in matches:
+                if isinstance(match, tuple):
+                    indicators['technical_details'].append(' '.join(match))
+                else:
+                    indicators['technical_details'].append(match)
+
+        # Impact analysis
+        impact_keywords = [
+            'critical system', 'data exfiltration', 'system compromise',
+            'unauthorized access', 'privilege escalation', 'lateral movement',
+            'ransomware deployment', 'data encryption', 'service disruption'
+        ]
+
+        lowered_text = full_text.lower()  # keywords are lower-case; match case-insensitively like the regexes
+        indicators['impact_analysis'].extend(
+            keyword for keyword in impact_keywords if keyword in lowered_text)
+
+        # Urgency indicators
+        urgency_patterns = [
+            r'\b(immediate|urgent|critical|emergency)\b',
+            r'\b(actively exploited|in-the-wild|widespread)\b',
+            r'\b(patch.{0,10}available|fix.{0,10}available)\b',
+            r'\b(due.{0,10}date|deadline|must.{0,10}complete)\b'
+        ]
+
+        for pattern in urgency_patterns:
+            matches = re.findall(pattern, full_text, re.IGNORECASE)
+            indicators['urgency_indicators'].extend(matches)
+
         # Clean up and deduplicate
         for key in indicators:
             indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2]))
diff --git a/backend/main.py b/backend/main.py
index 876b3da..f59bc3a 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -57,6 +57,10 @@ class CVE(Base):
     # nomi-sec PoC fields
     poc_count = Column(Integer, default=0)
     poc_data = Column(JSON)  # Store nomi-sec PoC metadata
+    # Reference data fields
+    reference_data = Column(JSON)  # Store extracted reference content and analysis
+    reference_sync_status = Column(String(20), default='pending')  # 'pending', 'processing', 'completed', 'failed'
+    reference_last_synced = Column(TIMESTAMP)
     created_at = Column(TIMESTAMP, default=datetime.utcnow)
     updated_at = Column(TIMESTAMP, default=datetime.utcnow)
 
@@ -162,6 +166,12 @@ class CISAKEVSyncRequest(BaseModel):
     cve_id: Optional[str] = None
     batch_size: int = 100
 
+class ReferenceSyncRequest(BaseModel):
+    cve_id: Optional[str] = None
+    batch_size: int = 30
+    max_cves: Optional[int] = None
+    force_resync: bool = False
+
 class RuleRegenRequest(BaseModel):
     force: bool = False
 
@@ -1414,6 +1424,133 @@ async def sync_cisa_kev(background_tasks: BackgroundTasks,
         "batch_size": request.batch_size
     }
 
+@app.post("/api/sync-references")
+async def sync_references(request: ReferenceSyncRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
+    """Start reference data synchronization"""
+
+    try:
+        from reference_client import ReferenceClient
+        client = ReferenceClient(db)
+
+        # Create job ID
+        job_id = str(uuid.uuid4())
+
+        # Add job to tracking
+        running_jobs[job_id] = {
+            'type': 'reference_sync',
+            'status': 'running',
+            'cve_id': request.cve_id,
+            'batch_size': request.batch_size,
+            'max_cves': request.max_cves,
+            'force_resync': request.force_resync,
+            'started_at': datetime.utcnow()
+        }
+
+        # Create cancellation flag
+        job_cancellation_flags[job_id] = False
+
+        async def sync_task():
+            try:
+                if request.cve_id:
+                    # Single CVE sync
+                    result = await client.sync_cve_references(request.cve_id)
+                    running_jobs[job_id]['result'] = result
+                    running_jobs[job_id]['status'] = 'completed'
+                else:
+                    # Bulk sync
+                    result = await client.bulk_sync_references(
+                        batch_size=request.batch_size,
+                        max_cves=request.max_cves,
+                        cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
+                    )
+                    running_jobs[job_id]['result'] = result
+                    running_jobs[job_id]['status'] = 'completed'
+
+                running_jobs[job_id]['completed_at'] = datetime.utcnow()
+
+            except Exception as e:
+                logger.error(f"Reference sync task failed: {e}")
+                running_jobs[job_id]['status'] = 'failed'
+                running_jobs[job_id]['error'] = str(e)
+                running_jobs[job_id]['completed_at'] = datetime.utcnow()
+            finally:
+                # Clean up cancellation flag
+                job_cancellation_flags.pop(job_id, None)
+
+        background_tasks.add_task(sync_task)
+
+        return {
+            "message": f"Reference sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
+            "status": "started",
+            "job_id": job_id,
+            "cve_id": request.cve_id,
+            "batch_size": request.batch_size,
+            "max_cves": request.max_cves,
+            "force_resync": request.force_resync
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to start reference sync: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to start reference sync: {str(e)}")
+
+@app.get("/api/reference-stats")
+async def get_reference_stats(db: Session = Depends(get_db)):
+    """Get reference synchronization statistics"""
+
+    try:
+        from reference_client import ReferenceClient
+        client = ReferenceClient(db)
+
+        # Get sync status
+        status = await client.get_reference_sync_status()
+
+        # Get quality distribution from reference data
+        quality_distribution = {}
+        from sqlalchemy import text
+        cves_with_references = db.query(CVE).filter(
+            text("reference_data::text LIKE '%\"reference_analysis\"%'")
+        ).all()
+
+        for cve in cves_with_references:
+            if cve.reference_data and 'reference_analysis' in cve.reference_data:
+                ref_analysis = cve.reference_data['reference_analysis']
+                high_conf_refs = ref_analysis.get('high_confidence_references', 0)
+                total_refs = ref_analysis.get('reference_count', 0)
+
+                if total_refs > 0:
+                    quality_ratio = high_conf_refs / total_refs
+                    if quality_ratio >= 0.8:
+                        quality_tier = 'excellent'
+                    elif quality_ratio >= 0.6:
+                        quality_tier = 'good'
+                    elif quality_ratio >= 0.4:
+                        quality_tier = 'fair'
+                    else:
+                        quality_tier = 'poor'
+
+                    quality_distribution[quality_tier] = quality_distribution.get(quality_tier, 0) + 1
+
+        # Get reference type distribution
+        reference_type_distribution = {}
+        for cve in cves_with_references:
+            if cve.reference_data and 'reference_analysis' in cve.reference_data:
+                ref_analysis = cve.reference_data['reference_analysis']
+                ref_types = ref_analysis.get('reference_types', [])
+                for ref_type in ref_types:
+                    reference_type_distribution[ref_type] = reference_type_distribution.get(ref_type, 0) + 1
+
+        return {
+            'reference_sync_status': status,
+            'quality_distribution': quality_distribution,
+            'reference_type_distribution': reference_type_distribution,
+            'total_with_reference_analysis': len(cves_with_references),
+            'source': 'reference_extraction'
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get reference stats: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get reference stats: {str(e)}")
+
 @app.get("/api/exploitdb-stats")
 async def get_exploitdb_stats(db: Session = Depends(get_db)):
     """Get ExploitDB-related statistics"""
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 6a03a5d..dc2d6e0 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,3 +22,4 @@ langchain-community==0.2.0
 langchain-core>=0.2.20
 openai>=1.32.0
 anthropic==0.40.0
+certifi>=2024.2.2
diff --git a/frontend/src/App.js b/frontend/src/App.js
index 4480cd1..d98ef26 100644
--- a/frontend/src/App.js
+++ b/frontend/src/App.js
@@ -248,11 +248,16 @@ function App() {
 
   const syncReferences = async () => {
     try {
-      // Placeholder for future implementation
-      console.log('Sync References - Not implemented yet');
-      alert('Sync References functionality will be implemented in a future update');
+      const response = await axios.post('http://localhost:8000/api/sync-references', {
+        batch_size: 30,
+        max_cves: 100,
+        force_resync: false
+      });
+      console.log('Reference sync response:', response.data);
+      fetchData();
     } catch (error) {
       console.error('Error syncing references:', error);
+      alert('Error starting reference sync: ' + (error.response?.data?.detail || error.message));
     }
   };
 
@@ -499,4 +504,4 @@ function App() {
                     : 'bg-orange-600 hover:bg-orange-700'
                 }`}
               >
-                {hasRunningJobs ? 'Processing...' : 'Sync References (Coming Soon)'}
+                {hasRunningJobs ? 'Processing...' : 'Sync References'}
diff --git a/init.sql b/init.sql
index 8e9e7d5..defed1d 100644
--- a/init.sql
+++ b/init.sql
@@ -20,6 +20,10 @@ CREATE TABLE cves (
     -- nomi-sec PoC fields
     poc_count INTEGER DEFAULT 0,
     poc_data JSON,
+    -- Reference data fields
+    reference_data JSON,
+    reference_sync_status VARCHAR(20) DEFAULT 'pending',
+    reference_last_synced TIMESTAMP,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -181,5 +185,7 @@ level: {level}',
 CREATE INDEX idx_cves_cve_id ON cves(cve_id);
 CREATE INDEX idx_cves_published_date ON cves(published_date);
 CREATE INDEX idx_cves_severity ON cves(severity);
+CREATE INDEX idx_cves_reference_sync_status ON cves(reference_sync_status);
+CREATE INDEX idx_cves_reference_last_synced ON cves(reference_last_synced);
 CREATE INDEX idx_sigma_rules_cve_id ON sigma_rules(cve_id);
 CREATE INDEX idx_sigma_rules_detection_type ON sigma_rules(detection_type);