add reference data gathering
This commit is contained in:
parent
c1bbea09fe
commit
3c120462ac
5 changed files with 197 additions and 5 deletions
|
@ -172,7 +172,10 @@ class CISAKEVClient:
|
|||
'required_actions': [],
|
||||
'ransomware_indicators': [],
|
||||
'vulnerability_types': [],
|
||||
'mitigation_techniques': []
|
||||
'mitigation_techniques': [],
|
||||
'technical_details': [],
|
||||
'impact_analysis': [],
|
||||
'urgency_indicators': []
|
||||
}
|
||||
|
||||
# Extract from vulnerability name and description
|
||||
|
@ -247,6 +250,46 @@ class CISAKEVClient:
|
|||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
indicators['vulnerability_types'].extend(matches)
|
||||
|
||||
# Technical details extraction
|
||||
technical_patterns = [
|
||||
r'\b(port|service)\s+(\d+)\b',
|
||||
r'\b(protocol)\s+(\w+)\b',
|
||||
r'\b(version)\s+([\d\.]+)\b',
|
||||
r'\b(cve-\d{4}-\d{4,7})\b',
|
||||
r'\b(application|software|system)\s+(\w+)\b'
|
||||
]
|
||||
|
||||
for pattern in technical_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
for match in matches:
|
||||
if isinstance(match, tuple):
|
||||
indicators['technical_details'].append(' '.join(match))
|
||||
else:
|
||||
indicators['technical_details'].append(match)
|
||||
|
||||
# Impact analysis
|
||||
impact_keywords = [
|
||||
'critical system', 'data exfiltration', 'system compromise',
|
||||
'unauthorized access', 'privilege escalation', 'lateral movement',
|
||||
'ransomware deployment', 'data encryption', 'service disruption'
|
||||
]
|
||||
|
||||
for keyword in impact_keywords:
|
||||
if keyword in full_text:
|
||||
indicators['impact_analysis'].append(keyword)
|
||||
|
||||
# Urgency indicators
|
||||
urgency_patterns = [
|
||||
r'\b(immediate|urgent|critical|emergency)\b',
|
||||
r'\b(actively exploited|in-the-wild|widespread)\b',
|
||||
r'\b(patch.{0,10}available|fix.{0,10}available)\b',
|
||||
r'\b(due.{0,10}date|deadline|must.{0,10}complete)\b'
|
||||
]
|
||||
|
||||
for pattern in urgency_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
indicators['urgency_indicators'].extend(matches)
|
||||
|
||||
# Clean up and deduplicate
|
||||
for key in indicators:
|
||||
indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2]))
|
||||
|
|
137
backend/main.py
137
backend/main.py
|
@ -57,6 +57,10 @@ class CVE(Base):
|
|||
# nomi-sec PoC fields
|
||||
poc_count = Column(Integer, default=0)
|
||||
poc_data = Column(JSON) # Store nomi-sec PoC metadata
|
||||
# Reference data fields
|
||||
reference_data = Column(JSON) # Store extracted reference content and analysis
|
||||
reference_sync_status = Column(String(20), default='pending') # 'pending', 'processing', 'completed', 'failed'
|
||||
reference_last_synced = Column(TIMESTAMP)
|
||||
created_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
updated_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
|
||||
|
@ -162,6 +166,12 @@ class CISAKEVSyncRequest(BaseModel):
|
|||
cve_id: Optional[str] = None
|
||||
batch_size: int = 100
|
||||
|
||||
class ReferenceSyncRequest(BaseModel):
    """Request body accepted by POST /api/sync-references."""

    # When set (and non-empty) the endpoint syncs only this CVE; otherwise it
    # runs a bulk sync.
    cve_id: Optional[str] = None
    # Forwarded to the bulk sync as `batch_size`; not used for a single-CVE sync.
    batch_size: int = 30
    # Forwarded to the bulk sync as `max_cves` (presumably an upper bound on the
    # number of CVEs processed — confirm against ReferenceClient).
    max_cves: Optional[int] = None
    # Stored on the job record and echoed back in the response.
    # NOTE(review): the endpoint does not pass this flag to the client — confirm
    # whether it is meant to affect the sync.
    force_resync: bool = False
|
||||
|
||||
class RuleRegenRequest(BaseModel):
|
||||
force: bool = False
|
||||
|
||||
|
@ -1414,6 +1424,133 @@ async def sync_cisa_kev(background_tasks: BackgroundTasks,
|
|||
"batch_size": request.batch_size
|
||||
}
|
||||
|
||||
@app.post("/api/sync-references")
async def sync_references(request: ReferenceSyncRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
    """Register a reference-sync job and schedule it as a background task.

    A single-CVE sync is run when ``request.cve_id`` is truthy, otherwise a
    bulk sync is run with ``request.batch_size`` and ``request.max_cves``.
    Progress is recorded in ``running_jobs`` under a freshly generated job id.

    Returns:
        A dict with a human-readable message, ``status`` set to ``"started"``,
        the ``job_id`` and an echo of the request options.

    Raises:
        HTTPException: status 500 if the job could not be set up or scheduled.
    """
    try:
        from reference_client import ReferenceClient

        # NOTE(review): the client keeps the request-scoped `db` session and is
        # used later from the background task — confirm the session is still
        # usable once the response has been sent.
        client = ReferenceClient(db)

        job_id = str(uuid.uuid4())

        # Track the job and give it a cancellation flag before it is scheduled.
        job_record = {
            'type': 'reference_sync',
            'status': 'running',
            'cve_id': request.cve_id,
            'batch_size': request.batch_size,
            'max_cves': request.max_cves,
            'force_resync': request.force_resync,
            'started_at': datetime.utcnow()
        }
        running_jobs[job_id] = job_record
        job_cancellation_flags[job_id] = False

        async def sync_task():
            def is_cancelled():
                # A missing flag is treated as "not cancelled".
                return job_cancellation_flags.get(job_id, False)

            try:
                # NOTE(review): request.force_resync is not forwarded to either
                # client call — confirm whether that is intended.
                if request.cve_id:
                    outcome = await client.sync_cve_references(request.cve_id)
                else:
                    outcome = await client.bulk_sync_references(
                        batch_size=request.batch_size,
                        max_cves=request.max_cves,
                        cancellation_flag=is_cancelled
                    )

                running_jobs[job_id].update(
                    result=outcome,
                    status='completed',
                    completed_at=datetime.utcnow()
                )
            except Exception as e:
                logger.error(f"Reference sync task failed: {e}")
                running_jobs[job_id].update(
                    status='failed',
                    error=str(e),
                    completed_at=datetime.utcnow()
                )
            finally:
                # The flag is only needed while the task is running.
                job_cancellation_flags.pop(job_id, None)

        background_tasks.add_task(sync_task)

        if request.cve_id:
            target = f" for {request.cve_id}"
        else:
            target = " for all CVEs"

        return {
            "message": "Reference sync started" + target,
            "status": "started",
            "job_id": job_id,
            "cve_id": request.cve_id,
            "batch_size": request.batch_size,
            "max_cves": request.max_cves,
            "force_resync": request.force_resync
        }

    except Exception as e:
        logger.error(f"Failed to start reference sync: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to start reference sync: {str(e)}")
|
||||
|
||||
@app.get("/api/reference-stats")
async def get_reference_stats(db: Session = Depends(get_db)):
    """Summarise reference-extraction results across stored CVEs.

    Returns:
        A dict containing the client's sync status, a count of CVEs per
        quality tier, a count per reference type, the number of CVE rows
        matched by the SQL text filter, and a fixed ``source`` marker.

    Raises:
        HTTPException: status 500 if anything fails while gathering the stats.
    """
    try:
        from reference_client import ReferenceClient

        client = ReferenceClient(db)
        status = await client.get_reference_sync_status()

        from sqlalchemy import text

        # Coarse SQL pre-filter on the serialised JSON; the rows are checked
        # again in Python below before their analysis is used.
        matching_cves = db.query(CVE).filter(
            text("reference_data::text LIKE '%\"reference_analysis\"%'")
        ).all()

        analyses = [
            row.reference_data['reference_analysis']
            for row in matching_cves
            if row.reference_data and 'reference_analysis' in row.reference_data
        ]

        # Share of high-confidence references -> tier; anything below the
        # lowest floor is 'poor'.
        tier_floors = ((0.8, 'excellent'), (0.6, 'good'), (0.4, 'fair'))

        quality_distribution = {}
        for analysis in analyses:
            confident = analysis.get('high_confidence_references', 0)
            total = analysis.get('reference_count', 0)
            if not total > 0:
                # CVEs without any counted references are left out of the tiers.
                continue

            ratio = confident / total
            tier = next(
                (label for floor, label in tier_floors if ratio >= floor),
                'poor'
            )
            quality_distribution.setdefault(tier, 0)
            quality_distribution[tier] += 1

        reference_type_distribution = {}
        for analysis in analyses:
            for ref_type in analysis.get('reference_types', []):
                reference_type_distribution.setdefault(ref_type, 0)
                reference_type_distribution[ref_type] += 1

        return {
            'reference_sync_status': status,
            'quality_distribution': quality_distribution,
            'reference_type_distribution': reference_type_distribution,
            # Counts every row matched by the SQL filter, not only those that
            # passed the Python-side check.
            'total_with_reference_analysis': len(matching_cves),
            'source': 'reference_extraction'
        }

    except Exception as e:
        logger.error(f"Failed to get reference stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get reference stats: {str(e)}")
|
||||
|
||||
@app.get("/api/exploitdb-stats")
|
||||
async def get_exploitdb_stats(db: Session = Depends(get_db)):
|
||||
"""Get ExploitDB-related statistics"""
|
||||
|
|
|
@ -22,3 +22,4 @@ langchain-community==0.2.0
|
|||
langchain-core>=0.2.20
|
||||
openai>=1.32.0
|
||||
anthropic==0.40.0
|
||||
certifi==2024.2.2
|
||||
|
|
|
@ -248,11 +248,16 @@ function App() {
|
|||
|
||||
// Ask the backend to start a reference sync, then call fetchData().
// On failure the error is logged and shown to the user in an alert.
const syncReferences = async () => {
  try {
    const response = await axios.post('http://localhost:8000/api/sync-references', {
      batch_size: 30,
      max_cves: 100,
      force_resync: false
    });
    console.log('Reference sync response:', response.data);
    fetchData();
  } catch (error) {
    console.error('Error syncing references:', error);
    // Prefer the backend's `detail` message when the response carries one.
    alert('Error starting reference sync: ' + (error.response?.data?.detail || error.message));
  }
};
|
||||
|
||||
|
@ -499,7 +504,7 @@ function App() {
|
|||
: 'bg-orange-600 hover:bg-orange-700'
|
||||
}`}
|
||||
>
|
||||
{hasRunningJobs ? 'Processing...' : 'Sync References (Coming Soon)'}
|
||||
{hasRunningJobs ? 'Processing...' : 'Sync References'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
|
|
6
init.sql
6
init.sql
|
@ -20,6 +20,10 @@ CREATE TABLE cves (
|
|||
-- nomi-sec PoC fields
|
||||
poc_count INTEGER DEFAULT 0,
|
||||
poc_data JSON,
|
||||
-- Reference data fields
|
||||
reference_data JSON,
|
||||
reference_sync_status VARCHAR(20) DEFAULT 'pending',
|
||||
reference_last_synced TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT NOW(),
|
||||
updated_at TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
|
@ -181,5 +185,7 @@ level: {level}',
|
|||
CREATE INDEX idx_cves_cve_id ON cves(cve_id);
|
||||
CREATE INDEX idx_cves_published_date ON cves(published_date);
|
||||
CREATE INDEX idx_cves_severity ON cves(severity);
|
||||
CREATE INDEX idx_cves_reference_sync_status ON cves(reference_sync_status);
|
||||
CREATE INDEX idx_cves_reference_last_synced ON cves(reference_last_synced);
|
||||
CREATE INDEX idx_sigma_rules_cve_id ON sigma_rules(cve_id);
|
||||
CREATE INDEX idx_sigma_rules_detection_type ON sigma_rules(detection_type);
|
||||
|
|
Loading…
Add table
Reference in a new issue