add reference data gathering

parent c1bbea09fe
commit 3c120462ac

5 changed files with 197 additions and 5 deletions
@@ -172,7 +172,10 @@ class CISAKEVClient:
             'required_actions': [],
             'ransomware_indicators': [],
             'vulnerability_types': [],
-            'mitigation_techniques': []
+            'mitigation_techniques': [],
+            'technical_details': [],
+            'impact_analysis': [],
+            'urgency_indicators': []
         }

         # Extract from vulnerability name and description
@@ -247,6 +250,46 @@ class CISAKEVClient:
             matches = re.findall(pattern, full_text, re.IGNORECASE)
             indicators['vulnerability_types'].extend(matches)

+        # Technical details extraction
+        technical_patterns = [
+            r'\b(port|service)\s+(\d+)\b',
+            r'\b(protocol)\s+(\w+)\b',
+            r'\b(version)\s+([\d\.]+)\b',
+            r'\b(cve-\d{4}-\d{4,7})\b',
+            r'\b(application|software|system)\s+(\w+)\b'
+        ]
+
+        for pattern in technical_patterns:
+            matches = re.findall(pattern, full_text, re.IGNORECASE)
+            for match in matches:
+                if isinstance(match, tuple):
+                    indicators['technical_details'].append(' '.join(match))
+                else:
+                    indicators['technical_details'].append(match)
+
+        # Impact analysis
+        impact_keywords = [
+            'critical system', 'data exfiltration', 'system compromise',
+            'unauthorized access', 'privilege escalation', 'lateral movement',
+            'ransomware deployment', 'data encryption', 'service disruption'
+        ]
+
+        for keyword in impact_keywords:
+            if keyword in full_text:
+                indicators['impact_analysis'].append(keyword)
+
+        # Urgency indicators
+        urgency_patterns = [
+            r'\b(immediate|urgent|critical|emergency)\b',
+            r'\b(actively exploited|in-the-wild|widespread)\b',
+            r'\b(patch.{0,10}available|fix.{0,10}available)\b',
+            r'\b(due.{0,10}date|deadline|must.{0,10}complete)\b'
+        ]
+
+        for pattern in urgency_patterns:
+            matches = re.findall(pattern, full_text, re.IGNORECASE)
+            indicators['urgency_indicators'].extend(matches)
+
         # Clean up and deduplicate
         for key in indicators:
             indicators[key] = list(set([item.strip() for item in indicators[key] if item and len(item.strip()) > 2]))
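As a quick sanity check, the new technical-detail extraction can be exercised standalone. The sample text below is invented for illustration; only the patterns come from the commit:

import re

# Invented sample input; the client builds full_text from the KEV entry.
full_text = ("critical flaw in example server version 2.4.1 is actively exploited; "
             "attackers reach service 8080 for privilege escalation (cve-2024-12345)")

technical_patterns = [
    r'\b(port|service)\s+(\d+)\b',
    r'\b(version)\s+([\d\.]+)\b',
    r'\b(cve-\d{4}-\d{4,7})\b',
]

details = []
for pattern in technical_patterns:
    for match in re.findall(pattern, full_text, re.IGNORECASE):
        # Multi-group patterns yield tuples; join them, as the client does.
        details.append(' '.join(match) if isinstance(match, tuple) else match)

print(details)  # ['service 8080', 'version 2.4.1', 'cve-2024-12345']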
137 backend/main.py

@@ -57,6 +57,10 @@ class CVE(Base):
     # nomi-sec PoC fields
     poc_count = Column(Integer, default=0)
     poc_data = Column(JSON)  # Store nomi-sec PoC metadata
+    # Reference data fields
+    reference_data = Column(JSON)  # Store extracted reference content and analysis
+    reference_sync_status = Column(String(20), default='pending')  # 'pending', 'processing', 'completed', 'failed'
+    reference_last_synced = Column(TIMESTAMP)
     created_at = Column(TIMESTAMP, default=datetime.utcnow)
     updated_at = Column(TIMESTAMP, default=datetime.utcnow)
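reference_data is free-form JSON. Judging from how /api/reference-stats reads it later in this commit, it carries at least a reference_analysis object with these keys (the values below are invented examples):

# Assumed shape, inferred from the stats endpoint added further down;
# the values are illustrative only.
reference_data = {
    'reference_analysis': {
        'reference_count': 5,
        'high_confidence_references': 4,
        'reference_types': ['advisory', 'patch'],
    }
}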
@@ -162,6 +166,12 @@ class CISAKEVSyncRequest(BaseModel):
     cve_id: Optional[str] = None
     batch_size: int = 100

+class ReferenceSyncRequest(BaseModel):
+    cve_id: Optional[str] = None
+    batch_size: int = 30
+    max_cves: Optional[int] = None
+    force_resync: bool = False
+
 class RuleRegenRequest(BaseModel):
     force: bool = False
@@ -1414,6 +1424,133 @@ async def sync_cisa_kev(background_tasks: BackgroundTasks,
         "batch_size": request.batch_size
     }

+@app.post("/api/sync-references")
+async def sync_references(request: ReferenceSyncRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
+    """Start reference data synchronization"""
+    try:
+        from reference_client import ReferenceClient
+        client = ReferenceClient(db)
+
+        # Create job ID
+        job_id = str(uuid.uuid4())
+
+        # Add job to tracking
+        running_jobs[job_id] = {
+            'type': 'reference_sync',
+            'status': 'running',
+            'cve_id': request.cve_id,
+            'batch_size': request.batch_size,
+            'max_cves': request.max_cves,
+            'force_resync': request.force_resync,
+            'started_at': datetime.utcnow()
+        }
+
+        # Create cancellation flag
+        job_cancellation_flags[job_id] = False
+
+        async def sync_task():
+            try:
+                if request.cve_id:
+                    # Single CVE sync
+                    result = await client.sync_cve_references(request.cve_id)
+                    running_jobs[job_id]['result'] = result
+                    running_jobs[job_id]['status'] = 'completed'
+                else:
+                    # Bulk sync
+                    result = await client.bulk_sync_references(
+                        batch_size=request.batch_size,
+                        max_cves=request.max_cves,
+                        cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
+                    )
+                    running_jobs[job_id]['result'] = result
+                    running_jobs[job_id]['status'] = 'completed'
+
+                running_jobs[job_id]['completed_at'] = datetime.utcnow()
+
+            except Exception as e:
+                logger.error(f"Reference sync task failed: {e}")
+                running_jobs[job_id]['status'] = 'failed'
+                running_jobs[job_id]['error'] = str(e)
+                running_jobs[job_id]['completed_at'] = datetime.utcnow()
+            finally:
+                # Clean up cancellation flag
+                job_cancellation_flags.pop(job_id, None)
+
+        background_tasks.add_task(sync_task)
+
+        return {
+            "message": f"Reference sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
+            "status": "started",
+            "job_id": job_id,
+            "cve_id": request.cve_id,
+            "batch_size": request.batch_size,
+            "max_cves": request.max_cves,
+            "force_resync": request.force_resync
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to start reference sync: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to start reference sync: {str(e)}")
+
+@app.get("/api/reference-stats")
+async def get_reference_stats(db: Session = Depends(get_db)):
+    """Get reference synchronization statistics"""
+    try:
+        from reference_client import ReferenceClient
+        client = ReferenceClient(db)
+
+        # Get sync status
+        status = await client.get_reference_sync_status()
+
+        # Get quality distribution from reference data
+        quality_distribution = {}
+        from sqlalchemy import text
+        cves_with_references = db.query(CVE).filter(
+            text("reference_data::text LIKE '%\"reference_analysis\"%'")
+        ).all()
+
+        for cve in cves_with_references:
+            if cve.reference_data and 'reference_analysis' in cve.reference_data:
+                ref_analysis = cve.reference_data['reference_analysis']
+                high_conf_refs = ref_analysis.get('high_confidence_references', 0)
+                total_refs = ref_analysis.get('reference_count', 0)
+
+                if total_refs > 0:
+                    quality_ratio = high_conf_refs / total_refs
+                    if quality_ratio >= 0.8:
+                        quality_tier = 'excellent'
+                    elif quality_ratio >= 0.6:
+                        quality_tier = 'good'
+                    elif quality_ratio >= 0.4:
+                        quality_tier = 'fair'
+                    else:
+                        quality_tier = 'poor'
+
+                    quality_distribution[quality_tier] = quality_distribution.get(quality_tier, 0) + 1
+
+        # Get reference type distribution
+        reference_type_distribution = {}
+        for cve in cves_with_references:
+            if cve.reference_data and 'reference_analysis' in cve.reference_data:
+                ref_analysis = cve.reference_data['reference_analysis']
+                ref_types = ref_analysis.get('reference_types', [])
+                for ref_type in ref_types:
+                    reference_type_distribution[ref_type] = reference_type_distribution.get(ref_type, 0) + 1
+
+        return {
+            'reference_sync_status': status,
+            'quality_distribution': quality_distribution,
+            'reference_type_distribution': reference_type_distribution,
+            'total_with_reference_analysis': len(cves_with_references),
+            'source': 'reference_extraction'
+        }
+
+    except Exception as e:
+        logger.error(f"Failed to get reference stats: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to get reference stats: {str(e)}")
+
 @app.get("/api/exploitdb-stats")
 async def get_exploitdb_stats(db: Session = Depends(get_db)):
     """Get ExploitDB-related statistics"""
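For manual testing, both new endpoints can be driven from a short script. This is a sketch that assumes the backend is reachable at localhost:8000, the same address the updated frontend uses:

import requests

BASE = 'http://localhost:8000'  # assumed dev address, matching the frontend

# Kick off a bounded bulk sync; the response echoes the job_id.
resp = requests.post(f'{BASE}/api/sync-references',
                     json={'batch_size': 30, 'max_cves': 100, 'force_resync': False})
resp.raise_for_status()
print('started job', resp.json()['job_id'])

# The stats endpoint can be polled while the background task runs.
stats = requests.get(f'{BASE}/api/reference-stats').json()
print(stats['quality_distribution'], stats['total_with_reference_analysis'])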
@@ -22,3 +22,4 @@ langchain-community==0.2.0
 langchain-core>=0.2.20
 openai>=1.32.0
 anthropic==0.40.0
+certifi==2024.2.2
@@ -248,11 +248,16 @@ function App() {

   const syncReferences = async () => {
     try {
-      // Placeholder for future implementation
-      console.log('Sync References - Not implemented yet');
-      alert('Sync References functionality will be implemented in a future update');
+      const response = await axios.post('http://localhost:8000/api/sync-references', {
+        batch_size: 30,
+        max_cves: 100,
+        force_resync: false
+      });
+      console.log('Reference sync response:', response.data);
+      fetchData();
     } catch (error) {
       console.error('Error syncing references:', error);
+      alert('Error starting reference sync: ' + (error.response?.data?.detail || error.message));
     }
   };
@@ -499,7 +504,7 @@ function App() {
               : 'bg-orange-600 hover:bg-orange-700'
           }`}
         >
-          {hasRunningJobs ? 'Processing...' : 'Sync References (Coming Soon)'}
+          {hasRunningJobs ? 'Processing...' : 'Sync References'}
         </button>
       </div>
6 init.sql

@@ -20,6 +20,10 @@ CREATE TABLE cves (
     -- nomi-sec PoC fields
     poc_count INTEGER DEFAULT 0,
     poc_data JSON,
+    -- Reference data fields
+    reference_data JSON,
+    reference_sync_status VARCHAR(20) DEFAULT 'pending',
+    reference_last_synced TIMESTAMP,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -181,5 +185,7 @@ level: {level}',
 CREATE INDEX idx_cves_cve_id ON cves(cve_id);
 CREATE INDEX idx_cves_published_date ON cves(published_date);
 CREATE INDEX idx_cves_severity ON cves(severity);
+CREATE INDEX idx_cves_reference_sync_status ON cves(reference_sync_status);
+CREATE INDEX idx_cves_reference_last_synced ON cves(reference_last_synced);
 CREATE INDEX idx_sigma_rules_cve_id ON sigma_rules(cve_id);
 CREATE INDEX idx_sigma_rules_detection_type ON sigma_rules(detection_type);
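The two new indexes are there to keep the "what still needs syncing" scan cheap. Below is a sketch of such a query through SQLAlchemy; the DSN is a placeholder, and the real lookup presumably lives in reference_client.py, which this diff does not include:

from sqlalchemy import create_engine, text

# Placeholder DSN; the backend configures its own connection elsewhere.
engine = create_engine('postgresql://user:pass@localhost/cves')

# idx_cves_reference_sync_status serves the WHERE clause; LIMIT 30 mirrors
# ReferenceSyncRequest's default batch_size.
with engine.connect() as conn:
    rows = conn.execute(text(
        "SELECT cve_id FROM cves "
        "WHERE reference_sync_status = 'pending' "
        "ORDER BY reference_last_synced NULLS FIRST "
        "LIMIT 30"
    ))
    for (cve_id,) in rows:
        print(cve_id)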