// github_poc_collector/src/exploit_collector.rs
//
// 110 lines
// 3.4 KiB
// Rust

use std::env;
use std::path::{Path, PathBuf};
use std::fs::{self, create_dir_all};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use chrono::{Utc, Datelike};
use crate::error::CollectorError;
/// A single repository entry as read from the `items` array of a GitHub
/// repository-search response, and as written back out to the per-CVE JSON file.
///
/// Field names match the JSON keys they are (de)serialized from.
#[derive(Debug, Serialize, Deserialize)]
pub struct GitHubRepository {
    /// Value of the `name` key.
    pub name: String,
    /// Value of the `full_name` key; appended to `https://github.com/` when
    /// `html_url` has to be rebuilt.
    pub full_name: String,
    /// Browser URL of the repository.
    pub html_url: String,
    /// Value of the `description` key; `None` when it is absent or null.
    pub description: Option<String>,
    /// Value of the `stargazers_count` key.
    pub stargazers_count: u32,
    /// Value of the `forks_count` key.
    pub forks_count: u32,
    /// Value of the `created_at` key, kept as the raw string (not parsed).
    pub created_at: String,
}
/// Looks up repositories on GitHub by CVE identifier and stores the results
/// as JSON files on disk.
pub struct ExploitCollector {
    /// Token placed in the `Authorization` header of search requests.
    github_token: String,
    /// Root directory under which the per-year result folders are created.
    output_dir: PathBuf,
}
impl ExploitCollector {
pub fn new() -> Result<Self, CollectorError> {
// Create output directory based on the year of the CVE, not current year
let output_dir = Path::new("exploits");
// Ensure the directory exists
create_dir_all(&output_dir)?;
// Get GitHub token from environment
let github_token = env::var("GITHUB_ACCESS_TOKEN")
.map_err(|_| CollectorError::ConfigError("GitHub access token not set".to_string()))?;
Ok(Self {
github_token,
output_dir: output_dir.to_path_buf(),
})
}
pub async fn search_cve_repos(&self, cve_id: &str) -> Result<Vec<GitHubRepository>, CollectorError> {
let client = Client::new();
// GitHub Search API endpoint
let search_url = format!(
"https://api.github.com/search/repositories?q={}+in:name&sort=stars&order=desc",
cve_id
);
let response = client.get(&search_url)
.header("Authorization", format!("token {}", self.github_token))
.header("Accept", "application/vnd.github.v3+json")
.header("User-Agent", "ExploitCollector")
.send()
.await?;
// Parse the response
let search_result: serde_json::Value = response.json().await?;
// Extract repositories
let mut repos: Vec<GitHubRepository> = search_result
.get("items")
.and_then(|items| items.as_array())
.map(|items_array| {
items_array
.iter()
.filter_map(|item| serde_json::from_value(item.clone()).ok())
.collect()
})
.ok_or(CollectorError::NoRepositoriesFound)?;
// Modify repositories to ensure full GitHub URL
for repo in &mut repos {
// Ensure full GitHub URL
if !repo.html_url.starts_with("https://github.com/") {
repo.html_url = format!("https://github.com/{}", repo.full_name);
}
}
if repos.is_empty() {
return Err(CollectorError::NoRepositoriesFound);
}
Ok(repos)
}
pub fn save_repositories(&self, cve_id: &str, repos: &[GitHubRepository]) -> Result<(), CollectorError> {
// Extract year from CVE ID (assumes CVE-YYYY-XXXX format)
let year = cve_id.split('-').nth(1)
.map(|y| y.to_string())
.unwrap_or_else(|| Utc::now().year().to_string());
// Create year-specific directory
let year_dir = self.output_dir.join(year);
create_dir_all(&year_dir)?;
// Create a JSON file for the CVE repositories
let filename = year_dir.join(format!("{}.json", cve_id));
// Serialize repositories to JSON
let json_content = serde_json::to_string_pretty(repos)?;
// Write to file
fs::write(filename, json_content)?;
Ok(())
}
}