Initial Commit. Rust program that searches GitHub for PoC exploit code and stores the results in json files

This commit is contained in:
Brendan McDevitt 2025-03-27 09:33:15 -05:00
parent 856a5dc7ff
commit 9ba02601a0
10 changed files with 2141 additions and 2 deletions

View file

@ -0,0 +1,42 @@
name: Daily Feed Update

on:
  schedule:
    # Run daily at 1:00 AM UTC (adjust the time as needed)
    - cron: '0 1 * * *'
  # Allow manual triggering of the workflow
  workflow_dispatch:

jobs:
  update-feed:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Rust
        # NOTE(review): actions-rs/toolchain is archived/unmaintained;
        # consider migrating to dtolnay/rust-toolchain.
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true

      - name: Cache Cargo dependencies
        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run recent feed update
        run: cargo run -- --feed recent

      - name: Commit and push results
        run: |
          git config user.name bpmcdevitt
          git config user.email brendan@mcdevitt.tech
          git add .
          # `|| exit 0` keeps the job green when there is nothing new to commit
          git commit -m "Update recent feed data" || exit 0
          git push

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
# .gitignore
/target
.env
*.log
exploits/*/
!exploits/.gitkeep

1717
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

15
Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
# Cargo.toml
[package]
name = "github_poc_collector"
version = "0.1.0"
edition = "2021"

[dependencies]
reqwest = { version = "0.11", features = ["json"] }
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
flate2 = "1.0"
# Track the stable 0.4 release line; 0.4.20-rc.1 was a pre-release
# candidate and should not be pinned as a dependency.
chrono = { version = "0.4" }
clap = { version = "4.4", features = ["derive"] }

View file

@ -1,3 +1,6 @@
# github_poc_collector
# Github PoC Exploit Collector
Collects PoC exploit code on Github
## Project Overview
A Rust application that searches GitHub for repositories related to specific CVE IDs and stores the results in per-year directories.
## Project Structure

0
exploits/.gitkeep Normal file
View file

22
src/error.rs Normal file
View file

@ -0,0 +1,22 @@
use thiserror::Error;
/// Unified error type for the collector; variants with `#[from]` allow `?`
/// to convert the underlying library errors automatically.
#[derive(Debug, Error)]
pub enum CollectorError {
    /// Any reqwest failure (connection, timeout, non-JSON body, bad status).
    #[error("Network error: {0}")]
    NetworkError(#[from] reqwest::Error),
    /// Missing or invalid configuration (e.g. GITHUB_ACCESS_TOKEN not set).
    #[error("Configuration error: {0}")]
    ConfigError(String),
    /// Wraps std::io::Error from directory creation and file reads/writes.
    #[error("File system error: {0}")]
    FileSystemError(#[from] std::io::Error),
    /// Wraps serde_json errors from parsing or serializing JSON.
    #[error("JSON parsing error: {0}")]
    JsonError(#[from] serde_json::Error),
    /// The GitHub search returned no usable repositories for a CVE.
    #[error("No repositories found for CVE")]
    NoRepositoriesFound,
    /// A problem specific to the NVD feed contents (e.g. malformed JSON).
    #[error("NVD feed error: {0}")]
    NvdFeedError(String),
}

110
src/exploit_collector.rs Normal file
View file

@ -0,0 +1,110 @@
use std::env;
use std::path::{Path, PathBuf};
use std::fs::{self, create_dir_all};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use chrono::{Utc, Datelike};
use crate::error::CollectorError;
/// Subset of the GitHub repository object returned by the Search API that we
/// persist to disk. Extra fields in the API response are ignored on deserialize.
#[derive(Debug, Serialize, Deserialize)]
pub struct GitHubRepository {
    // Short repository name, e.g. "CVE-2024-1234-poc".
    pub name: String,
    // "owner/name" form, used to rebuild html_url if needed.
    pub full_name: String,
    // Web URL of the repository.
    pub html_url: String,
    // Description may be absent on GitHub, hence Option.
    pub description: Option<String>,
    pub stargazers_count: u32,
    pub forks_count: u32,
    // Timestamp string as provided by the API (not parsed here).
    pub created_at: String,
}
/// Searches GitHub for CVE-related PoC repositories and writes the results
/// as JSON files under `exploits/<year>/`.
pub struct ExploitCollector {
    // Personal access token read from GITHUB_ACCESS_TOKEN; sent on every search.
    github_token: String,
    // Root output directory ("exploits"); per-year subdirectories are created on save.
    output_dir: PathBuf,
}
impl ExploitCollector {
    /// Creates a collector, ensuring the `exploits/` output directory exists.
    ///
    /// # Errors
    /// `FileSystemError` if the directory cannot be created, or `ConfigError`
    /// if the `GITHUB_ACCESS_TOKEN` environment variable is not set.
    pub fn new() -> Result<Self, CollectorError> {
        let output_dir = Path::new("exploits");
        create_dir_all(output_dir)?;

        // The token is required: unauthenticated search requests are severely
        // rate-limited by GitHub.
        let github_token = env::var("GITHUB_ACCESS_TOKEN")
            .map_err(|_| CollectorError::ConfigError("GitHub access token not set".to_string()))?;

        Ok(Self {
            github_token,
            output_dir: output_dir.to_path_buf(),
        })
    }

    /// Searches GitHub for repositories whose name contains `cve_id`,
    /// ordered by star count descending.
    ///
    /// # Errors
    /// `NetworkError` on transport failures or non-success HTTP status
    /// (e.g. rate limiting), `NoRepositoriesFound` when the search yields
    /// no usable repositories.
    pub async fn search_cve_repos(&self, cve_id: &str) -> Result<Vec<GitHubRepository>, CollectorError> {
        let client = Client::new();
        let search_url = format!(
            "https://api.github.com/search/repositories?q={}+in:name&sort=stars&order=desc",
            cve_id
        );

        let response = client.get(&search_url)
            .header("Authorization", format!("token {}", self.github_token))
            .header("Accept", "application/vnd.github.v3+json")
            // GitHub's API rejects requests that lack a User-Agent header.
            .header("User-Agent", "ExploitCollector")
            .send()
            .await?
            // Fail fast on 4xx/5xx (e.g. rate-limit 403) instead of
            // mis-reporting the error body as "no repositories found".
            .error_for_status()?;

        let search_result: serde_json::Value = response.json().await?;

        // Items that fail to deserialize into GitHubRepository are skipped.
        let mut repos: Vec<GitHubRepository> = search_result
            .get("items")
            .and_then(|items| items.as_array())
            .map(|items_array| {
                items_array
                    .iter()
                    .filter_map(|item| serde_json::from_value(item.clone()).ok())
                    .collect()
            })
            .ok_or(CollectorError::NoRepositoriesFound)?;

        // Normalize html_url to a full GitHub URL, rebuilding it from
        // "owner/name" when the API returned something unexpected.
        for repo in &mut repos {
            if !repo.html_url.starts_with("https://github.com/") {
                repo.html_url = format!("https://github.com/{}", repo.full_name);
            }
        }

        if repos.is_empty() {
            return Err(CollectorError::NoRepositoriesFound);
        }
        Ok(repos)
    }

    /// Writes `repos` as pretty-printed JSON to `exploits/<year>/<cve_id>.json`.
    ///
    /// The year is taken from the CVE ID (CVE-YYYY-NNNN format); if the ID is
    /// malformed the current UTC year is used as a fallback.
    ///
    /// # Errors
    /// `FileSystemError` on directory/file failures, `JsonError` if
    /// serialization fails.
    pub fn save_repositories(&self, cve_id: &str, repos: &[GitHubRepository]) -> Result<(), CollectorError> {
        let year = cve_id.split('-').nth(1)
            .map(|y| y.to_string())
            .unwrap_or_else(|| Utc::now().year().to_string());

        let year_dir = self.output_dir.join(year);
        create_dir_all(&year_dir)?;

        let filename = year_dir.join(format!("{}.json", cve_id));
        let json_content = serde_json::to_string_pretty(repos)?;
        fs::write(filename, json_content)?;
        Ok(())
    }
}

65
src/main.rs Normal file
View file

@ -0,0 +1,65 @@
mod error;
mod nvd_fetcher;
mod exploit_collector;
use clap::Parser;
use nvd_fetcher::{NvdCveFetcher, FeedType};
use exploit_collector::ExploitCollector;
/// Command-line interface: `--feed recent|full`, defaulting to the
/// recent/modified NVD feeds.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Type of NVD feed to download
    #[arg(value_enum, long, default_value_t = FeedType::Recent)]
    feed: FeedType,
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();

    // Download the requested NVD feed(s) and pull out the CVE identifiers.
    let nvd_fetcher = NvdCveFetcher::new()?;
    let cve_ids = nvd_fetcher.fetch_cves(cli.feed).await?;
    println!("Fetched {} CVEs", cve_ids.len());

    let collector = ExploitCollector::new()?;

    // Look up PoC repositories for each CVE; a failed search is reported
    // and skipped rather than aborting the whole run.
    for cve_id in cve_ids {
        println!("Searching for repositories related to: {}", cve_id);

        let repos = match collector.search_cve_repos(&cve_id).await {
            Ok(found) => found,
            Err(err) => {
                eprintln!("Error searching for {}: {}", cve_id, err);
                continue;
            }
        };

        println!("Found {} repositories", repos.len());

        // Persist the results; saving failures are reported but non-fatal.
        if let Err(err) = collector.save_repositories(&cve_id, &repos) {
            eprintln!("Error saving repositories for {}: {}", cve_id, err);
        }

        // Echo a short summary of each repository to stdout.
        for repo in &repos {
            println!("- {}", repo.full_name);
            println!(" URL: {}", repo.html_url);
            println!(" Stars: {}", repo.stargazers_count);
            if let Some(desc) = &repo.description {
                println!(" Description: {}", desc);
            }
            println!();
        }
    }

    println!("Collection complete.");
    Ok(())
}

159
src/nvd_fetcher.rs Normal file
View file

@ -0,0 +1,159 @@
use std::path::{Path, PathBuf};
use std::fs::{create_dir_all, File};
use std::io::{self, Read, Write};
use reqwest::Client;
use flate2::read::GzDecoder;
use serde_json::Value;
use clap::{Parser, ValueEnum};
use chrono::{Utc, Datelike};
use crate::error::CollectorError;
/// Which NVD feed set to download; selectable on the CLI via clap's ValueEnum.
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum, Debug)]
pub enum FeedType {
    // The "modified" + "recent" feeds (recently added/changed CVEs).
    Recent,
    // Every yearly feed from 2002 through the current year.
    Full,
}
/// Downloads gzipped NVD JSON 1.1 feeds into `nvd_feeds/` and extracts CVE IDs.
pub struct NvdCveFetcher {
    // Reused for all feed downloads.
    client: Client,
    // Local cache directory ("nvd_feeds"); downloaded feeds are kept here.
    download_dir: PathBuf,
}
impl NvdCveFetcher {
    /// Creates a fetcher, ensuring the `nvd_feeds/` download directory exists.
    ///
    /// # Errors
    /// Returns the underlying I/O error if the directory cannot be created.
    pub fn new() -> io::Result<Self> {
        let download_dir = Path::new("nvd_feeds");
        create_dir_all(download_dir)?;
        Ok(Self {
            client: Client::new(),
            download_dir: download_dir.to_path_buf(),
        })
    }

    /// Downloads the selected feed set and returns a sorted, de-duplicated
    /// list of CVE IDs.
    ///
    /// NOTE(review): NVD has announced retirement of the legacy 1.1 JSON feeds
    /// in favor of the CVE API 2.0 — confirm these endpoints are still served.
    pub async fn fetch_cves(&self, feed_type: FeedType) -> Result<Vec<String>, CollectorError> {
        match feed_type {
            FeedType::Recent => self.fetch_recent_cves().await,
            FeedType::Full => self.fetch_full_historical_cves().await,
        }
    }

    /// Processes the "modified" and "recent" feeds and merges their CVE IDs.
    async fn fetch_recent_cves(&self) -> Result<Vec<String>, CollectorError> {
        let feed_urls = [
            "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-modified.json.gz",
            "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-recent.json.gz",
        ];

        let mut all_cve_ids = Vec::new();
        for url in feed_urls {
            let filepath = self.download_feed(url).await?;
            let data = self.decompress_feed(&filepath)?;
            all_cve_ids.extend(self.extract_cve_ids(&data)?);
        }

        // The same CVE can appear in both feeds; sort + dedup removes overlap.
        all_cve_ids.sort_unstable();
        all_cve_ids.dedup();
        Ok(all_cve_ids)
    }

    /// Downloads every yearly feed from 2002 (the first NVD feed year) through
    /// the current year and returns all unique CVE IDs.
    async fn fetch_full_historical_cves(&self) -> Result<Vec<String>, CollectorError> {
        let current_year = Utc::now().year();
        let mut all_cve_ids = Vec::new();

        for year in 2002..=current_year {
            let url = format!(
                "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{}.json.gz",
                year
            );
            println!("Downloading CVE feed for year: {}", year);

            let filepath = self.download_feed(&url).await?;
            let data = self.decompress_feed(&filepath)?;
            all_cve_ids.extend(self.extract_cve_ids(&data)?);

            // Small delay between yearly downloads to avoid hammering NVD.
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
        }

        all_cve_ids.sort_unstable();
        all_cve_ids.dedup();
        println!("Total unique CVEs found: {}", all_cve_ids.len());
        Ok(all_cve_ids)
    }

    /// Downloads a feed to `nvd_feeds/<filename>` and returns its path.
    /// An already-downloaded file is reused without contacting the server.
    ///
    /// NOTE(review): an existing file is never refreshed; delete nvd_feeds/
    /// to force a re-download of stale feeds.
    async fn download_feed(&self, url: &str) -> Result<PathBuf, CollectorError> {
        // Derive the local filename from the last URL path segment.
        let filename = url.rsplit('/').next().unwrap_or("feed.json.gz");
        let filepath = self.download_dir.join(filename);

        if filepath.exists() {
            return Ok(filepath);
        }

        // Fail on HTTP errors so an error page is never cached as a .gz file
        // (which would make every subsequent decompress of it fail).
        let response = self.client.get(url).send().await?.error_for_status()?;
        let bytes = response.bytes().await?;

        let mut file = File::create(&filepath)?;
        file.write_all(&bytes)?;
        Ok(filepath)
    }

    /// Gunzips a downloaded feed file into memory.
    fn decompress_feed(&self, filepath: &Path) -> Result<Vec<u8>, CollectorError> {
        let file = File::open(filepath)?;
        let mut gz = GzDecoder::new(file);
        let mut buffer = Vec::new();
        gz.read_to_end(&mut buffer)?;
        Ok(buffer)
    }

    /// Pulls every `CVE_Items[].cve.CVE_data_meta.ID` out of a 1.1 feed JSON
    /// document. Items missing that path are silently skipped; a missing
    /// `CVE_Items` array yields an empty list rather than an error.
    fn extract_cve_ids(&self, json_data: &[u8]) -> Result<Vec<String>, CollectorError> {
        let json: Value = serde_json::from_slice(json_data)
            .map_err(|e| CollectorError::NvdFeedError(e.to_string()))?;

        let cve_ids: Vec<String> = json
            .get("CVE_Items")
            .and_then(|items| items.as_array())
            .map(|items_array| {
                items_array
                    .iter()
                    .filter_map(|item| {
                        item.get("cve")
                            .and_then(|cve| cve.get("CVE_data_meta"))
                            .and_then(|meta| meta.get("ID"))
                            .and_then(|id| id.as_str())
                            .map(|s| s.to_string())
                    })
                    .collect()
            })
            .unwrap_or_default();
        Ok(cve_ids)
    }
}
// NOTE(review): this struct duplicates the `Cli` defined in main.rs and is
// never used from this module — it looks like leftover scaffolding. Consider
// removing it (and the then-unneeded `Parser` import above) so the CLI is
// defined in exactly one place.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Type of NVD feed to download
    #[arg(value_enum, long, default_value_t = FeedType::Recent)]
    feed: FeedType,
}