Initial Commit. Rust program that searches GitHub for PoC exploit code and stores the results in JSON files

parent 856a5dc7ff
commit 9ba02601a0

10 changed files with 2141 additions and 2 deletions
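For context, a local run of the collector (assuming a GitHub token exported as GITHUB_ACCESS_TOKEN, the variable src/exploit_collector.rs reads) would look roughly like:

    export GITHUB_ACCESS_TOKEN=<your-token>
    cargo run -- --feed recent   # recent + modified NVD feeds only
    cargo run -- --feed full     # every yearly feed from 2002 onward

The --feed values come from the FeedType enum defined in src/nvd_fetcher.rs.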
.forgejo/workflows/daily-collect.yaml (new file, 42 lines)
@@ -0,0 +1,42 @@
name: Daily Feed Update

on:
  schedule:
    # Run daily at 1:00 AM UTC (adjust the time as needed)
    - cron: '0 1 * * *'
  workflow_dispatch:
    # Allow manual triggering of the workflow

jobs:
  update-feed:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true

      - name: Cache Cargo dependencies
        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run recent feed update
        # The collector reads GITHUB_ACCESS_TOKEN (see src/exploit_collector.rs);
        # this assumes a repository secret of the same name has been configured.
        env:
          GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_ACCESS_TOKEN }}
        run: cargo run -- --feed recent

      - name: Commit and push results
        run: |
          git config user.name bpmcdevitt
          git config user.email brendan@mcdevitt.tech
          git add .
          # "|| exit 0" keeps the job green when there is nothing new to commit
          git commit -m "Update recent feed data" || exit 0
          git push
.gitignore (new file, vendored, 6 lines)
@@ -0,0 +1,6 @@
# .gitignore
/target
.env
*.log
exploits/*/
!exploits/.gitkeep
Cargo.lock (new file, generated, 1717 lines)
File diff suppressed because it is too large.
Cargo.toml (new file, 15 lines)
@@ -0,0 +1,15 @@
# Cargo.toml
[package]
name = "github_poc_collector"
version = "0.1.0"
edition = "2021"

[dependencies]
reqwest = { version = "0.11", features = ["json"] }
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
flate2 = "1.0"
chrono = { version = "0.4.20-rc.1" }
clap = { version = "4.4", features = ["derive"] }
README.md (changed)
@@ -1,3 +1,6 @@
-# github_poc_collector
+# Github PoC Exploit Collector

-Collects PoC exploit code on Github
+## Project Overview
+A Rust application that searches GitHub for repositories related to specific CVE IDs and stores the results in yearly directories.
+
+## Project Structure
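As an illustration of the yearly layout that save_repositories produces (CVE IDs hypothetical):

    exploits/
    ├── 2023/
    │   └── CVE-2023-12345.json
    └── 2024/
        └── CVE-2024-23456.json

Each JSON file holds the array of GitHub repositories matched for that CVE.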
exploits/.gitkeep (new file, 0 lines)
src/error.rs (new file, 22 lines)
@@ -0,0 +1,22 @@
use thiserror::Error;

#[derive(Debug, Error)]
pub enum CollectorError {
    #[error("Network error: {0}")]
    NetworkError(#[from] reqwest::Error),

    #[error("Configuration error: {0}")]
    ConfigError(String),

    #[error("File system error: {0}")]
    FileSystemError(#[from] std::io::Error),

    #[error("JSON parsing error: {0}")]
    JsonError(#[from] serde_json::Error),

    #[error("No repositories found for CVE")]
    NoRepositoriesFound,

    #[error("NVD feed error: {0}")]
    NvdFeedError(String),
}
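A minimal sketch (not part of the commit) of why the #[from] attributes matter: inside any function returning Result<_, CollectorError>, the ? operator converts the wrapped error types automatically. The helper below is hypothetical:

    use crate::error::CollectorError;

    // Hypothetical helper: `?` converts io::Error and serde_json::Error into
    // CollectorError via the #[from] impls generated by thiserror.
    fn load_json(path: &std::path::Path) -> Result<serde_json::Value, CollectorError> {
        let text = std::fs::read_to_string(path)?; // io::Error -> FileSystemError
        let json = serde_json::from_str(&text)?;   // serde_json::Error -> JsonError
        Ok(json)
    }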
src/exploit_collector.rs (new file, 110 lines)
@@ -0,0 +1,110 @@
use std::env;
use std::path::{Path, PathBuf};
use std::fs::{self, create_dir_all};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use chrono::{Utc, Datelike};
use crate::error::CollectorError;

#[derive(Debug, Serialize, Deserialize)]
pub struct GitHubRepository {
    pub name: String,
    pub full_name: String,
    pub html_url: String,
    pub description: Option<String>,
    pub stargazers_count: u32,
    pub forks_count: u32,
    pub created_at: String,
}

pub struct ExploitCollector {
    github_token: String,
    output_dir: PathBuf,
}

impl ExploitCollector {
    pub fn new() -> Result<Self, CollectorError> {
        // Base output directory; per-CVE files are written into year
        // subdirectories derived from the CVE ID (see save_repositories)
        let output_dir = Path::new("exploits");

        // Ensure the directory exists
        create_dir_all(output_dir)?;

        // Get GitHub token from environment
        let github_token = env::var("GITHUB_ACCESS_TOKEN")
            .map_err(|_| CollectorError::ConfigError("GitHub access token not set".to_string()))?;

        Ok(Self {
            github_token,
            output_dir: output_dir.to_path_buf(),
        })
    }

    pub async fn search_cve_repos(&self, cve_id: &str) -> Result<Vec<GitHubRepository>, CollectorError> {
        let client = Client::new();

        // GitHub Search API endpoint: repositories with the CVE ID in their
        // name, most-starred first
        let search_url = format!(
            "https://api.github.com/search/repositories?q={}+in:name&sort=stars&order=desc",
            cve_id
        );

        let response = client.get(&search_url)
            .header("Authorization", format!("token {}", self.github_token))
            .header("Accept", "application/vnd.github.v3+json")
            .header("User-Agent", "ExploitCollector")
            .send()
            .await?;

        // Parse the response
        let search_result: serde_json::Value = response.json().await?;

        // Extract repositories; items that fail to deserialize are skipped
        let mut repos: Vec<GitHubRepository> = search_result
            .get("items")
            .and_then(|items| items.as_array())
            .map(|items_array| {
                items_array
                    .iter()
                    .filter_map(|item| serde_json::from_value(item.clone()).ok())
                    .collect()
            })
            .ok_or(CollectorError::NoRepositoriesFound)?;

        // Ensure every entry carries a full GitHub URL
        for repo in &mut repos {
            if !repo.html_url.starts_with("https://github.com/") {
                repo.html_url = format!("https://github.com/{}", repo.full_name);
            }
        }

        if repos.is_empty() {
            return Err(CollectorError::NoRepositoriesFound);
        }

        Ok(repos)
    }

    pub fn save_repositories(&self, cve_id: &str, repos: &[GitHubRepository]) -> Result<(), CollectorError> {
        // Extract year from CVE ID (assumes CVE-YYYY-XXXX format); fall back
        // to the current year if the ID does not split as expected
        let year = cve_id.split('-').nth(1)
            .map(|y| y.to_string())
            .unwrap_or_else(|| Utc::now().year().to_string());

        // Create year-specific directory
        let year_dir = self.output_dir.join(year);
        create_dir_all(&year_dir)?;

        // Create a JSON file for the CVE repositories
        let filename = year_dir.join(format!("{}.json", cve_id));

        // Serialize repositories to JSON
        let json_content = serde_json::to_string_pretty(repos)?;

        // Write to file
        fs::write(filename, json_content)?;

        Ok(())
    }
}
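For reference, search_cve_repos parses the standard GitHub search response shape; a heavily trimmed, hypothetical example of what response.json() yields:

    {
      "total_count": 1,
      "items": [
        {
          "name": "CVE-2024-12345-poc",
          "full_name": "someuser/CVE-2024-12345-poc",
          "html_url": "https://github.com/someuser/CVE-2024-12345-poc",
          "description": "PoC for CVE-2024-12345",
          "stargazers_count": 5,
          "forks_count": 1,
          "created_at": "2024-03-01T12:00:00Z"
        }
      ]
    }

serde ignores the many extra fields GitHub actually returns, so from_value only needs the keys declared on GitHubRepository; items missing any of them are dropped by the filter_map.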
src/main.rs (new file, 65 lines)
@@ -0,0 +1,65 @@
mod error;
mod nvd_fetcher;
mod exploit_collector;

use clap::Parser;
use nvd_fetcher::{NvdCveFetcher, FeedType};
use exploit_collector::ExploitCollector;

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Type of NVD feed to download
    #[arg(value_enum, long, default_value_t = FeedType::Recent)]
    feed: FeedType,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Parse command-line arguments
    let cli = Cli::parse();

    // Initialize NVD CVE Fetcher
    let nvd_fetcher = NvdCveFetcher::new()?;

    // Fetch CVEs based on feed type
    let cves = nvd_fetcher.fetch_cves(cli.feed).await?;

    println!("Fetched {} CVEs", cves.len());

    // Initialize ExploitCollector
    let collector = ExploitCollector::new()?;

    // Search and collect for each CVE
    for cve_id in cves {
        println!("Searching for repositories related to: {}", cve_id);

        match collector.search_cve_repos(&cve_id).await {
            Ok(repos) => {
                println!("Found {} repositories", repos.len());

                // Save repositories to file
                if let Err(e) = collector.save_repositories(&cve_id, &repos) {
                    eprintln!("Error saving repositories for {}: {}", cve_id, e);
                }

                // Print repository details (optional)
                for repo in &repos {
                    println!("- {}", repo.full_name);
                    println!("  URL: {}", repo.html_url);
                    println!("  Stars: {}", repo.stargazers_count);
                    if let Some(desc) = &repo.description {
                        println!("  Description: {}", desc);
                    }
                    println!();
                }
            }
            Err(e) => {
                eprintln!("Error searching for {}: {}", cve_id, e);
            }
        }
    }

    println!("Collection complete.");
    Ok(())
}
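Given those println calls, a run would print output along these lines (all values hypothetical):

    Fetched 842 CVEs
    Searching for repositories related to: CVE-2024-12345
    Found 1 repositories
    - someuser/CVE-2024-12345-poc
      URL: https://github.com/someuser/CVE-2024-12345-poc
      Stars: 5

    Searching for repositories related to: CVE-2024-23456
    Error searching for CVE-2024-23456: No repositories found for CVE
    Collection complete.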
src/nvd_fetcher.rs (new file, 159 lines)
@@ -0,0 +1,159 @@
use std::path::{Path, PathBuf};
use std::fs::{create_dir_all, File};
use std::io::{self, Read, Write};
use reqwest::Client;
use flate2::read::GzDecoder;
use serde_json::Value;
use clap::ValueEnum;
use chrono::{Utc, Datelike};
use crate::error::CollectorError;

#[derive(Copy, Clone, PartialEq, Eq, ValueEnum, Debug)]
pub enum FeedType {
    Recent,
    Full,
}

pub struct NvdCveFetcher {
    client: Client,
    download_dir: PathBuf,
}

impl NvdCveFetcher {
    pub fn new() -> io::Result<Self> {
        let download_dir = Path::new("nvd_feeds");
        create_dir_all(download_dir)?;

        Ok(Self {
            client: Client::new(),
            download_dir: download_dir.to_path_buf(),
        })
    }

    pub async fn fetch_cves(&self, feed_type: FeedType) -> Result<Vec<String>, CollectorError> {
        match feed_type {
            FeedType::Recent => self.fetch_recent_cves().await,
            FeedType::Full => self.fetch_full_historical_cves().await,
        }
    }

    async fn fetch_recent_cves(&self) -> Result<Vec<String>, CollectorError> {
        let modified_url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-modified.json.gz";
        let recent_url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-recent.json.gz";

        let mut all_cve_ids = Vec::new();

        // Download and process modified feed
        let modified_filepath = self.download_feed(modified_url).await?;
        let modified_data = self.decompress_feed(&modified_filepath)?;
        let modified_cves = self.extract_cve_ids(&modified_data)?;
        all_cve_ids.extend(modified_cves);

        // Download and process recent feed
        let recent_filepath = self.download_feed(recent_url).await?;
        let recent_data = self.decompress_feed(&recent_filepath)?;
        let recent_cves = self.extract_cve_ids(&recent_data)?;
        all_cve_ids.extend(recent_cves);

        // Remove duplicates
        all_cve_ids.sort_unstable();
        all_cve_ids.dedup();

        Ok(all_cve_ids)
    }

    async fn fetch_full_historical_cves(&self) -> Result<Vec<String>, CollectorError> {
        let current_year = Utc::now().year();
        let mut all_cve_ids = Vec::new();

        for year in 2002..=current_year {
            let url = format!(
                "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{}.json.gz",
                year
            );

            println!("Downloading CVE feed for year: {}", year);

            // Download and process the feed
            let filepath = self.download_feed(&url).await?;
            let data = self.decompress_feed(&filepath)?;
            let cves = self.extract_cve_ids(&data)?;

            all_cve_ids.extend(cves);

            // Optional: a small delay to avoid overwhelming the server
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
        }

        // Remove duplicates
        all_cve_ids.sort_unstable();
        all_cve_ids.dedup();

        println!("Total unique CVEs found: {}", all_cve_ids.len());
        Ok(all_cve_ids)
    }

    async fn download_feed(&self, url: &str) -> Result<PathBuf, CollectorError> {
        // Extract filename from URL
        let filename = url.split('/').last().unwrap_or("feed.json.gz");
        let filepath = self.download_dir.join(filename);

        // Reuse an already-downloaded file to avoid re-downloading.
        // Note: cached feeds can go stale; delete nvd_feeds/ to force a refresh.
        if filepath.exists() {
            return Ok(filepath);
        }

        // Download the file
        let response = self.client.get(url).send().await?;
        let bytes = response.bytes().await?;

        // Write downloaded bytes to file
        let mut file = File::create(&filepath)?;
        file.write_all(&bytes)?;

        Ok(filepath)
    }

    fn decompress_feed(&self, filepath: &Path) -> Result<Vec<u8>, CollectorError> {
        let file = File::open(filepath)?;
        let mut gz = GzDecoder::new(file);
        let mut buffer = Vec::new();
        gz.read_to_end(&mut buffer)?;

        Ok(buffer)
    }

    fn extract_cve_ids(&self, json_data: &[u8]) -> Result<Vec<String>, CollectorError> {
        let json: Value = serde_json::from_slice(json_data)
            .map_err(|e| CollectorError::NvdFeedError(e.to_string()))?;

        // Extract CVE IDs from the JSON 1.1 feed (CVE_Items[].cve.CVE_data_meta.ID)
        let cve_ids: Vec<String> = json
            .get("CVE_Items")
            .and_then(|items| items.as_array())
            .map(|items_array| {
                items_array
                    .iter()
                    .filter_map(|item| {
                        item.get("cve")
                            .and_then(|cve| cve.get("CVE_data_meta"))
                            .and_then(|meta| meta.get("ID"))
                            .and_then(|id| id.as_str())
                            .map(|s| s.to_string())
                    })
                    .collect()
            })
            .unwrap_or_default();

        Ok(cve_ids)
    }
}
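For reference, extract_cve_ids walks the NVD JSON 1.1 feed layout, trimmed here to the path the code actually reads (IDs hypothetical):

    {
      "CVE_Items": [
        { "cve": { "CVE_data_meta": { "ID": "CVE-2024-12345" } } },
        { "cve": { "CVE_data_meta": { "ID": "CVE-2024-23456" } } }
      ]
    }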