Initial Commit. Rust program that searches GitHub for PoC exploit code and stores the results in json files
This commit is contained in:
parent
856a5dc7ff
commit
9ba02601a0
10 changed files with 2141 additions and 2 deletions
42
.forgejo/workflows/daily-collect.yaml
Normal file
42
.forgejo/workflows/daily-collect.yaml
Normal file
|
@ -0,0 +1,42 @@
|
|||
name: Daily Feed Update

on:
  schedule:
    # Run daily at 1:00 AM UTC (adjust the time as needed)
    - cron: '0 1 * * *'
  workflow_dispatch:
    # Allow manual triggering of the workflow

jobs:
  update-feed:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Rust
        # FIX: actions-rs/toolchain is archived and unmaintained;
        # dtolnay/rust-toolchain is the maintained replacement for
        # installing a stable toolchain.
        uses: dtolnay/rust-toolchain@stable

      - name: Cache Cargo dependencies
        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}

      - name: Run recent feed update
        run: cargo run -- --feed recent

      - name: Commit and push results
        run: |
          git config user.name bpmcdevitt
          git config user.email brendan@mcdevitt.tech
          # NOTE(review): `git add .` stages downloaded nvd_feeds/*.gz unless
          # they are gitignored, while exploits/*/ IS gitignored — so result
          # JSON may never be committed. Confirm which artifacts this job is
          # meant to push.
          git add .
          git commit -m "Update recent feed data" || exit 0
          git push
|
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
# .gitignore
/target
.env
*.log
# FIX: downloaded NVD feed archives (nvd_feeds/*.json.gz) are transient
# build artifacts; without this the daily workflow's `git add .` commits them.
/nvd_feeds/
# NOTE(review): this ignores all collected result JSON (exploits/<year>/...),
# yet the daily workflow tries to commit "results" — confirm intent.
exploits/*/
!exploits/.gitkeep
|
1717
Cargo.lock
generated
Normal file
1717
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
15
Cargo.toml
Normal file
15
Cargo.toml
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Cargo.toml
[package]
name = "github_poc_collector"
version = "0.1.0"
edition = "2021"

[dependencies]
reqwest = { version = "0.11", features = ["json"] }
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
flate2 = "1.0"
# FIX: "0.4.20-rc.1" pinned a pre-release candidate; track the stable 0.4
# series instead so cargo resolves released versions.
chrono = "0.4"
clap = { version = "4.4", features = ["derive"] }
|
|
@ -1,3 +1,6 @@
|
|||
# github_poc_collector
|
||||
# Github PoC Exploit Collector
|
||||
|
||||
Collects PoC exploit code on Github
|
||||
## Project Overview
|
||||
A Rust application that searches GitHub for repositories related to specific CVE IDs and stores results in yearly directories.
|
||||
|
||||
## Project Structure
|
||||
|
|
0
exploits/.gitkeep
Normal file
0
exploits/.gitkeep
Normal file
22
src/error.rs
Normal file
22
src/error.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
use thiserror::Error;
|
||||
|
||||
/// Unified error type for the collector: wraps network, filesystem and JSON
/// failures via `#[from]`, plus two domain-specific variants.
#[derive(Debug, Error)]
pub enum CollectorError {
    /// HTTP/transport failure from reqwest (DNS, TLS, timeout, ...).
    #[error("Network error: {0}")]
    NetworkError(#[from] reqwest::Error),

    /// Invalid or missing configuration (e.g. GITHUB_ACCESS_TOKEN unset).
    #[error("Configuration error: {0}")]
    ConfigError(String),

    /// I/O failure while creating directories or reading/writing files.
    #[error("File system error: {0}")]
    FileSystemError(#[from] std::io::Error),

    /// Failure to serialize or deserialize a JSON payload.
    #[error("JSON parsing error: {0}")]
    JsonError(#[from] serde_json::Error),

    /// The GitHub search returned no usable repository entries for a CVE ID.
    #[error("No repositories found for CVE")]
    NoRepositoriesFound,

    /// Problem while downloading or parsing an NVD data feed.
    #[error("NVD feed error: {0}")]
    NvdFeedError(String),
}
|
110
src/exploit_collector.rs
Normal file
110
src/exploit_collector.rs
Normal file
|
@ -0,0 +1,110 @@
|
|||
use std::env;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::fs::{self, create_dir_all};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use chrono::{Utc, Datelike};
|
||||
use crate::error::CollectorError;
|
||||
|
||||
/// Subset of the fields returned by the GitHub repository-search API that is
/// persisted for each matching repository.
#[derive(Debug, Serialize, Deserialize)]
pub struct GitHubRepository {
    // Repository name (without owner).
    pub name: String,
    // "owner/name" slug, e.g. "someuser/CVE-2024-1234-poc".
    pub full_name: String,
    // Web URL; normalized to a full https://github.com/ URL after search.
    pub html_url: String,
    // Optional free-text description from the repo metadata.
    pub description: Option<String>,
    // Star count at search time.
    pub stargazers_count: u32,
    // Fork count at search time.
    pub forks_count: u32,
    // Creation timestamp string as returned by the API.
    pub created_at: String,
}
|
||||
|
||||
/// Searches GitHub for PoC repositories per CVE ID and writes the results as
/// JSON files under a per-year output directory.
pub struct ExploitCollector {
    // Personal access token used to authenticate GitHub API requests.
    github_token: String,
    // Root output directory ("exploits"); per-year subdirs are created under it.
    output_dir: PathBuf,
}
|
||||
|
||||
impl ExploitCollector {
|
||||
pub fn new() -> Result<Self, CollectorError> {
|
||||
// Create output directory based on the year of the CVE, not current year
|
||||
let output_dir = Path::new("exploits");
|
||||
|
||||
// Ensure the directory exists
|
||||
create_dir_all(&output_dir)?;
|
||||
|
||||
// Get GitHub token from environment
|
||||
let github_token = env::var("GITHUB_ACCESS_TOKEN")
|
||||
.map_err(|_| CollectorError::ConfigError("GitHub access token not set".to_string()))?;
|
||||
|
||||
Ok(Self {
|
||||
github_token,
|
||||
output_dir: output_dir.to_path_buf(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn search_cve_repos(&self, cve_id: &str) -> Result<Vec<GitHubRepository>, CollectorError> {
|
||||
let client = Client::new();
|
||||
|
||||
// GitHub Search API endpoint
|
||||
let search_url = format!(
|
||||
"https://api.github.com/search/repositories?q={}+in:name&sort=stars&order=desc",
|
||||
cve_id
|
||||
);
|
||||
|
||||
let response = client.get(&search_url)
|
||||
.header("Authorization", format!("token {}", self.github_token))
|
||||
.header("Accept", "application/vnd.github.v3+json")
|
||||
.header("User-Agent", "ExploitCollector")
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
// Parse the response
|
||||
let search_result: serde_json::Value = response.json().await?;
|
||||
|
||||
// Extract repositories
|
||||
let mut repos: Vec<GitHubRepository> = search_result
|
||||
.get("items")
|
||||
.and_then(|items| items.as_array())
|
||||
.map(|items_array| {
|
||||
items_array
|
||||
.iter()
|
||||
.filter_map(|item| serde_json::from_value(item.clone()).ok())
|
||||
.collect()
|
||||
})
|
||||
.ok_or(CollectorError::NoRepositoriesFound)?;
|
||||
|
||||
// Modify repositories to ensure full GitHub URL
|
||||
for repo in &mut repos {
|
||||
// Ensure full GitHub URL
|
||||
if !repo.html_url.starts_with("https://github.com/") {
|
||||
repo.html_url = format!("https://github.com/{}", repo.full_name);
|
||||
}
|
||||
}
|
||||
|
||||
if repos.is_empty() {
|
||||
return Err(CollectorError::NoRepositoriesFound);
|
||||
}
|
||||
|
||||
Ok(repos)
|
||||
}
|
||||
|
||||
pub fn save_repositories(&self, cve_id: &str, repos: &[GitHubRepository]) -> Result<(), CollectorError> {
|
||||
// Extract year from CVE ID (assumes CVE-YYYY-XXXX format)
|
||||
let year = cve_id.split('-').nth(1)
|
||||
.map(|y| y.to_string())
|
||||
.unwrap_or_else(|| Utc::now().year().to_string());
|
||||
|
||||
// Create year-specific directory
|
||||
let year_dir = self.output_dir.join(year);
|
||||
create_dir_all(&year_dir)?;
|
||||
|
||||
// Create a JSON file for the CVE repositories
|
||||
let filename = year_dir.join(format!("{}.json", cve_id));
|
||||
|
||||
// Serialize repositories to JSON
|
||||
let json_content = serde_json::to_string_pretty(repos)?;
|
||||
|
||||
// Write to file
|
||||
fs::write(filename, json_content)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
65
src/main.rs
Normal file
65
src/main.rs
Normal file
|
@ -0,0 +1,65 @@
|
|||
mod error;
|
||||
mod nvd_fetcher;
|
||||
mod exploit_collector;
|
||||
|
||||
use clap::Parser;
|
||||
use nvd_fetcher::{NvdCveFetcher, FeedType};
|
||||
use exploit_collector::ExploitCollector;
|
||||
|
||||
/// Command-line arguments for the collector binary.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Type of NVD feed to download
    // Defaults to the lightweight recent/modified feeds; `--feed full`
    // downloads every yearly feed.
    #[arg(value_enum, long, default_value_t = FeedType::Recent)]
    feed: FeedType,
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Parse command-line arguments
|
||||
let cli = Cli::parse();
|
||||
|
||||
// Initialize NVD CVE Fetcher
|
||||
let nvd_fetcher = NvdCveFetcher::new()?;
|
||||
|
||||
// Fetch CVEs based on feed type
|
||||
let cves = nvd_fetcher.fetch_cves(cli.feed).await?;
|
||||
|
||||
println!("Fetched {} CVEs", cves.len());
|
||||
|
||||
// Initialize ExploitCollector
|
||||
let collector = ExploitCollector::new()?;
|
||||
|
||||
// Search and collect for each CVE
|
||||
for cve_id in cves {
|
||||
println!("Searching for repositories related to: {}", cve_id);
|
||||
|
||||
match collector.search_cve_repos(&cve_id).await {
|
||||
Ok(repos) => {
|
||||
println!("Found {} repositories", repos.len());
|
||||
|
||||
// Save repositories to file
|
||||
if let Err(e) = collector.save_repositories(&cve_id, &repos) {
|
||||
eprintln!("Error saving repositories for {}: {}", cve_id, e);
|
||||
}
|
||||
|
||||
// Print repository details (optional)
|
||||
for repo in &repos {
|
||||
println!("- {}", repo.full_name);
|
||||
println!(" URL: {}", repo.html_url);
|
||||
println!(" Stars: {}", repo.stargazers_count);
|
||||
if let Some(desc) = &repo.description {
|
||||
println!(" Description: {}", desc);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error searching for {}: {}", cve_id, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("Collection complete.");
|
||||
Ok(())
|
||||
}
|
159
src/nvd_fetcher.rs
Normal file
159
src/nvd_fetcher.rs
Normal file
|
@ -0,0 +1,159 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{self, Read, Write};
|
||||
use reqwest::Client;
|
||||
use flate2::read::GzDecoder;
|
||||
use serde_json::Value;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use chrono::{Utc, Datelike};
|
||||
use crate::error::CollectorError;
|
||||
|
||||
/// Which NVD data feed(s) to download.
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum, Debug)]
pub enum FeedType {
    /// The "modified" and "recent" feeds only (recently changed CVEs).
    Recent,
    /// Every yearly feed from 2002 through the current year.
    Full,
}
|
||||
|
||||
/// Downloads NVD JSON 1.1 feeds and extracts CVE IDs from them.
pub struct NvdCveFetcher {
    // Shared HTTP client reused across all feed downloads.
    client: Client,
    // Directory ("nvd_feeds") where downloaded .json.gz files are cached.
    download_dir: PathBuf,
}
|
||||
|
||||
impl NvdCveFetcher {
|
||||
pub fn new() -> io::Result<Self> {
|
||||
let download_dir = Path::new("nvd_feeds");
|
||||
create_dir_all(&download_dir)?;
|
||||
|
||||
Ok(Self {
|
||||
client: Client::new(),
|
||||
download_dir: download_dir.to_path_buf(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn fetch_cves(&self, feed_type: FeedType) -> Result<Vec<String>, CollectorError> {
|
||||
match feed_type {
|
||||
FeedType::Recent => self.fetch_recent_cves().await,
|
||||
FeedType::Full => self.fetch_full_historical_cves().await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_recent_cves(&self) -> Result<Vec<String>, CollectorError> {
|
||||
let modified_url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-modified.json.gz";
|
||||
let recent_url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-recent.json.gz";
|
||||
|
||||
let mut all_cve_ids = Vec::new();
|
||||
|
||||
// Download and process modified feed
|
||||
let modified_filepath = self.download_feed(modified_url).await?;
|
||||
let modified_data = self.decompress_feed(&modified_filepath)?;
|
||||
let modified_cves = self.extract_cve_ids(&modified_data)?;
|
||||
all_cve_ids.extend(modified_cves);
|
||||
|
||||
// Download and process recent feed
|
||||
let recent_filepath = self.download_feed(recent_url).await?;
|
||||
let recent_data = self.decompress_feed(&recent_filepath)?;
|
||||
let recent_cves = self.extract_cve_ids(&recent_data)?;
|
||||
all_cve_ids.extend(recent_cves);
|
||||
|
||||
// Remove duplicates
|
||||
all_cve_ids.sort_unstable();
|
||||
all_cve_ids.dedup();
|
||||
|
||||
Ok(all_cve_ids)
|
||||
}
|
||||
|
||||
async fn fetch_full_historical_cves(&self) -> Result<Vec<String>, CollectorError> {
|
||||
let current_year = Utc::now().year();
|
||||
let mut all_cve_ids = Vec::new();
|
||||
|
||||
for year in 2002..=current_year {
|
||||
let url = format!(
|
||||
"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{}.json.gz",
|
||||
year
|
||||
);
|
||||
|
||||
println!("Downloading CVE feed for year: {}", year);
|
||||
|
||||
// Download and process the feed
|
||||
let filepath = self.download_feed(&url).await?;
|
||||
let data = self.decompress_feed(&filepath)?;
|
||||
let cves = self.extract_cve_ids(&data)?;
|
||||
|
||||
all_cve_ids.extend(cves);
|
||||
|
||||
// Optional: Add a small delay to prevent overwhelming the server
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
// Remove duplicates
|
||||
all_cve_ids.sort_unstable();
|
||||
all_cve_ids.dedup();
|
||||
|
||||
println!("Total unique CVEs found: {}", all_cve_ids.len());
|
||||
Ok(all_cve_ids)
|
||||
}
|
||||
|
||||
async fn download_feed(&self, url: &str) -> Result<PathBuf, CollectorError> {
|
||||
// Extract filename from URL
|
||||
let filename = url.split('/').last().unwrap_or("feed.json.gz");
|
||||
let filepath = self.download_dir.join(filename);
|
||||
|
||||
// Check if file already exists to avoid re-downloading
|
||||
if filepath.exists() {
|
||||
return Ok(filepath);
|
||||
}
|
||||
|
||||
// Download the file
|
||||
let response = self.client.get(url).send().await?;
|
||||
let bytes = response.bytes().await?;
|
||||
|
||||
// Write downloaded bytes to file
|
||||
let mut file = File::create(&filepath)?;
|
||||
file.write_all(&bytes)?;
|
||||
|
||||
Ok(filepath)
|
||||
}
|
||||
|
||||
fn decompress_feed(&self, filepath: &Path) -> Result<Vec<u8>, CollectorError> {
|
||||
let file = File::open(filepath)?;
|
||||
let mut gz = GzDecoder::new(file);
|
||||
let mut buffer = Vec::new();
|
||||
gz.read_to_end(&mut buffer)?;
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
fn extract_cve_ids(&self, json_data: &[u8]) -> Result<Vec<String>, CollectorError> {
|
||||
let json: Value = serde_json::from_slice(json_data)
|
||||
.map_err(|e| CollectorError::NvdFeedError(e.to_string()))?;
|
||||
|
||||
// Extract CVE IDs from the JSON feed
|
||||
let cve_ids: Vec<String> = json
|
||||
.get("CVE_Items")
|
||||
.and_then(|items| items.as_array())
|
||||
.map(|items_array| {
|
||||
items_array
|
||||
.iter()
|
||||
.filter_map(|item| {
|
||||
item.get("cve")
|
||||
.and_then(|cve| cve.get("CVE_data_meta"))
|
||||
.and_then(|meta| meta.get("ID"))
|
||||
.and_then(|id| id.as_str())
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
Ok(cve_ids)
|
||||
}
|
||||
}
|
||||
|
||||
// FIX: removed a dead, private duplicate of the `Cli` argument struct that
// lived here (its own comment said "Modify main.rs to use CLI parsing").
// CLI parsing is defined once in main.rs; the unused copy only produced
// dead-code warnings and confusion.
|
Loading…
Add table
Reference in a new issue