Initial commit. Data extraction vaguely tested; report writing tasks written but untested

commit ef4b4b7390
2024-05-04 03:17:42 +02:00
9 changed files with 1756 additions and 0 deletions

.gitignore vendored Normal file (+1)

@@ -0,0 +1 @@
/target

Cargo.lock generated Normal file (+1106); diff suppressed because it is too large

Cargo.toml Normal file (+19)

@@ -0,0 +1,19 @@
[package]
name = "ascertain"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0.82"
base64 = "0.22.1"
chrono = { version = "0.4.38", features = ["serde"] }
futures = "0.3.30"
hex = { version = "0.4.3", features = ["serde"] }
openssl = "0.10.64"
serde = { version = "1.0.200", features = ["derive", "rc"] }
serde_json = "1.0.116"
serde_with = { version = "3.8.1", features = ["base64"] }
thiserror = "1.0.59"
tokio = { version = "1.37.0", features = ["rt-multi-thread", "fs", "io-util", "net", "sync", "time", "macros", "parking_lot"] }
toml = "0.8.12"
tracing = { version = "0.1.40" }

src/bin/cert_dump.rs Normal file (+17)

@@ -0,0 +1,17 @@
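//! Small debugging helper: reads a single certificate (PEM or DER) from standard input
//! and prints the fields extracted by `CertInfo::extract` as TOML on standard error.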
use std::io::Read;
use openssl::x509::X509;
pub fn main() -> anyhow::Result<()> {
let mut buf = Vec::new();
std::io::stdin().read_to_end(&mut buf)?;
let cert = if buf.starts_with(b"-----BEGIN CERTIFICATE-----") {
openssl::x509::X509::from_pem(buf.as_slice())?
} else {
X509::from_der(buf.as_slice())?
};
eprint!("{}", toml::to_string(&ascertain::report::CertInfo::extract(cert.as_ref())?)?);
Ok(())
}

src/config.rs Normal file (+277)

@@ -0,0 +1,277 @@
use std::collections::HashSet;
use std::fmt::Formatter;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::num::NonZeroU16;
use std::path::Path;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use serde::{de, Deserialize, Deserializer};
use serde::de::{Error, Unexpected};
use tokio::sync::Semaphore;
use crate::report::JsonConfig;
#[derive(Copy, Clone, Debug)]
pub enum IpRange {
V4{
start: Ipv4Addr,
end: Ipv4Addr,
},
V6{
start: Ipv6Addr,
end: Ipv6Addr,
}
}
impl IpRange {
pub fn contains(&self, addr: IpAddr) -> bool {
match (self, addr) {
(Self::V4{start, end}, IpAddr::V4(addr)) => &addr >= start && &addr <= end,
(Self::V6{start, end}, IpAddr::V6(addr)) => &addr >= start && &addr <= end,
(_, _) => false,
}
}
pub fn empty(&self) -> bool {
match self {
IpRange::V4 { start, end } => {start > end}
IpRange::V6 { start, end } => {start > end}
}
}
/// Returns the range size minus one: an empty range can be detected via `empty`, and the full ::/0 range would not fit in a u128.
pub fn size_128(&self) -> u128 {
match self {
IpRange::V4 { start, end } => {
if start <= end {
(u32::from_be_bytes(end.octets()) as u128) - (u32::from_be_bytes(start.octets()) as u128)
} else {
0
}
}
IpRange::V6 { start, end } => {
if start <= end {
u128::from_be_bytes(end.octets()) - u128::from_be_bytes(start.octets())
} else {
0
}
}
}
}
}
impl Iterator for IpRange {
type Item = IpAddr;
fn next(&mut self) -> Option<Self::Item> {
match self {
IpRange::V4{start, end} => {
if *start <= *end {
let ret = *start;
// Advance the start; if it was already 255.255.255.255 (so end is too),
// mark the range empty instead of overflowing.
match u32::from_be_bytes(start.octets()).checked_add(1) {
Some(next) => *start = next.to_be_bytes().into(),
None => *end = (u32::from_be_bytes(end.octets()) - 1).to_be_bytes().into(),
}
Some(IpAddr::V4(ret))
} else { None }
},
IpRange::V6 {start, end } => {
if *start <= *end {
let ret = *start;
// Same overflow guard for the all-ones IPv6 address.
match u128::from_be_bytes(start.octets()).checked_add(1) {
Some(next) => *start = next.to_be_bytes().into(),
None => *end = (u128::from_be_bytes(end.octets()) - 1).to_be_bytes().into(),
}
Some(IpAddr::V6(ret))
} else { None }
}
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
if self.empty() {
(0, Some(0))
} else {
let sz = self.size_128();
if sz >= usize::MAX as u128 {
(usize::MAX, None)
} else {
let sz = sz as usize;
(sz+1, Some(sz+1))
}
}
}
// TODO: other methods could have more efficient implementations, but I don't think we need them
}
impl<'de> Deserialize<'de> for IpRange {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> {
struct IpRangeVisitor;
impl<'de> de::Visitor<'de> for IpRangeVisitor {
type Value = IpRange;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
write!(formatter, "ip, ip/prefix, or ip-ip")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> where E: Error {
if let Some((network, pfx)) = v.split_once('/') {
// TODO: check for likely misconfiguration
let network = IpAddr::from_str(network).map_err(|_| E::invalid_value(Unexpected::Str(v), &self))?;
let prefix = u32::from_str_radix(pfx, 10).map_err(|_| E::invalid_value(Unexpected::Str(v), &self))?;
match network {
IpAddr::V4(addr) => {
let addr: u32 = addr.into();
if prefix > 32 {
return Err(E::invalid_value(Unexpected::Str(v), &self))
} else if prefix == 0 {
// This is probably an error, but trust the user ¯\_(ツ)_/¯
return Ok(IpRange::V4 {start: Ipv4Addr::from(0), end: Ipv4Addr::from(!0)})
}
let mask = (1 << (32-prefix)) - 1;
return Ok(IpRange::V4 { start: Ipv4Addr::from(addr & !mask), end: Ipv4Addr::from(addr | mask) })
}
IpAddr::V6(addr) => {
let addr: u128 = addr.into();
if prefix > 128 {
return Err(E::invalid_value(Unexpected::Str(v), &self))
} else if prefix == 0 {
// This is *definitely* an error, but trust the user ¯\_(ツ)_/¯
return Ok(IpRange::V6 {start: Ipv6Addr::from(0), end: Ipv6Addr::from(!0)})
}
let mask = (1 << (128-prefix as u128)) - 1;
return Ok(IpRange::V6 { start: Ipv6Addr::from(addr & !mask), end: Ipv6Addr::from(addr | mask) })
}
}
} else if let Some((n1, n2)) = v.split_once('-') {
let (start, end) = IpAddr::from_str(n1)
.and_then(|start| Ok((start, IpAddr::from_str(n2)?)))
.map_err(|_| E::invalid_value(Unexpected::Str(v), &self))?;
match (start, end) {
(IpAddr::V4(start), IpAddr::V4(end)) => Ok(IpRange::V4 {start, end}),
(IpAddr::V6(start), IpAddr::V6(end)) => Ok(IpRange::V6 {start, end}),
_ => Err(E::invalid_value(Unexpected::Str(v), &self))
}
} else {
match IpAddr::from_str(v) {
Err(_) => Err(E::invalid_value(Unexpected::Str(v), &self)),
Ok(IpAddr::V4(addr)) => Ok(IpRange::V4 {start: addr, end: addr}),
Ok(IpAddr::V6(addr)) => Ok(IpRange::V6 {start: addr, end: addr}),
}
}
}
}
deserializer.deserialize_str(IpRangeVisitor)
}
}
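// Illustrative only: a hedged sketch of the three accepted range syntaxes, written as a
// unit test. It drives the visitor through serde_json (already a dependency above) because
// IpRange implements Deserialize rather than FromStr; the addresses are made-up examples.
#[cfg(test)]
mod ip_range_format_examples {
    use super::IpRange;

    #[test]
    fn accepts_single_address_prefix_and_explicit_range() {
        for (raw, expected_len) in [
            (r#""192.0.2.1""#, 1usize),    // bare address
            (r#""10.0.0.0/30""#, 4),       // CIDR prefix
            (r#""10.0.0.1-10.0.0.4""#, 4), // explicit start-end
        ] {
            let range: IpRange = serde_json::from_str(raw).expect("range should parse");
            // IpRange is an Iterator over every address it contains.
            assert_eq!(range.count(), expected_len);
        }
    }
}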
#[derive(Deserialize)]
pub struct TargetConfig {
/// List of IP address ranges to scan. Defaults to an empty list, in which case nothing is scanned.
#[serde(default)]
pub hosts: Vec<IpRange>,
/// List of ports to scan on each host. Defaults to an empty list, in which case no probes are made.
#[serde(default)]
pub ports: Vec<u16>,
/// How many probes are allowed in parallel per host
#[serde(default)]
pub host_parallelism: Option<usize>,
/// How many probes are allowed in parallel, globally
#[serde(default)]
pub global_parallelism: Option<usize>,
/// How long to wait for a connection, in seconds.
#[serde(default)]
pub connect_timeout: Option<f32>,
/// How long to wait for the TLS handshake, in seconds.
#[serde(default)]
pub handshake_timeout: Option<f32>,
/// How many attempts to make to contact a host
#[serde(default)]
pub retry_count: Option<u32>,
/// Port to use to check host liveness. 0 (the default) disables the liveness check.
#[serde(default)]
pub host_live_port: u16,
}
#[derive(Deserialize)]
#[serde(rename_all="lowercase", tag="format")]
pub enum OutputFormat {
Json(JsonConfig)
}
#[derive(Deserialize)]
pub struct TopConfig {
pub targets: TargetConfig,
pub output: OutputFormat,
}
pub struct Host {
pub ip: IpAddr,
pub semaphore: Arc<Semaphore>,
pub ports: Vec<u16>,
pub live_port: Option<NonZeroU16>,
}
/// The subset of config needed by probe tasks.
#[derive(Clone)]
pub struct ProbeConfig {
pub retry_count: u32,
pub connect_timeout: Duration,
pub handshake_timeout: Duration,
pub global_semaphore: Arc<Semaphore>,
}
pub struct Config {
pub hosts: Vec<Host>,
pub probe_config: ProbeConfig,
}
pub fn load_config(path: impl AsRef<Path>) -> anyhow::Result<Config> {
let content = std::fs::read_to_string(path)?;
let top_config = toml::from_str::<TopConfig>(&content)?;
let target_config = top_config.targets;
let host_parallelism = target_config.host_parallelism.unwrap_or(5); // conservative default
// Construct the list of hosts and host/port pairs.
let mut hosts_seen = HashSet::new();
let mut hosts = Vec::new();
let mut probes: usize = 0;
for range in target_config.hosts {
for ip in range {
if !hosts_seen.insert(ip) {
continue;
}
hosts.push(Host {
ip,
live_port: NonZeroU16::new(target_config.host_live_port),
semaphore: Arc::new(Semaphore::new(host_parallelism)),
ports: target_config.ports.clone(),
});
probes += target_config.ports.len();
}
}
// Configure the reporting backend.
// TODO: the formatter task and the Reporter handle are not yet stored in `Config` or
// spawned anywhere; only the probe-side settings below are returned for now.
let (_report_task, _reporter) = crate::report::configure_backend(top_config.output)?;
let probe_config = ProbeConfig {
retry_count: target_config.retry_count.unwrap_or(1),
connect_timeout: Duration::from_secs_f32(target_config.connect_timeout.unwrap_or(5.)),
handshake_timeout: Duration::from_secs_f32(target_config.handshake_timeout.unwrap_or(5.)),
// 900 is a sane default in case we're crossing a NAT boundary; if not, this can safely be 100's
// of thousands, depending on system resources.
global_semaphore: Arc::new(Semaphore::new(target_config.global_parallelism.unwrap_or(900))),
};
Ok(Config{
hosts,
probe_config,
})
}
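// A hedged sketch of the configuration format implied by the structs above; the paths,
// ranges, and ports are invented examples, and the exact TOML shape (notably the
// `format = "json"` tag inside [output]) follows from the serde attributes, not from any
// documented schema.
#[cfg(test)]
mod config_format_example {
    use super::TopConfig;

    #[test]
    fn example_config_parses() {
        let example = r#"
[targets]
hosts = ["192.0.2.0/28", "198.51.100.1", "2001:db8::1-2001:db8::ff"]
ports = [443, 8443]
host_parallelism = 2
global_parallelism = 500
connect_timeout = 5.0
handshake_timeout = 10.0
retry_count = 2
host_live_port = 443

[output]
format = "json"
output_file = "certs.jsonl"
issuer_file = "issuers.jsonl"
container = false
"#;
        let parsed: TopConfig = toml::from_str(example).expect("example config should parse");
        assert_eq!(parsed.targets.ports, vec![443, 8443]);
    }
}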

src/lib.rs Normal file (+2)

@@ -0,0 +1,2 @@
pub mod config;
pub mod report;

src/main.rs Normal file (+10)

@@ -0,0 +1,10 @@
use ascertain::config;
pub mod scanner;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
eprintln!("{}", std::mem::size_of::<config::Host>());
Ok(())
}

src/report.rs Normal file (+320)

@@ -0,0 +1,320 @@
use std::borrow::Cow;
use std::collections::HashSet;
use std::future::Future;
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr};
use std::path::PathBuf;
use std::sync::Arc;
use base64::Engine;
use base64::prelude::BASE64_STANDARD;
use chrono::{Local, Utc};
use openssl::asn1::{Asn1Time, Asn1TimeRef};
use openssl::hash::MessageDigest;
use openssl::pkey::{Id, PKeyRef, Public};
use openssl::stack::StackRef;
use openssl::x509::{GeneralNameRef, X509, X509NameEntryRef, X509NameRef, X509Ref};
use serde::{Deserialize, Serialize};
use serde_with::{base64::Base64, serde_as};
use thiserror::Error;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::{mpsc, RwLock};
use tokio::sync::mpsc::Sender;
use tracing::{error, warn};
use crate::config::OutputFormat;
#[derive(Error, Debug, Serialize, Copy, Clone)]
pub enum ReportError {
#[error("Connection timed out")]
ConnectionTimeout,
#[error("Connection refused")]
ConnectionRefused,
#[error("Handshake timed out")]
HandshakeTimeout,
#[error("TLS Protocol error: probably not a TLS server")]
ProtocolError,
}
#[derive(Serialize, Debug, Clone)]
#[serde(rename_all="lowercase", tag="status", content="report")]
pub enum ReportPayload {
Success{
certificate: CertInfo,
// TODO: define fields for SSL implementation
},
Error{msg: ReportError},
}
#[derive(Serialize, Debug, Clone)]
pub struct ProbeReport {
pub host: SocketAddr,
pub scan_date: chrono::DateTime<Local>,
#[serde(flatten)]
pub result: ReportPayload
}
#[serde_as]
#[derive(Serialize, Debug, Clone)]
pub struct CertInfo {
#[serde_as(as="Base64")]
pub cert_digest: Vec<u8>,
#[serde_as(as="Base64")]
pub issuer_subject_der: Vec<u8>,
pub issuer_subject: Vec<String>,
#[serde_as(as="Base64")]
pub certificate_der: Vec<u8>,
pub subject: Vec<String>,
#[serde_as(as="Base64")]
pub subject_der: Vec<u8>,
pub san: Vec<String>,
pub not_before: chrono::DateTime<Utc>,
pub not_after: chrono::DateTime<Utc>,
pub key_type: String,
pub signature_type: String,
#[serde(with="hex")]
pub authority_key_id: Vec<u8>,
#[serde(with="hex")]
pub subject_key_id: Vec<u8>,
}
fn asn1time_to_datetime(date: &Asn1TimeRef) -> anyhow::Result<chrono::DateTime<Utc>> {
let res = Asn1Time::from_unix(0).unwrap().diff(date)?;
let timestamp = res.days as i64 * 86400 + res.secs as i64;
chrono::DateTime::from_timestamp(timestamp, 0)
.ok_or(anyhow::anyhow!("Constructing timestamp failed"))
}
fn describe_key(key: &PKeyRef<Public>) -> String {
match key.id() {
Id::RSA => format!("RSA-{}", key.bits()),
Id::RSA_PSS => format!("RSA-PSS-{}", key.bits()),
Id::DSA => format!("DSA-{}", key.bits()),
Id::EC => format!("EC-P{}", key.bits()),
Id::ED25519 => "Ed25519".to_owned(),
Id::ED448 => "Ed448".to_owned(),
id => format!("UNKNOWN-{}", id.as_raw()),
}
}
impl CertInfo {
pub fn extract(data: &X509Ref) -> anyhow::Result<Self> {
let md = MessageDigest::sha256();
let cert_digest = data.digest(md)?.to_vec();
let issuer_subject = data.issuer_name();
Ok(CertInfo {
cert_digest,
issuer_subject_der: issuer_subject.to_der()?,
issuer_subject: issuer_subject.entries().map(format_x509_name_entry).collect(),
certificate_der: data.to_der()?,
subject: data.subject_name().entries().map(format_x509_name_entry).collect(),
subject_der: data.subject_name().to_der()?,
san: data.subject_alt_names()
.map(|stack| stack.iter().map(format_general_name).collect())
.unwrap_or(Vec::new()),
not_before: asn1time_to_datetime(data.not_before())?,
not_after: asn1time_to_datetime(data.not_after())?,
key_type: describe_key(data.public_key()?.as_ref()),
signature_type: data.signature_algorithm().object().nid().short_name()?.to_owned(),
authority_key_id: data.authority_key_id().map_or(Vec::new(), |id| id.as_slice().to_vec()),
subject_key_id: data.subject_key_id().map_or(Vec::new(), |id| id.as_slice().to_vec())
})
}
}
#[derive(Deserialize)]
pub struct JsonConfig {
/// File which receives discovered certificates.
output_file: PathBuf,
/// File which receives discovered issuer certificates. Optional; if not included, do not store issuers.
#[serde(default)]
issuer_file: Option<PathBuf>,
/// Enable an outer container so that the output file contains a single JSON list.
/// If disabled (default), the file is in json-lines format.
#[serde(default)]
container: bool,
}
#[allow(unused)]
#[derive(Clone)]
pub struct Reporter {
issuer_chan: Sender<X509>,
report_chan: Sender<ProbeReport>,
digests: Arc<RwLock<HashSet<Vec<u8>>>>,
collect_digests: bool,
}
#[derive(Error, Debug)]
pub enum ReportingError {
#[error("Report formatter terminated")]
ReportFormatterFailed,
}
fn format_x509_name_entry(entry: &X509NameEntryRef) -> String {
let name = entry.object().nid().short_name()
.map(Cow::Borrowed)
.unwrap_or_else(|_| Cow::Owned(format!("{:?}", entry.object())));
let value = entry.data().as_utf8().map(|data| data.to_string()).unwrap_or_else(|_|
BASE64_STANDARD.encode(entry.data().as_slice())
);
format!("{name}={value}")
}
fn format_x509_name(name: &X509NameRef) -> String {
let mut result = "".to_owned();
for entry in name.entries() {
if !result.is_empty() {
result.push_str(", ")
}
result.push_str(format_x509_name_entry(entry).as_ref());
}
result
}
fn format_general_name(name: &GeneralNameRef) -> String {
// TODO: there's other types that aren't supported by the safe wrapper.
if let Some(name) = name.email() {
return format!("EMAIL:{name}")
} else if let Some(name) = name.dnsname() {
return format!("DNS:{name}")
} else if let Some(name) = name.uri() {
return format!("URI:{name}")
} else if let Some(ip) = name.ipaddress() {
match ip.len() {
4 => format!("IP:{}", Ipv4Addr::from(<[u8;4]>::try_from(ip).unwrap())),
16 => format!("IP:{}", Ipv6Addr::from(<[u8;16]>::try_from(ip).unwrap())),
_ => format!("IPx:{}", hex::encode(ip))
}
} else if let Some(dn) = name.directory_name() {
format!("DN:{}", format_x509_name(dn))
} else {
"UNKNOWN".to_string()
}
}
impl Reporter {
pub async fn report_issuers(&self, issuers: &StackRef<X509>) {
if !self.collect_digests {
return;
}
// TODO This is always in whatever order the client returns the certificates :-/
for issuer in issuers.iter() {
self.note_issuer(issuer).await;
}
}
async fn note_issuer(&self, x509: &X509Ref) -> Option<()> {
let der_digest = x509.digest(MessageDigest::sha256()).ok()?.to_vec();
// We try with a read lock first in order to increase parallelism.
// Most of the time, the cert will already be in the store.
let already = self.digests.read().await.contains(der_digest.as_slice());
if !already {
if self.digests.write().await.insert(der_digest) {
if self.issuer_chan.send(x509.to_owned()).await.is_err() {
warn!("Issuer certificate writer has exited early; dropping issuer certificate");
}
}
}
Some(())
}
pub async fn report_probe(&self, report: ProbeReport) -> Result<(), ReportingError> {
if self.report_chan.send(report).await.is_err() {
error!("Report formatter has exited early");
Err(ReportingError::ReportFormatterFailed)
} else {
Ok(())
}
}
}
fn start_json(config: JsonConfig) -> anyhow::Result<(impl Future<Output=()>+Send, Reporter)> {
let (issuer_send, mut issuer_recv) = mpsc::channel::<X509>(5);
let (report_send, mut report_recv) = mpsc::channel(5);
let report_file = tokio::fs::File::from_std(std::fs::File::create(config.output_file)?);
let issuer_writer = config.issuer_file.map(std::fs::File::create).transpose()?.map(tokio::fs::File::from_std);
let has_issuer = issuer_writer.is_some();
let container = config.container;
let issuer_fut = async move {
if let Some(issuer_file) = issuer_writer {
let mut first_record = true;
let mut issuer_file = tokio::io::BufWriter::new(issuer_file);
if container {
issuer_file.write_u8(b'[').await?;
}
while let Some(issuer) = issuer_recv.recv().await {
match CertInfo::extract(issuer.as_ref()) {
Ok(info) => {
if container && !first_record {
issuer_file.write_u8(b',').await?;
}
first_record = false;
let info = serde_json::to_vec(&info)?;
issuer_file.write_all(info.as_slice()).await?;
issuer_file.write_u8(b'\n').await?;
}
Err(err) => {
warn!(%err, "Failed to extract data from certificate")
}
}
}
if container {
issuer_file.write_all(b"]").await?;
}
issuer_file.flush().await?;
}
Ok(()) as anyhow::Result<()>
};
let report_fut = async move {
let mut file = BufWriter::new(report_file);
if container {
file.write_u8(b'[').await?;
}
let mut first_record = true;
while let Some(report) = report_recv.recv().await {
if container && !first_record {
file.write_u8(b',').await?;
}
first_record = false;
let json = serde_json::to_vec(&report)?;
file.write_all(json.as_slice()).await?;
file.write_u8(b'\n').await?;
}
if container {
file.write_u8(b']').await?;
}
file.flush().await?;
Ok(()) as anyhow::Result<()>
};
let task = async move {
let (report_result, issuer_result) = futures::future::join(report_fut, issuer_fut).await;
if let Err(error) = report_result {
error!(%error, "Report writer failed")
}
if let Err(error) = issuer_result {
error!(%error, "Issuer certificate writer failed")
}
};
let reporter = Reporter {
issuer_chan: issuer_send,
report_chan: report_send,
digests: Arc::new(Default::default()),
collect_digests: has_issuer,
};
Ok((task, reporter))
}
/// Configure the reporting backend
pub(crate) fn configure_backend(config: OutputFormat) -> anyhow::Result<(impl Future<Output=()>+Send, Reporter)> {
match config {
OutputFormat::Json(json) => start_json(json)
}
}
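// A hedged sketch of the intended call pattern for a future probe task; the scanner that
// would drive this does not yet exist, so the function name and flow below are illustrative.
#[allow(dead_code)]
async fn example_report_refused(reporter: &Reporter, host: SocketAddr) -> Result<(), ReportingError> {
    let report = ProbeReport {
        host,
        scan_date: Local::now(),
        result: ReportPayload::Error { msg: ReportError::ConnectionRefused },
    };
    // This only fails if the formatter task returned by configure_backend has already exited.
    reporter.report_probe(report).await
}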

src/scanner.rs Normal file (+4)

@@ -0,0 +1,4 @@
use ascertain::config::Config;
pub fn scan(_config: Config) {
// TODO: not implemented yet; probe the configured hosts and report the results.
}