diff --git a/docs/censorship-resistance.md b/docs/censorship-resistance.md index d8e9513b..7ae96701 100644 --- a/docs/censorship-resistance.md +++ b/docs/censorship-resistance.md @@ -61,7 +61,10 @@ that limits the number of nodes to 8 for each IP, and uniformly distribute these ### Solution -The solution used in this implementation is to store data to all nodes closer to the target than the `expected distance to k (edk)` instead of just the closest `k` nodes. +To circumvent vertical sybil attacks, we make sure to store data to as many of the closest nodes -that responded to our GET query- as necessary +to satisfy both of the following requirements: + +#### One or more nodes are further from the target than the `expected distance to k (edk)`. To understand what that means, consider that we have a rough estimation of the DHT size (which we obtain as explained in the documentation of the [Dht Size Estimate](./dht_size_estimate.md)), then we can _expect_ that the closest `k` nodes, are going to be @@ -80,6 +83,39 @@ If we store data in all nodes until `edk` (the expected distance of the first 2 Because the nature of the DHT queries, we should expect to get a response from at least one of these honest nodes as we query closer and closer nodes to the target info hash. +#### Minimum number of unique subnets with 6 bits prefix. + +An extreme, and unlikely, but possible way to defeat our `edk` approach to detect vertical sybil attacks, is to DDoS all the honest nodes +and replace them with enough nodes owned by the attacker. + +To find enough nodes to replace the nodes until `edk` the attacker needs ~4 `/8` blocks, or a single `/6` block. + +However, we can make this much more expensive, by keeping track of the number of unique `6 bit prefixes` in each GET query response, +and store data to enough nodes that have enough unique prefixes to match the average from previous queries. + +At the time of writing, this usually means the attacker needs to control up to 12 `/6` blocks.
+ +## Extreme Vertical Sybil Attacks + +While we are satisfied that these static metrics to circumvent Sybil attacks make them prohibitively expensive, let's consider what +happens in the very unlikely event that an attacker has enough resources and motivation to brute force them both. + +In this case, an attacker acquires so many IPs in so many subnets that they can both DDoS all the nodes until the expected distance to the 20th node, +and inject at least 20 nodes with as many unique `6 bit` prefixes in their IPs as the average of the rest of the network. + +Eventually, the writer will notice that reads after writes (GET after PUT, resolving after publishing) don't work for them, which can only be explained +by an extreme targeted censorship attack. + +Once the writer notices this, they can manually start publishing to more and more nodes around the target, for example, instead of publishing to the closest 20 nodes, +start publishing to the closest 200, or 2000, where readers, without any manual intervention, will be likely to find the data as they are approaching the target. + +The writer can do that by making GET queries to random targets that share enough prefix bits with their target, to find more and more nodes around their target, +then store their data to these responding nodes. + +It is unfortunate that the writer needs to react manually at all, but given how extreme this attack is, we are satisfied with +the defense being way cheaper than the attack, making the attack not only unsustainable, but also unlikely to happen, given that the attacker knows +it won't be sustainable.
+ ## Horizontal Sybil Attacks If an attacker can't perform a vertical Sybil attack, it has to run > 20 times the number of current honest nodes to have a good chance of taking over an info hash, diff --git a/src/rpc.rs b/src/rpc.rs index b32d09e4..03e392a5 100644 --- a/src/rpc.rs +++ b/src/rpc.rs @@ -61,8 +61,11 @@ pub struct Rpc { last_table_ping: Instant, /// Closest responding nodes to specific target /// - /// as well as the dht size estimate based on closest claimed nodes, and closest responding nodes. - closest_nodes: LruCache>, f64, f64)>, + /// as well as the: + /// 1. dht size estimate based on closest claimed nodes, + /// 2. dht size estimate based on closest responding nodes. + /// 3. number of subnets with unique 6 bits prefix in ipv4 + closest_nodes: LruCache, // Active Queries queries: HashMap, @@ -74,6 +77,8 @@ pub struct Rpc { dht_size_estimates_sum: f64, /// Sum of Dht size estimates from closest _responding_ nodes from get queries. responders_based_dht_size_estimates_sum: f64, + /// Sum of the number of subnets with 6 bits prefix in the closest nodes ipv4 + subnets: usize, } impl Rpc { @@ -117,7 +122,9 @@ impl Rpc { last_table_ping: Instant::now(), dht_size_estimates_sum: 0.0, - responders_based_dht_size_estimates_sum: 0.0, + // Don't store to too many nodes just because you are in a cold start. 
+ responders_based_dht_size_estimates_sum: 1_000_000.0, + subnets: 20, }) } @@ -210,20 +217,37 @@ impl Rpc { let closest = query.closest(); let responders = query.responders(); - let closest_responding_nodes = responders.nodes_until_edk( + let closest_responding_nodes = responders.take_until_secure( self.responders_based_dht_size_estimates_sum as usize / self.closest_nodes.len().max(1), + self.subnets / self.closest_nodes.len().max(1), ); if self.closest_nodes.len() >= MAX_CACHED_BUCKETS { - if let Some((_, (_, closest, responding))) = self.closest_nodes.pop_lru() { - self.dht_size_estimates_sum -= closest; - self.responders_based_dht_size_estimates_sum -= responding; + if let Some(( + _, + CachedGetQuery { + dht_size_estimate, + responders_dht_size_estimate, + subnets, + .. + }, + )) = self.closest_nodes.pop_lru() + { + self.dht_size_estimates_sum -= dht_size_estimate; + self.responders_based_dht_size_estimates_sum -= + responders_dht_size_estimate; + self.subnets -= subnets as usize }; } - self.dht_size_estimates_sum += closest.dht_size_estimate(); - self.responders_based_dht_size_estimates_sum += responders.dht_size_estimate(); + let dht_size_estimate = closest.dht_size_estimate(); + let responders_dht_size_estimate = responders.dht_size_estimate(); + let subnets_count = closest.subnets_count(); + + self.dht_size_estimates_sum += dht_size_estimate; + self.responders_based_dht_size_estimates_sum += responders_dht_size_estimate; + self.subnets += subnets_count as usize; if let Some(put_query) = self.put_queries.get_mut(id) { put_query.start(&mut self.socket, closest_responding_nodes) @@ -231,11 +255,12 @@ impl Rpc { self.closest_nodes.put( *id, - ( - closest_responding_nodes.into(), - closest.dht_size_estimate(), - responders.dht_size_estimate(), - ), + CachedGetQuery { + closest_nodes: closest_responding_nodes.into(), + dht_size_estimate, + responders_dht_size_estimate, + subnets: subnets_count, + }, ); }; } @@ -314,10 +339,13 @@ impl Rpc { let mut query = 
PutQuery::new(target, request.clone(), sender); - if let Some((closest_nodes, _, _)) = self + if let Some(closest_nodes) = self .closest_nodes .get(&target) - .filter(|(nodes, _, _)| !nodes.is_empty() && nodes.iter().any(|n| n.valid_token())) + .map(|cached| &cached.closest_nodes) + .filter(|closest_nodes| { + !closest_nodes.is_empty() && closest_nodes.iter().any(|n| n.valid_token()) + }) { query.start(&mut self.socket, closest_nodes) } else { @@ -402,8 +430,8 @@ impl Rpc { query.add_candidate(node) } - if let Some((cached_closest, _, _)) = self.closest_nodes.get(&target) { - for node in cached_closest { + if let Some(CachedGetQuery { closest_nodes, .. }) = self.closest_nodes.get(&target) { + for node in closest_nodes { query.add_candidate(node.clone()) } } @@ -675,6 +703,13 @@ impl Drop for Rpc { } } +struct CachedGetQuery { + closest_nodes: Vec>, + dht_size_estimate: f64, + responders_dht_size_estimate: f64, + subnets: u8, +} + /// Any received message and done queries in the [Rpc::tick]. #[derive(Debug, Clone)] pub struct RpcTickReport { diff --git a/src/rpc/closest_nodes.rs b/src/rpc/closest_nodes.rs index ad77d4f9..1bf54943 100644 --- a/src/rpc/closest_nodes.rs +++ b/src/rpc/closest_nodes.rs @@ -1,4 +1,4 @@ -use std::{convert::TryInto, rc::Rc}; +use std::{collections::HashSet, convert::TryInto, rc::Rc}; use crate::{common::MAX_BUCKET_SIZE_K, Id, Node}; @@ -61,27 +61,57 @@ impl ClosestNodes { self.nodes.is_empty() } - /// Get the closest [K][MAX_BUCKET_SIZE_K] nodes or all the nodes until the - /// expected distance of the Kth node, given a DHT size estimation. - pub fn nodes_until_edk(&self, previous_dht_size_estimate: usize) -> &[Rc] { - let mut until_edk = 0; + /// Take enough nodes closest to the target, until the following are satisfied: + /// 1. At least the closest [k][MAX_BUCKET_SIZE_K]. + /// 2. The last node should be at a distance `edk` which is the expected distance of the 20th + /// node given previous estimations of the DHT size. + /// 3. 
The number of subnets with unique 6 bits prefix in nodes ipv4 addresses matches or exceeds + /// the average from previous queries. + /// + /// If one or more of these conditions are not met, then we just take all responding nodes + /// and store data at them. + pub fn take_until_secure( + &self, + previous_dht_size_estimate: usize, + average_subnets: usize, + ) -> &[Rc] { + let mut until_secure = 0; // 20 / dht_size_estimate == expected_dk / ID space // so expected_dk = 20 * ID space / dht_size_estimate let expected_dk = (20.0 * u128::MAX as f64 / (previous_dht_size_estimate as f64 + 1.0)) as u128; + let mut subnets = HashSet::new(); + for node in &self.nodes { let distance = distance(&self.target, node); - if distance >= expected_dk { + subnets.insert(subnet(node)); + + if distance >= expected_dk && subnets.len() >= average_subnets { break; } - until_edk += 1; + until_secure += 1; } - &self.nodes[0..until_edk.max(MAX_BUCKET_SIZE_K).min(self.nodes().len())] + &self.nodes[0..until_secure.max(MAX_BUCKET_SIZE_K).min(self.nodes().len())] + } + + /// Count the number of subnets with unique 6 bits prefix in ipv4 + pub fn subnets_count(&self) -> u8 { + if self.nodes.is_empty() { + return 20; + } + + let mut subnets = HashSet::new(); + + for node in self.nodes.iter().take(MAX_BUCKET_SIZE_K) { + subnets.insert(subnet(node)); + } + + subnets.len() as u8 } /// An estimation of the Dht from the distribution of closest nodes @@ -98,6 +128,13 @@ impl ClosestNodes { } } +fn subnet(node: &Rc) -> u8 { + match node.address().ip() { + std::net::IpAddr::V4(ip) => ((ip.to_bits() >> 26) & 0b0011_1111) as u8, + _ => unimplemented!(), + } +} + fn distance(target: &Id, node: &Node) -> u128 { let xor = node.id.xor(target); @@ -179,7 +216,7 @@ mod tests { } #[test] - fn counter_vertical_sybil_attack() { + fn take_until_expected_distance_to_20th_node() { let target = Id::random(); let dht_size_estimate = 200; @@ -204,7 +241,7 @@ mod tests { closest_nodes.add(node); } - let closest = 
closest_nodes.nodes_until_edk(dht_size_estimate); + let closest = closest_nodes.take_until_secure(dht_size_estimate, 0); assert!((closest.len() - sybil.nodes().len()) > 10); }