Skip to content

Commit

Permalink
reorganizing
Browse files Browse the repository at this point in the history
  • Loading branch information
ciminilorenzo committed Nov 11, 2024
1 parent bdd3ba1 commit 60ed7c3
Show file tree
Hide file tree
Showing 16 changed files with 250 additions and 232 deletions.
5 changes: 3 additions & 2 deletions benches/benchmarks/model4encoder_building.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use criterion::{criterion_group, BatchSize, Criterion};
use dsi_bitstream::prelude::BE;
use folded_streaming_rans::bvgraph::mock_writers::{EntropyEstimator, Log2Estimator};
use folded_streaming_rans::bvgraph::writer::BVGraphModelBuilder;
use folded_streaming_rans::bvgraph::estimators::entropy_estimator::EntropyEstimator;
use folded_streaming_rans::bvgraph::writers::bvgraph_model_builder::BVGraphModelBuilder;
use pprof::criterion::{Output, PProfProfiler};
use webgraph::graphs::{BVComp, BVGraph};
use webgraph::prelude::SequentialLabeling;
use folded_streaming_rans::bvgraph::estimators::log2_estimator::Log2Estimator;

fn model4encoder_building_bench(c: &mut Criterion) {
let graph = BVGraph::with_basename("tests/data/cnr-2000/cnr-2000")
Expand Down
2 changes: 1 addition & 1 deletion src/ans/model4encoder_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl ANSModel4EncoderBuilder {
false => n.next_power_of_two(),
};

// We need the list of symbols' indexes sorted by the frequency of the related
// We need the list of symbols' indexes sorted by the frequency of the related
// symbol, in ascending order.
let sorted_indexes = folded_sym_freqs
.iter()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::convert::Infallible;
use webgraph::prelude::Encode;
use std::convert::Infallible;
use crate::ans::model4encoder::ANSModel4Encoder;
use crate::{Freq, Symbol, MAX_RAW_SYMBOL};
use crate::bvgraph::BVGraphComponent;
use crate::utils::ans_utils::fold_without_streaming_out;
use crate::{Freq, Symbol, MAX_RAW_SYMBOL};

#[derive(Clone)]
pub struct EntropyEstimator {
Expand All @@ -18,6 +18,7 @@ pub struct EntropyEstimator {
}

impl EntropyEstimator {

pub fn new(model: &ANSModel4Encoder, component_args: Vec<(usize, usize)>) -> Self {
let mut folding_thresholds = Vec::new();
let mut folding_offsets = Vec::new();
Expand Down Expand Up @@ -151,66 +152,4 @@ impl Encode for EntropyEstimator {
fn end_node(&mut self, _node: usize) -> Result<usize, Self::Error> {
Ok(0)
}
}

/// An estimator that approximates the cost of every symbol as
/// `floor(log2(value + 2))`, regardless of the component it belongs to.
#[derive(Clone, Default)]
pub struct Log2Estimator {}

impl Log2Estimator {
    /// Returns the estimated cost of `value`, computed as `floor(log2(value + 2))`.
    ///
    /// The `_component` argument is ignored: all components are costed the same way.
    fn get_symbol_cost(&self, value: u64, _component: BVGraphComponent) -> usize {
        // Widen to u128 before adding: with plain u64 arithmetic, values close to
        // `u64::MAX` would panic on overflow in debug builds, or wrap around to
        // 0/1 in release builds (and `ilog2(0)` panics).
        (u128::from(value) + 2).ilog2() as usize
    }
}

/// [`Encode`] implementation that emits no data: every `write_*` method only
/// reports the estimated cost of the value it receives.
impl Encode for Log2Estimator {
    /// None of the methods below ever returns an error.
    type Error = Infallible;

    /// Starting a node costs nothing.
    fn start_node(&mut self, _node: usize) -> Result<usize, Self::Error> {
        Ok(0)
    }

    /// Estimated cost of an outdegree.
    fn write_outdegree(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Outdegree);
        Ok(cost)
    }

    /// Estimated cost of a reference offset.
    fn write_reference_offset(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::ReferenceOffset);
        Ok(cost)
    }

    /// Estimated cost of a block count.
    fn write_block_count(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::BlockCount);
        Ok(cost)
    }

    /// Estimated cost of a block.
    fn write_block(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Blocks);
        Ok(cost)
    }

    /// Estimated cost of an interval count.
    fn write_interval_count(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalCount);
        Ok(cost)
    }

    /// Estimated cost of an interval start.
    fn write_interval_start(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalStart);
        Ok(cost)
    }

    /// Estimated cost of an interval length.
    fn write_interval_len(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalLen);
        Ok(cost)
    }

    /// Estimated cost of the first residual.
    fn write_first_residual(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::FirstResidual);
        Ok(cost)
    }

    /// Estimated cost of a residual.
    fn write_residual(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Residual);
        Ok(cost)
    }

    /// Flushing costs nothing.
    fn flush(&mut self) -> Result<usize, Self::Error> {
        Ok(0)
    }

    /// Ending a node costs nothing.
    fn end_node(&mut self, _node: usize) -> Result<usize, Self::Error> {
        Ok(0)
    }
}
}
65 changes: 65 additions & 0 deletions src/bvgraph/estimators/log2_estimator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
use webgraph::prelude::Encode;
use std::convert::Infallible;
use crate::bvgraph::BVGraphComponent;

/// An estimator that approximates the cost of every symbol as
/// `floor(log2(value + 2))`, regardless of the component it belongs to.
#[derive(Clone, Default)]
pub struct Log2Estimator {}

impl Log2Estimator {
    /// Returns the estimated cost of `value`, computed as `floor(log2(value + 2))`.
    ///
    /// The `_component` argument is ignored: all components are costed the same way.
    fn get_symbol_cost(&self, value: u64, _component: BVGraphComponent) -> usize {
        // Widen to u128 before adding: with plain u64 arithmetic, values close to
        // `u64::MAX` would panic on overflow in debug builds, or wrap around to
        // 0/1 in release builds (and `ilog2(0)` panics).
        (u128::from(value) + 2).ilog2() as usize
    }
}

/// [`Encode`] implementation that emits no data: every `write_*` method only
/// reports the estimated cost of the value it receives.
impl Encode for Log2Estimator {
    /// None of the methods below ever returns an error.
    type Error = Infallible;

    /// Starting a node costs nothing.
    fn start_node(&mut self, _node: usize) -> Result<usize, Self::Error> {
        Ok(0)
    }

    /// Estimated cost of an outdegree.
    fn write_outdegree(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Outdegree);
        Ok(cost)
    }

    /// Estimated cost of a reference offset.
    fn write_reference_offset(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::ReferenceOffset);
        Ok(cost)
    }

    /// Estimated cost of a block count.
    fn write_block_count(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::BlockCount);
        Ok(cost)
    }

    /// Estimated cost of a block.
    fn write_block(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Blocks);
        Ok(cost)
    }

    /// Estimated cost of an interval count.
    fn write_interval_count(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalCount);
        Ok(cost)
    }

    /// Estimated cost of an interval start.
    fn write_interval_start(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalStart);
        Ok(cost)
    }

    /// Estimated cost of an interval length.
    fn write_interval_len(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::IntervalLen);
        Ok(cost)
    }

    /// Estimated cost of the first residual.
    fn write_first_residual(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::FirstResidual);
        Ok(cost)
    }

    /// Estimated cost of a residual.
    fn write_residual(&mut self, value: u64) -> Result<usize, Self::Error> {
        let cost = self.get_symbol_cost(value, BVGraphComponent::Residual);
        Ok(cost)
    }

    /// Flushing costs nothing.
    fn flush(&mut self) -> Result<usize, Self::Error> {
        Ok(0)
    }

    /// Ending a node costs nothing.
    fn end_node(&mut self, _node: usize) -> Result<usize, Self::Error> {
        Ok(0)
    }
}
2 changes: 2 additions & 0 deletions src/bvgraph/estimators/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod log2_estimator;
pub mod entropy_estimator;
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use webgraph::prelude::RandomAccessDecoderFactory;
use crate::ans::decoder::ANSDecoder;
use crate::ans::model4decoder::ANSModel4Decoder;
use crate::ans::Prelude;
use crate::EF;
use anyhow::Result;
use webgraph::prelude::{RandomAccessDecoderFactory, SequentialDecoderFactory};

pub struct ANSBVGraphDecoderFactory {
/// The EliasFano containing the stream pointers for each of the nodes.
Expand Down Expand Up @@ -37,7 +36,7 @@ impl ANSBVGraphDecoderFactory {
impl RandomAccessDecoderFactory for ANSBVGraphDecoderFactory {
type Decoder<'b> = ANSDecoder<'b> where Self: 'b;

fn new_decoder(&self, node: usize) -> Result<Self::Decoder<'_>> {
fn new_decoder(&self, node: usize) -> anyhow::Result<Self::Decoder<'_>> {
// nodes' phases are stored in reversed order. Thus, for example, let's
// take the last phase if we want the phase of the first node.
let pointer = self.phases.get(self.num_nodes - node - 1);
Expand All @@ -50,33 +49,4 @@ impl RandomAccessDecoderFactory for ANSBVGraphDecoderFactory {
state,
))
}
}

/// Factory of sequential decoders over an ANS-encoded graph.
pub struct ANSBVGraphSeqDecoderFactory {
    /// The prelude produced when the graph was encoded.
    prelude: Prelude,

    /// The decoder-side model, built from the tables stored in the prelude.
    model: ANSModel4Decoder,
}

impl ANSBVGraphSeqDecoderFactory {
    /// Creates a factory owning `prelude` together with the
    /// [`ANSModel4Decoder`] built from `prelude.tables`.
    pub fn new(prelude: Prelude) -> Self {
        // Build the model while `prelude` can still be borrowed, i.e. before it
        // is moved into the struct.
        let model = ANSModel4Decoder::new(&prelude.tables);
        Self { prelude, model }
    }
}

impl SequentialDecoderFactory for ANSBVGraphSeqDecoderFactory {
    type Decoder<'b> = ANSDecoder<'b> where Self: 'b;

    /// Returns a new [`ANSDecoder`] that borrows the stored model and the
    /// prelude's stream, and is given the state saved in the prelude.
    fn new_decoder(&self) -> Result<Self::Decoder<'_>> {
        let decoder = ANSDecoder::new(&self.model, &self.prelude.stream, self.prelude.state);
        Ok(decoder)
    }
}
}
33 changes: 33 additions & 0 deletions src/bvgraph/factories/bvgraphseq_decoder_factory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use webgraph::prelude::SequentialDecoderFactory;
use crate::ans::decoder::ANSDecoder;
use crate::ans::model4decoder::ANSModel4Decoder;
use crate::ans::Prelude;

/// Factory of sequential decoders over an ANS-encoded graph.
pub struct ANSBVGraphSeqDecoderFactory {
    /// The prelude produced when the graph was encoded.
    prelude: Prelude,

    /// The decoder-side model, built from the tables stored in the prelude.
    model: ANSModel4Decoder,
}

impl ANSBVGraphSeqDecoderFactory {
    /// Creates a factory owning `prelude` together with the
    /// [`ANSModel4Decoder`] built from `prelude.tables`.
    pub fn new(prelude: Prelude) -> Self {
        // Build the model while `prelude` can still be borrowed, i.e. before it
        // is moved into the struct.
        let model = ANSModel4Decoder::new(&prelude.tables);
        Self { prelude, model }
    }
}

impl SequentialDecoderFactory for ANSBVGraphSeqDecoderFactory {
    type Decoder<'b> = ANSDecoder<'b> where Self: 'b;

    /// Returns a new [`ANSDecoder`] that borrows the stored model and the
    /// prelude's stream, and is given the state saved in the prelude.
    fn new_decoder(&self) -> anyhow::Result<Self::Decoder<'_>> {
        let decoder = ANSDecoder::new(&self.model, &self.prelude.stream, self.prelude.state);
        Ok(decoder)
    }
}
2 changes: 2 additions & 0 deletions src/bvgraph/factories/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod bvgraphseq_decoder_factory;
pub mod bvgraph_decoder_factory;
20 changes: 10 additions & 10 deletions src/bvgraph/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use std::fmt::Display;

pub mod mock_writers;
pub mod random_access;
pub mod reader;
pub mod factories;
pub mod sequential;
pub mod writer;
pub mod estimators;
pub mod writers;

/// An enumeration of the components getting a different model in the Rust
/// implementation of the BV format.
/// An enumeration of the components composing the BVGraph format.
#[derive(Clone, Copy, Debug)]
pub enum BVGraphComponent {
Outdegree,
Expand All @@ -21,6 +20,12 @@ pub enum BVGraphComponent {
Residual,
}

impl BVGraphComponent {
    /// How many distinct components the BVGraph format is made of.
    pub const COMPONENTS: usize = 9;
}

impl Display for BVGraphComponent {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down Expand Up @@ -53,8 +58,3 @@ impl From<usize> for BVGraphComponent {
}
}
}

impl BVGraphComponent {
/// The number of components in the BVGraph format.
///
/// NOTE(review): hand-maintained constant — presumably expected to match the
/// number of `BVGraphComponent` variants; confirm whenever a variant is added.
pub const COMPONENTS: usize = 9;
}
8 changes: 5 additions & 3 deletions src/bvgraph/random_access.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::ans::{ANSCompressorPhase, Prelude};
use crate::bvgraph::mock_writers::{EntropyEstimator, Log2Estimator};
use crate::bvgraph::reader::ANSBVGraphDecoderFactory;
use crate::bvgraph::writer::{ANSBVGraphEncodeAndEstimate, BVGraphModelBuilder};
use crate::bvgraph::estimators::entropy_estimator::EntropyEstimator;
use crate::bvgraph::factories::bvgraph_decoder_factory::ANSBVGraphDecoderFactory;
use crate::bvgraph::writers::bvgraph_encoder::ANSBVGraphEncodeAndEstimate;
use crate::{State, EF};
use anyhow::{Context, Result};
use dsi_bitstream::prelude::BE;
Expand All @@ -14,6 +14,8 @@ use std::io::BufWriter;
use std::path::PathBuf;
use sux::dict::EliasFanoBuilder;
use webgraph::prelude::{BvComp, BvGraph, BvGraphSeq, SequentialLabeling};
use crate::bvgraph::estimators::log2_estimator::Log2Estimator;
use crate::bvgraph::writers::bvgraph_model_builder::BVGraphModelBuilder;

/// An ANS-encoded BVGraph that can be accessed both randomly and sequentially.
///
/// NOTE(review): the struct has no fields; presumably it only groups associated
/// functions defined further down in this file — confirm.
pub struct ANSBVGraph();
Expand Down
2 changes: 1 addition & 1 deletion src/bvgraph/sequential.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::ans::Prelude;
use crate::bvgraph::reader::ANSBVGraphSeqDecoderFactory;
use crate::bvgraph::factories::bvgraphseq_decoder_factory::ANSBVGraphSeqDecoderFactory;
use epserde::prelude::*;
use std::path::PathBuf;
use webgraph::prelude::BvGraphSeq;
Expand Down
Loading

0 comments on commit 60ed7c3

Please sign in to comment.