From f6698d6c14e7d2a39fe50b2539617bd9faa03066 Mon Sep 17 00:00:00 2001 From: Roderick van Domburg Date: Fri, 3 Jan 2025 11:40:00 +0100 Subject: [PATCH 01/18] Switch decoder output from i16 to f32 samples This change improves audio quality in two ways: 1. Having decoders output `f32` samples directly instead of converting to `i16` 2. Properly handling high bit-depth FLAC and WAV files (20/24/32-bit) without downsampling Benefits include: - Eliminates unnecessary quantization error from `i16` conversion - Better matches the rodio pipeline which already uses `f32` internally - Preserves greater-than-i16 precision from formats like WAV and FLAC - More natural for decoders like Vorbis/MP3 that operate in floating point Breaking change - decoder output type changes from `i16` to `f32`. Changes: - Updated decoders to output `f32` - Updated `wav_test.rs` and `flac_test.rs` assertions to check for non-zero `f32` samples - Updated benchmarks to use `f32` samples and removed unnecessary `f32` conversions Fixes RustAudio/rodio#364 --- CHANGELOG.md | 1 + benches/conversions.rs | 2 +- benches/resampler.rs | 8 ++-- benches/shared.rs | 2 +- src/decoder/flac.rs | 21 ++++++---- src/decoder/mod.rs | 13 +++--- src/decoder/mp3.rs | 7 ++-- src/decoder/symphonia.rs | 12 +++--- src/decoder/vorbis.rs | 38 +++++++++++------ src/decoder/wav.rs | 89 +++++++++++++++------------------------- tests/flac_test.rs | 4 +- tests/mp4a_test.rs | 2 +- tests/seek.rs | 4 +- tests/wav_test.rs | 12 +++--- 14 files changed, 109 insertions(+), 106 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea4de731..4fcdf718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Breaking: `Sink::try_new` renamed to `connect_new` and does not return error anymore. `Sink::new_idle` was renamed to `new`. - Breaking: In the `Source` trait, the method `current_frame_len()` was renamed to `current_span_len()`. +- Breaking: `Decoder` now outputs `f32` samples instead of `i16`. - The term 'frame' was renamed to 'span' in the crate and documentation. ### Fixed diff --git a/benches/conversions.rs b/benches/conversions.rs index 9bebb974..1610191b 100644 --- a/benches/conversions.rs +++ b/benches/conversions.rs @@ -10,7 +10,7 @@ fn main() { } #[divan::bench(types = [i16, u16, f32])] -fn from_i16_to>(bencher: Bencher) { +fn from_f32_to>(bencher: Bencher) { bencher .with_inputs(|| TestSource::music_wav()) .bench_values(|source| { diff --git a/benches/resampler.rs b/benches/resampler.rs index 22ddbf15..7d4609fa 100644 --- a/benches/resampler.rs +++ b/benches/resampler.rs @@ -13,11 +13,11 @@ fn main() { fn no_resampling(bencher: Bencher) { bencher .with_inputs(|| { - let source = TestSource::::music_wav(); + let source = TestSource::::music_wav(); (source.channels(), source.sample_rate(), source) }) .bench_values(|(channels, sample_rate, source)| { - UniformSourceIterator::<_, i16>::new(source, channels, sample_rate) + UniformSourceIterator::<_, f32>::new(source, channels, sample_rate) .for_each(divan::black_box_drop) }) } @@ -32,11 +32,11 @@ const COMMON_SAMPLE_RATES: [u32; 12] = [ fn resample_to(bencher: Bencher, target_sample_rate: u32) { bencher .with_inputs(|| { - let source = TestSource::::music_wav(); + let source = TestSource::::music_wav(); (source.channels(), source) }) .bench_values(|(channels, source)| { - UniformSourceIterator::<_, i16>::new(source, channels, target_sample_rate) + UniformSourceIterator::<_, f32>::new(source, channels, target_sample_rate) .for_each(divan::black_box_drop) }) } diff --git a/benches/shared.rs b/benches/shared.rs index 8cab969d..a99e9ad9 100644 --- a/benches/shared.rs +++ b/benches/shared.rs @@ -43,7 +43,7 @@ impl Source for TestSource { } } -impl TestSource { +impl TestSource { pub fn music_wav() -> Self { let data = include_bytes!("../assets/music.wav"); let cursor = Cursor::new(data); diff --git a/src/decoder/flac.rs b/src/decoder/flac.rs index 9773f9d8..83ab235a 100644 --- a/src/decoder/flac.rs +++ b/src/decoder/flac.rs @@ -1,4 +1,3 @@ -use std::cmp::Ordering; use std::io::{Read, Seek, SeekFrom}; use std::mem; use std::time::Duration; @@ -7,7 +6,11 @@ use crate::source::SeekError; use crate::Source; use crate::common::{ChannelCount, SampleRate}; + use claxon::FlacReader; +use cpal::Sample; + +use super::DecoderFormat; /// Decoder for the Flac format. pub struct FlacDecoder @@ -94,10 +97,10 @@ impl Iterator for FlacDecoder where R: Read + Seek, { - type Item = i16; + type Item = DecoderFormat; #[inline] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { loop { if self.current_block_off < self.current_block.len() { // Read from current block. @@ -106,10 +109,14 @@ where + self.current_block_off / self.channels as usize; let raw_val = self.current_block[real_offset]; self.current_block_off += 1; - let real_val = match self.bits_per_sample.cmp(&16) { - Ordering::Less => (raw_val << (16 - self.bits_per_sample)) as i16, - Ordering::Equal => raw_val as i16, - Ordering::Greater => (raw_val >> (self.bits_per_sample - 16)) as i16, + let real_val = match self.bits_per_sample { + 8 => (raw_val as i8).to_sample::(), + 12 => (raw_val << 20).to_sample::(), + 16 => (raw_val as i16).to_sample::(), + 20 => (raw_val << 12).to_sample::(), + 24 => (raw_val << 8).to_sample::(), + 32 => raw_val.to_sample::(), + _ => panic!("Unimplemented flac spec: {} bits", self.bits_per_sample), }; return Some(real_val); } diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs index ad1e61d7..74dbb7eb 100644 --- a/src/decoder/mod.rs +++ b/src/decoder/mod.rs @@ -31,6 +31,9 @@ mod vorbis; #[cfg(all(feature = "wav", not(feature = "symphonia-wav")))] mod wav; +/// Output format of the decoders. +pub type DecoderFormat = f32; + /// Source of audio samples from decoding a file. /// /// Supports MP3, WAV, Vorbis and Flac. @@ -68,7 +71,7 @@ where impl DecoderImpl { #[inline] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { match self { #[cfg(all(feature = "wav", not(feature = "symphonia-wav")))] DecoderImpl::Wav(source) => source.next(), @@ -396,10 +399,10 @@ impl Iterator for Decoder where R: Read + Seek, { - type Item = i16; + type Item = DecoderFormat; #[inline] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { self.0.next() } @@ -442,10 +445,10 @@ impl Iterator for LoopedDecoder where R: Read + Seek, { - type Item = i16; + type Item = DecoderFormat; #[inline] - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { if let Some(sample) = self.0.next() { Some(sample) } else { diff --git a/src/decoder/mp3.rs b/src/decoder/mp3.rs index f85605c7..99d69a63 100644 --- a/src/decoder/mp3.rs +++ b/src/decoder/mp3.rs @@ -1,6 +1,7 @@ use std::io::{Read, Seek, SeekFrom}; use std::time::Duration; +use super::DecoderFormat; use crate::source::SeekError; use crate::Source; @@ -34,7 +35,7 @@ where // .expect("should be able to allocate memory, perform IO"); // let current_span = decoder.decode_frame() let current_span = decoder.next_frame() - // the reader makes enough data available therefore + // the reader makes enough data available therefore // if we crash here the invariant broken is: .expect("data should not corrupt"); @@ -92,9 +93,9 @@ impl Iterator for Mp3Decoder where R: Read + Seek, { - type Item = i16; + type Item = DecoderFormat; - fn next(&mut self) -> Option { + fn next(&mut self) -> Option { if self.current_span_offset == self.current_span_len().unwrap() { if let Ok(span) = self.decoder.next_frame() { // if let Ok(span) = self.decoder.decode_frame() { diff --git a/src/decoder/symphonia.rs b/src/decoder/symphonia.rs index c466e6ee..9b010d11 100644 --- a/src/decoder/symphonia.rs +++ b/src/decoder/symphonia.rs @@ -14,7 +14,7 @@ use symphonia::{ default::get_probe, }; -use super::DecoderError; +use super::{DecoderError, DecoderFormat}; use crate::common::{ChannelCount, SampleRate}; use crate::{source, Source}; @@ -28,7 +28,7 @@ pub(crate) struct SymphoniaDecoder { current_span_offset: usize, format: Box, total_duration: Option