idsp/
hbf.rs

1//! Half-band filters and cascades
2//!
3//! Used to perform very efficient high-dynamic range rate changes by powers of two.
4//!
5//! Symmetric and anti-symmetric FIR filter prototype.
6//!
7//! # Generics
8//! * `M`: number of taps, one-sided. The filter has effectively 2*M DSP taps
9//!
10//! # Half band decimation/interpolation filters
11//!
12//! Half-band filters (rate change of 2) and cascades of HBFs are implemented in
13//! [`HbfDec`] and [`HbfInt`] etc.
14//! The half-band filter has unique properties that make it preferable in many cases:
15//!
16//! * only needs M multiplications (fused multiply accumulate) for 4*M taps
17//! * HBF decimator stores less state than a generic FIR filter
18//! * as a FIR filter has linear phase/flat group delay
19//! * very small passband ripple and excellent stopband attenuation
20//! * as a cascade of decimation/interpolation filters, the higher-rate filters
21//!   need successively fewer taps, allowing the filtering to be dominated by
22//!   only the highest rate filter with the fewest taps
23//! * In a cascade of HBF the overall latency, group delay, and impulse response
24//!   length are dominated by the lowest-rate filter which, due to its manageable transition
25//!   band width (compared to single-stage filters) can be smaller, shorter, and faster.
26//! * high dynamic range and inherent stability compared with an IIR filter
27//! * can be combined with a CIC filter for non-power-of-two or even higher rate changes
28//!
29//! The implementations here are all `no_std` and `no-alloc`.
30//! They support (but don't require) in-place filtering to reduce memory usage.
31//! They unroll and optimize extremely well targetting current architectures,
32//! e.g. requiring less than 4 instructions per input item for the full `HbfDecCascade` on Skylake.
33//! The filters are optimized for decent block sizes and perform best (i.e. with negligible
34//! overhead) for blocks of 32 high-rate items or more, depending very much on architecture.
35
36use core::{
37    iter::Sum,
38    ops::{Add, Mul, Sub},
39    slice::{from_mut, from_ref},
40};
41
42use dsp_process::{ChunkIn, ChunkOut, Major, SplitInplace, SplitProcess};
43
44/// Perform the FIR convolution and yield results iteratively.
45#[inline]
46fn get<C: Copy, T, const M: usize, const ODD: bool, const SYM: bool>(
47    c: &[C; M],
48    x: &[T],
49) -> impl Iterator<Item = T>
50where
51    T: Copy + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
52{
53    // https://doc.rust-lang.org/std/primitive.slice.html#method.array_windows
54    x.windows(2 * M + ODD as usize).map(|x| {
55        let Some((old, new)) = x.first_chunk::<M>().zip(x.last_chunk::<M>()) else {
56            unreachable!()
57        };
58        // Taps from small (large distance from center) to large (center taps)
59        // to reduce FP cancellation a bit
60        let xc = old
61            .iter()
62            .zip(new.iter().rev())
63            .zip(c.iter())
64            .map(|((old, new), tap)| (if SYM { *new + *old } else { *new - *old }) * *tap)
65            .sum();
66        if ODD && SYM { xc + x[M] } else { xc }
67    })
68}
69
70macro_rules! type_fir {
71    ($name:ident, $odd:literal, $sym:literal) => {
72        #[doc = concat!("Linear phase FIR taps for odd = ", stringify!($odd), " and symmetric = ", stringify!($sym))]
73        #[derive(Clone, Copy, Debug, Default)]
74        #[repr(transparent)]
75        pub struct $name<C>(pub C);
76        impl<T, const M: usize> $name<[T; M]> {
77            /// Response length/number of taps minus one
78            pub const LEN: usize = 2 * M - 1 + $odd as usize;
79
80            #[allow(unused)]
81            const fn len(&self) -> usize {
82                Self::LEN
83            }
84        }
85
86        impl<
87            C: Copy,
88            T: Copy + Default + Sub<T, Output = T> + Add<Output = T> + Mul<C, Output = T> + Sum,
89            const M: usize,
90            const N: usize,
91        > SplitProcess<T, T, [T; N]> for $name<[C; M]>
92        {
93            fn block(&self, state: &mut [T; N], x: &[T], y: &mut [T]) {
94                for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
95                    state[Self::LEN..][..x.len()].copy_from_slice(x);
96                    for (y, x) in y.iter_mut().zip(get::<_, _, _, $odd, $sym>(&self.0, state)) {
97                        *y = x;
98                    }
99                    state.copy_within(x.len()..x.len() + Self::LEN, 0);
100                }
101            }
102
103            fn process(&self, state: &mut [T; N], x: T) -> T {
104                let mut y = T::default();
105                self.block(state, from_ref(&x), from_mut(&mut y));
106                y
107            }
108        }
109
110        impl<
111            C: Copy,
112            T: Copy + Default + Sub<T, Output = T> + Add<Output = T> + Mul<C, Output = T> + Sum,
113            const M: usize,
114            const N: usize,
115        > SplitInplace<T, [T; N]> for $name<[C; M]>
116        {
117            fn inplace(&self, state: &mut [T; N], xy: &mut [T]) {
118                for xy in xy.chunks_mut(N - Self::LEN) {
119                    state[Self::LEN..][..xy.len()].copy_from_slice(xy);
120                    for (y, x) in xy.iter_mut().zip(get::<_, _, _, $odd, $sym>(&self.0, state)) {
121                        *y = x;
122                    }
123                    state.copy_within(xy.len()..xy.len() + Self::LEN, 0);
124                }
125            }
126        }
127    };
128}
129// Type 1 taps
130// Center tap is unity
131type_fir!(OddSymmetric, true, true);
132// Type 2 taps
133type_fir!(EvenSymmetric, false, true);
134// Type 3 taps
135// Center tap is zero
136type_fir!(OddAntiSymmetric, true, false);
137// Type 4 taps
138type_fir!(EvenAntiSymmetric, false, false);
139
140/// Half band decimator (decimate by two) state
141#[derive(Clone, Debug)]
142pub struct HbfDec<T> {
143    even: T, // at least N - M len
144    odd: T,  // N > 2*M - 1 (=EvenSymmetric::LEN)
145}
146
147impl<T: Copy + Default, const N: usize> Default for HbfDec<[T; N]> {
148    fn default() -> Self {
149        Self {
150            even: [T::default(); _],
151            odd: [T::default(); _],
152        }
153    }
154}
155
156impl<
157    C: Copy,
158    T: Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
159    const M: usize,
160    const N: usize,
161> SplitProcess<[T; 2], T, HbfDec<[T; N]>> for EvenSymmetric<[C; M]>
162{
163    fn block(&self, state: &mut HbfDec<[T; N]>, x: &[[T; 2]], y: &mut [T]) {
164        debug_assert_eq!(x.len(), y.len());
165        const { assert!(N > Self::LEN) }
166        for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
167            // load input
168            for (x, (even, odd)) in x.iter().zip(
169                state.even[M - 1..]
170                    .iter_mut()
171                    .zip(state.odd[Self::LEN..].iter_mut()),
172            ) {
173                *even = x[0];
174                *odd = x[1];
175            }
176            // compute output
177            let odd = get::<_, _, _, false, true>(&self.0, &state.odd);
178            for (y, (odd, even)) in y.iter_mut().zip(odd.zip(state.even.iter().copied())) {
179                *y = odd + even;
180            }
181            // keep state
182            state.even.copy_within(x.len()..x.len() + M - 1, 0);
183            state.odd.copy_within(x.len()..x.len() + Self::LEN, 0);
184        }
185    }
186
187    fn process(&self, state: &mut HbfDec<[T; N]>, x: [T; 2]) -> T {
188        let mut y = Default::default();
189        self.block(state, from_ref(&x), from_mut(&mut y));
190        y
191    }
192}
193
194/// Half band interpolator (interpolation rate 2) state
195#[derive(Clone, Debug)]
196pub struct HbfInt<T> {
197    x: T, // len N > EvenSymmetric::LEN
198}
199
200impl<T: Default + Copy, const N: usize> Default for HbfInt<[T; N]> {
201    fn default() -> Self {
202        Self {
203            x: [T::default(); _],
204        }
205    }
206}
207
208impl<
209    C: Copy,
210    T: Copy + Default + Add<Output = T> + Sub<Output = T> + Mul<C, Output = T> + Sum,
211    const M: usize,
212    const N: usize,
213> SplitProcess<T, [T; 2], HbfInt<[T; N]>> for EvenSymmetric<[C; M]>
214{
215    fn block(&self, state: &mut HbfInt<[T; N]>, x: &[T], y: &mut [[T; 2]]) {
216        debug_assert_eq!(x.len(), y.len());
217        const { assert!(N > Self::LEN) }
218        for (x, y) in x.chunks(N - Self::LEN).zip(y.chunks_mut(N - Self::LEN)) {
219            // load input
220            state.x[Self::LEN..][..x.len()].copy_from_slice(x);
221            // compute output
222            let odd = get::<_, _, _, false, true>(&self.0, &state.x);
223            for (y, (even, odd)) in y.iter_mut().zip(odd.zip(state.x[M..].iter().copied())) {
224                *y = [even, odd]; // interpolated, center tap: identity
225            }
226            // keep state
227            state.x.copy_within(x.len()..x.len() + Self::LEN, 0);
228        }
229    }
230
231    fn process(&self, state: &mut HbfInt<[T; N]>, x: T) -> [T; 2] {
232        let mut y = Default::default();
233        self.block(state, from_ref(&x), from_mut(&mut y));
234        y
235    }
236}
237
238/// Half band filter cascade taps for a 98 dB filter
239type HbfTaps98 = (
240    EvenSymmetric<[f32; 15]>,
241    EvenSymmetric<[f32; 6]>,
242    EvenSymmetric<[f32; 3]>,
243    EvenSymmetric<[f32; 3]>,
244    EvenSymmetric<[f32; 2]>,
245);
246
247/// Half band filter cascade taps
248///
249/// * obtained with `signal.remez(2*n, bands=(0, .4, .5, .5), desired=(1, 0), fs=1, grid_density=1<<10)[:n]`
250/// * more than 98 dB stop band attenuation (>16 bit)
251/// * 0.4 pass band (relative to lowest sample rate)
252/// * less than 0.001 dB ripple
253/// * linear phase/flat group delay
254/// * rate change up to 2**5 = 32
255/// * lowest rate filter is at 0 index
256/// * use taps 0..n for 2**n interpolation/decimation
257#[allow(clippy::excessive_precision)]
258pub const HBF_TAPS_98: HbfTaps98 = (
259    // n=15 coefficients (effective number of DSP taps 4*15-1 = 59), transition band width df=.2 fs
260    EvenSymmetric([
261        7.02144012e-05,
262        -2.43279582e-04,
263        6.35026936e-04,
264        -1.39782541e-03,
265        2.74613582e-03,
266        -4.96403839e-03,
267        8.41806912e-03,
268        -1.35827601e-02,
269        2.11004053e-02,
270        -3.19267647e-02,
271        4.77024289e-02,
272        -7.18014345e-02,
273        1.12942004e-01,
274        -2.03279594e-01,
275        6.33592923e-01,
276    ]),
277    // 6, .47
278    EvenSymmetric([
279        -0.00086943,
280        0.00577837,
281        -0.02201674,
282        0.06357869,
283        -0.16627679,
284        0.61979312,
285    ]),
286    // 3, .754
287    EvenSymmetric([0.01414651, -0.10439639, 0.59026742]),
288    // 3, .877
289    EvenSymmetric([0.01227974, -0.09930782, 0.58702834]),
290    // 2, .94
291    EvenSymmetric([-0.06291796, 0.5629161]),
292);
293
294/// Half band filter cascade taps
295type HbfTaps = (
296    EvenSymmetric<[f32; 23]>,
297    EvenSymmetric<[f32; 10]>,
298    EvenSymmetric<[f32; 5]>,
299    EvenSymmetric<[f32; 4]>,
300    EvenSymmetric<[f32; 3]>,
301);
302
303/// Half band filters taps
304///
305/// * 140 dB stopband, 0.2 µB passband ripple, limited by f32 dynamic range
306/// * otherwise like [`HBF_TAPS_98`].
307#[allow(clippy::excessive_precision)]
308pub const HBF_TAPS: HbfTaps = (
309    EvenSymmetric([
310        7.60375795e-07,
311        -3.77494111e-06,
312        1.26458559e-05,
313        -3.43188253e-05,
314        8.10687478e-05,
315        -1.72971467e-04,
316        3.40845059e-04,
317        -6.29522864e-04,
318        1.10128831e-03,
319        -1.83933299e-03,
320        2.95124926e-03,
321        -4.57290964e-03,
322        6.87374176e-03,
323        -1.00656257e-02,
324        1.44199840e-02,
325        -2.03025100e-02,
326        2.82462332e-02,
327        -3.91128509e-02,
328        5.44795658e-02,
329        -7.77002672e-02,
330        1.17523452e-01,
331        -2.06185388e-01,
332        6.34588695e-01,
333    ]),
334    EvenSymmetric([
335        -1.12811343e-05,
336        1.12724671e-04,
337        -6.07439343e-04,
338        2.31904511e-03,
339        -7.00322950e-03,
340        1.78225473e-02,
341        -4.01209836e-02,
342        8.43315989e-02,
343        -1.83189521e-01,
344        6.26346521e-01,
345    ]),
346    EvenSymmetric([0.0007686, -0.00768669, 0.0386536, -0.14002434, 0.60828885]),
347    EvenSymmetric([-0.00261331, 0.02476858, -0.12112638, 0.59897111]),
348    EvenSymmetric([0.01186105, -0.09808109, 0.58622005]),
349);
350
351/// Passband width in units of lowest sample rate
352pub const HBF_PASSBAND: f32 = 0.4;
353
354/// Cascade block size
355///
356/// Heuristically performs well.
357pub const HBF_CASCADE_BLOCK: usize = 1 << 5;
358
359/// Half-band decimation filter state
360///
361/// See [HBF_TAPS] and [HBF_DEC_CASCADE].
362/// Supports rate changes are power of two up to 32.
363pub type HbfDec2 = HbfDec<[f32; HBF_TAPS.0.len() + HBF_CASCADE_BLOCK]>;
364/// HBF Decimate-by-4 cascade state
365pub type HbfDec4 = (
366    HbfDec<[f32; HBF_TAPS.1.len() + (HBF_CASCADE_BLOCK << 1)]>,
367    HbfDec2,
368);
369/// HBF Decimate-by-8 cascade state
370pub type HbfDec8 = (
371    HbfDec<[f32; HBF_TAPS.2.len() + (HBF_CASCADE_BLOCK << 2)]>,
372    HbfDec4,
373);
374/// HBF Decimate-by-16 cascade state
375pub type HbfDec16 = (
376    HbfDec<[f32; HBF_TAPS.3.len() + (HBF_CASCADE_BLOCK << 3)]>,
377    HbfDec8,
378);
379/// HBF Decimate-by-32 cascade state
380pub type HbfDec32 = (
381    HbfDec<[f32; HBF_TAPS.4.len() + (HBF_CASCADE_BLOCK << 4)]>,
382    HbfDec16,
383);
384
385type HbfDecCascade<const B: usize = HBF_CASCADE_BLOCK> = Major<
386    (
387        ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.4.0.len()]>, 2>,
388        Major<
389            (
390                ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.3.0.len()]>, 2>,
391                Major<
392                    (
393                        ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.2.0.len()]>, 2>,
394                        Major<
395                            (
396                                ChunkIn<&'static EvenSymmetric<[f32; HBF_TAPS.1.0.len()]>, 2>,
397                                &'static EvenSymmetric<[f32; HBF_TAPS.0.0.len()]>,
398                            ),
399                            [[f32; 2]; B],
400                        >,
401                    ),
402                    [[f32; 4]; B],
403                >,
404            ),
405            [[f32; 8]; B],
406        >,
407    ),
408    [[f32; 16]; B],
409>;
410
411/// HBF decimation cascade
412pub const HBF_DEC_CASCADE: HbfDecCascade = Major::new((
413    ChunkIn(&HBF_TAPS.4),
414    Major::new((
415        ChunkIn(&HBF_TAPS.3),
416        Major::new((
417            ChunkIn(&HBF_TAPS.2),
418            Major::new((ChunkIn(&HBF_TAPS.1), &HBF_TAPS.0)),
419        )),
420    )),
421));
422
423/// Response length, effective number of taps
424pub const fn hbf_dec_response_length(depth: usize) -> usize {
425    assert!(depth <= 5);
426    let mut n = 0;
427    if depth > 4 {
428        n /= 2;
429        n += HBF_TAPS.4.len();
430    }
431    if depth > 3 {
432        n /= 2;
433        n += HBF_TAPS.3.len();
434    }
435    if depth > 2 {
436        n /= 2;
437        n += HBF_TAPS.2.len();
438    }
439    if depth > 1 {
440        n /= 2;
441        n += HBF_TAPS.1.len();
442    }
443    if depth > 0 {
444        n /= 2;
445        n += HBF_TAPS.0.len();
446    }
447    n
448}
449
450/// Half-band interpolation filter state
451///
452/// See [HBF_TAPS] and [HBF_INT_CASCADE].
453/// Supports rate changes are power of two up to 32.
454pub type HbfInt2 = HbfInt<[f32; HBF_TAPS.0.len() + HBF_CASCADE_BLOCK]>;
455/// HBF interpolate-by-4 cascade state
456pub type HbfInt4 = (
457    HbfInt2,
458    HbfInt<[f32; HBF_TAPS.1.len() + (HBF_CASCADE_BLOCK << 1)]>,
459);
460/// HBF interpolate-by-8 cascade state
461pub type HbfInt8 = (
462    HbfInt4,
463    HbfInt<[f32; HBF_TAPS.2.len() + (HBF_CASCADE_BLOCK << 2)]>,
464);
465/// HBF interpolate-by-16 cascade state
466pub type HbfInt16 = (
467    HbfInt8,
468    HbfInt<[f32; HBF_TAPS.3.len() + (HBF_CASCADE_BLOCK << 3)]>,
469);
470/// HBF interpolate-by-32 cascade state
471pub type HbfInt32 = (
472    HbfInt16,
473    HbfInt<[f32; HBF_TAPS.4.len() + (HBF_CASCADE_BLOCK << 4)]>,
474);
475
476type HbfIntCascade<const B: usize = HBF_CASCADE_BLOCK> = Major<
477    (
478        Major<
479            (
480                Major<
481                    (
482                        Major<
483                            (
484                                &'static EvenSymmetric<[f32; HBF_TAPS.0.0.len()]>,
485                                ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.1.0.len()]>, 2>,
486                            ),
487                            [[f32; 2]; B],
488                        >,
489                        ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.2.0.len()]>, 2>,
490                    ),
491                    [[f32; 4]; B],
492                >,
493                ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.3.0.len()]>, 2>,
494            ),
495            [[f32; 8]; B],
496        >,
497        ChunkOut<&'static EvenSymmetric<[f32; HBF_TAPS.4.0.len()]>, 2>,
498    ),
499    [[f32; 16]; B],
500>;
501
502/// HBF interpolation cascade
503pub const HBF_INT_CASCADE: HbfIntCascade = Major::new((
504    Major::new((
505        Major::new((
506            Major::new((&HBF_TAPS.0, ChunkOut(&HBF_TAPS.1))),
507            ChunkOut(&HBF_TAPS.2),
508        )),
509        ChunkOut(&HBF_TAPS.3),
510    )),
511    ChunkOut(&HBF_TAPS.4),
512));
513
514/// Response length, effective number of taps
515pub const fn hbf_int_response_length(depth: usize) -> usize {
516    assert!(depth <= 5);
517    let mut n = 0;
518    if depth > 0 {
519        n += HBF_TAPS.0.len();
520        n *= 2;
521    }
522    if depth > 1 {
523        n += HBF_TAPS.1.len();
524        n *= 2;
525    }
526    if depth > 2 {
527        n += HBF_TAPS.2.len();
528        n *= 2;
529    }
530    if depth > 3 {
531        n += HBF_TAPS.3.len();
532        n *= 2;
533    }
534    if depth > 4 {
535        n += HBF_TAPS.4.len();
536        n *= 2;
537    }
538    n
539}
540
541#[cfg(test)]
542mod test {
543    use super::*;
544    use dsp_process::{Process, Split};
545    use rustfft::{FftPlanner, num_complex::Complex};
546
547    #[test]
548    fn test() {
549        let mut h = Split::new(EvenSymmetric([0.5]), HbfDec::<[_; 5]>::default());
550        h.block(&[], &mut []);
551
552        let x = [1.0; 8];
553        let mut y = [0.0; 4];
554        h.block(x.as_chunks().0, &mut y);
555        assert_eq!(y, [1.5, 2.0, 2.0, 2.0]);
556
557        let mut h = Split::new(&HBF_TAPS.3, HbfDec::<[_; 11]>::default());
558        let x: Vec<_> = (0..8).map(|i| i as f32).collect();
559        h.block(x.as_chunks().0, &mut y);
560        println!("{:?}", y);
561    }
562
563    #[test]
564    fn decim() {
565        let mut h = HbfDec16::default();
566        const R: usize = 1 << 4;
567        let mut y = vec![0.0; 2];
568        let x: Vec<_> = (0..y.len() * R).map(|i| i as f32).collect();
569        HBF_DEC_CASCADE
570            .inner
571            .1
572            .block(&mut h, x.as_chunks::<R>().0, &mut y);
573        println!("{:?}", y);
574    }
575
576    #[test]
577    fn response_length_dec() {
578        let mut h = HbfDec16::default();
579        const R: usize = 4;
580        let mut y = [0.0; 100];
581        let x: Vec<f32> = (0..y.len() << R).map(|_| rand::random()).collect();
582        HBF_DEC_CASCADE
583            .inner
584            .1
585            .block(&mut h, x.as_chunks::<{ 1 << R }>().0, &mut y);
586        let x = vec![0.0; 1 << 10];
587        HBF_DEC_CASCADE.inner.1.block(
588            &mut h,
589            x.as_chunks::<{ 1 << R }>().0,
590            &mut y[..x.len() >> R],
591        );
592        let n = hbf_dec_response_length(R);
593        assert!(y[n - 1] != 0.0);
594        assert_eq!(y[n], 0.0);
595    }
596
597    #[test]
598    fn interp() {
599        let mut h = HbfInt16::default();
600        const R: usize = 4;
601        let r = hbf_int_response_length(R);
602        let mut x = vec![0.0; (r >> R) + 1];
603        x[0] = 1.0;
604        let mut y = vec![[0.0; 1 << R]; x.len()];
605        HBF_INT_CASCADE.inner.0.block(&mut h, &x, &mut y);
606        println!("{:?}", y); // interpolator impulse response
607        let y = y.as_flattened();
608        assert_ne!(y[r], 0.0);
609        assert_eq!(y[r + 1..], vec![0.0; y.len() - r - 1]);
610
611        let mut y: Vec<_> = y
612            .iter()
613            .map(|&x| Complex {
614                re: x as f64 / (1 << R) as f64,
615                im: 0.0,
616            })
617            .collect();
618        // pad
619        y.resize(5 << 10, Default::default());
620        FftPlanner::new().plan_fft_forward(y.len()).process(&mut y);
621        // transfer function
622        let p: Vec<_> = y.iter().map(|y| 10.0 * y.norm_sqr().log10()).collect();
623        let f = p.len() as f32 / (1 << R) as f32;
624        // pass band ripple
625        let p_pass = p[..(f * HBF_PASSBAND).floor() as _]
626            .iter()
627            .fold(0.0, |m, p| p.abs().max(m));
628        assert!(p_pass < 1e-6, "{p_pass}");
629        // stop band attenuation
630        let p_stop = p[(f * (1.0 - HBF_PASSBAND)).ceil() as _..p.len() / 2]
631            .iter()
632            .fold(-200.0, |m, p| p.max(m));
633        assert!(p_stop < -141.5, "{p_stop}");
634    }
635
636    /// small 32 block size, single stage, 3 mul (11 tap) decimator
637    /// 2.5 cycles per input item, > 2 GS/s per core on Skylake
638    #[test]
639    #[ignore]
640    fn insn_dec() {
641        const N: usize = HBF_TAPS.4.0.len();
642        assert_eq!(N, 3);
643        const M: usize = 1 << 4;
644        let mut h = HbfDec::<[_; EvenSymmetric::<[f32; N]>::LEN + M]>::default();
645        let mut x = [[9.0; 2]; M];
646        let mut y = [0.0; M];
647        for _ in 0..1 << 25 {
648            HBF_TAPS.4.block(&mut h, &x, &mut y);
649            x[13][1] = y[11]; // prevent the entire loop from being optimized away
650        }
651    }
652
653    /// 1k block size, single stage, 23 mul (91 tap) decimator
654    /// 2.6 cycles: > 1 GS/s
655    #[test]
656    #[ignore]
657    fn insn_dec2() {
658        const N: usize = HBF_TAPS.0.0.len();
659        assert_eq!(N, 23);
660        const M: usize = 1 << 9;
661        let mut h = HbfDec::<[_; EvenSymmetric::<[f32; N]>::LEN + M]>::default();
662        let mut x = [[9.0; 2]; M];
663        let mut y = [0.0; M];
664        for _ in 0..1 << 20 {
665            HBF_TAPS.0.block(&mut h, &x, &mut y);
666            x[33][1] = y[11]; // prevent the entire loop from being optimized away
667        }
668    }
669
670    /// full block size full decimator cascade (depth 4, 1024 items per input block)
671    /// 1.8 cycles: > 2 GS/s
672    #[test]
673    #[ignore]
674    fn insn_casc() {
675        let mut h = HbfDec16::default();
676        const R: usize = 4;
677        let mut x = [[9.0; 1 << R]; 1 << 6];
678        let mut y = [0.0; 1 << 6];
679        for _ in 0..1 << 20 {
680            HBF_DEC_CASCADE.inner.1.block(&mut h, &x, &mut y);
681            x[33][1] = y[11]; // prevent the entire loop from being optimized away
682        }
683    }
684
685    // // sdr crate, setup like insn_dec2()
686    // // 187 insn
687    // #[test]
688    // #[ignore]
689    // fn insn_sdr() {
690    //     use sdr::fir;
691    //     const N: usize = HBF_TAPS.0.len();
692    //     const M: usize = 1 << 10;
693    //     let mut taps = [0.0f64; { 4 * N - 1 }];
694    //     let (old, new) = taps.split_at_mut(2 * N - 1);
695    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
696    //         old.iter_mut()
697    //             .step_by(2)
698    //             .zip(new.iter_mut().rev().step_by(2)),
699    //     ) {
700    //         *old = (*tap * 0.5).into();
701    //         *new = *old;
702    //     }
703    //     taps[2 * N - 1] = 0.5;
704    //     let mut h = fir::FIR::new(&taps, 2, 1);
705    //     let x = [9.0; M];
706    //     // let mut h1 = HbfDec::<N, { 2 * N - 1 + M }>::new(&HBF_TAPS.0);
707    //     // let mut y1 = [0.0; M / 2];
708    //     for _ in 0..1 << 16 {
709    //         let _y = h.process(&x);
710    //         // h1.process_block(Some(&x), &mut y1);
711    //         // assert_eq!(y1.len(), y.len());
712    //         // assert!(y1.iter().zip(y.iter()).all(|(y1, y)| (y1 - y).abs() < 1e-6));
713    //     }
714    // }
715
716    // // // futuredsp crate, setup like insn_dec2()
717    // // // 315 insn
718    // #[test]
719    // #[ignore]
720    // fn insn_futuredsp() {
721    //     use futuredsp::{fir::PolyphaseResamplingFirKernel, UnaryKernel};
722    //     const N: usize = HBF_TAPS.0.len();
723    //     const M: usize = 1 << 10;
724    //     let mut taps = [0.0f32; { 4 * N - 1 }];
725    //     let (old, new) = taps.split_at_mut(2 * N - 1);
726    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
727    //         old.iter_mut()
728    //             .step_by(2)
729    //             .zip(new.iter_mut().rev().step_by(2)),
730    //     ) {
731    //         *old = *tap * 0.5;
732    //         *new = *old;
733    //     }
734    //     taps[2 * N - 1] = 0.5;
735    //     let x = [9.0f32; M];
736    //     let mut y = [0.0f32; M];
737    //     let fir = PolyphaseResamplingFirKernel::<_, _, _, _>::new(1, 2, taps);
738    //     for _ in 0..1 << 14 {
739    //         fir.work(&x, &mut y);
740    //     }
741    // }
742}