idsp/
hbf.rs

1//! Half-band filters and cascades
2//!
3//! Used to perform very efficient high-dynamic range rate changes by powers of two.
4
5use core::{
6    iter::Sum,
7    ops::{Add, Mul},
8};
9
10use num_traits::Zero;
11
12/// Filter input items into output items.
13pub trait Filter {
14    /// Input/output item type.
15    // TODO: impl with generic item type
16    type Item;
17
18    /// Process a block of items.
19    ///
20    /// Input items can be either in `x` or in `y`.
21    /// In the latter case the filtering operation is done in-place.
22    /// Output is always written into `y`.
23    /// The slice of items written into `y` is returned.
24    /// Input and output size relations must match the filter requirements
25    /// (decimation/interpolation and maximum block size).
26    /// When using in-place operation, `y` needs to contain the input items
27    /// (fewer than `y.len()` in the case of interpolation) and must be able to
28    /// contain the output items.
29    fn process_block<'a>(
30        &mut self,
31        x: Option<&[Self::Item]>,
32        y: &'a mut [Self::Item],
33    ) -> &'a mut [Self::Item];
34
35    /// Return the block size granularity and the maximum block size.
36    ///
37    /// For in-place processing, this refers to constraints on `y`.
38    /// Otherwise this refers to the larger of `x` and `y` (`x` for decimation and `y` for interpolation).
39    /// The granularity is also the rate change in the case of interpolation/decimation filters.
40    fn block_size(&self) -> (usize, usize);
41
42    /// Finite impulse response length in numer of output items minus one
43    /// Get this many to drain all previous memory
44    fn response_length(&self) -> usize;
45
46    // TODO: process items with automatic blocks
47    // fn process(&mut self, x: Option<&[Self::Item]>, y: &mut [Self::Item]) -> usize {}
48}
49
50/// Symmetric FIR filter prototype.
51///
52/// # Generics
53/// * `M`: number of taps, one-sided. The filter has effectively 2*M DSP taps
54/// * `N`: state size: N = 2*M - 1 + {input/output}.len()
55///
56/// # Half band decimation/interpolation filters
57///
58/// Half-band filters (rate change of 2) and cascades of HBFs are implemented in
59/// [`HbfDec`] and [`HbfInt`] etc.
60/// The half-band filter has unique properties that make it preferable in many cases:
61///
62/// * only needs M multiplications (fused multiply accumulate) for 4*M taps
63/// * HBF decimator stores less state than a generic FIR filter
64/// * as a FIR filter has linear phase/flat group delay
65/// * very small passband ripple and excellent stopband attenuation
66/// * as a cascade of decimation/interpolation filters, the higher-rate filters
67///   need successively fewer taps, allowing the filtering to be dominated by
68///   only the highest rate filter with the fewest taps
69/// * In a cascade of HBF the overall latency, group delay, and impulse response
70///   length are dominated by the lowest-rate filter which, due to its manageable transition
71///   band width (compared to single-stage filters) can be smaller, shorter, and faster.
72/// * high dynamic range and inherent stability compared with an IIR filter
73/// * can be combined with a CIC filter for non-power-of-two or even higher rate changes
74///
75/// The implementations here are all `no_std` and `no-alloc`.
76/// They support (but don't require) in-place filtering to reduce memory usage.
77/// They unroll and optimize extremely well targetting current architectures,
78/// e.g. requiring less than 4 instructions per input item for the full `HbfDecCascade` on Skylake.
79/// The filters are optimized for decent block sizes and perform best (i.e. with negligible
80/// overhead) for blocks of 32 high-rate items or more, depending very much on architecture.
81
82#[derive(Clone, Debug, Copy)]
83pub struct SymFir<'a, T, const M: usize, const N: usize> {
84    x: [T; N],
85    taps: &'a [T; M],
86}
87
88impl<'a, T: Copy + Zero + Add + Mul<Output = T> + Sum, const M: usize, const N: usize>
89    SymFir<'a, T, M, N>
90{
91    /// Create a new `SymFir`.
92    ///
93    /// # Args
94    /// * `taps`: one-sided FIR coefficients, excluding center tap, oldest to one-before-center
95    pub fn new(taps: &'a [T; M]) -> Self {
96        debug_assert!(N >= M * 2);
97        Self {
98            x: [T::zero(); N],
99            taps,
100        }
101    }
102
103    /// Obtain a mutable reference to the input items buffer space.
104    #[inline]
105    pub fn buf_mut(&mut self) -> &mut [T] {
106        &mut self.x[2 * M - 1..]
107    }
108
109    /// Perform the FIR convolution and yield results iteratively.
110    #[inline]
111    pub fn get(&self) -> impl Iterator<Item = T> + '_ {
112        self.x.windows(2 * M).map(|x| {
113            let (old, new) = x.split_at(M);
114            old.iter()
115                .zip(new.iter().rev())
116                .zip(self.taps.iter())
117                .map(|((xo, xn), tap)| (*xo + *xn) * *tap)
118                .sum()
119        })
120    }
121
122    /// Move items as new filter state.
123    ///
124    /// # Args
125    /// * `offset`: Keep the `2*M-1` items at `offset` as the new filter state.
126    #[inline]
127    pub fn keep_state(&mut self, offset: usize) {
128        self.x.copy_within(offset..offset + 2 * M - 1, 0);
129    }
130}
131
132// TODO: pub struct SymFirInt<R>, SymFirDec<R>
133
134/// Half band decimator (decimate by two)
135///
136/// The effective number of DSP taps is 4*M - 1.
137///
138/// M: number of taps
139/// N: state size: N = 2*M - 1 + output.len()
140#[derive(Clone, Debug, Copy)]
141pub struct HbfDec<'a, T, const M: usize, const N: usize> {
142    even: [T; N], // This is an upper bound to N - M (unstable const expr)
143    odd: SymFir<'a, T, M, N>,
144}
145
146impl<'a, T: Zero + Copy + Add + Mul<Output = T> + Sum, const M: usize, const N: usize>
147    HbfDec<'a, T, M, N>
148{
149    /// Create a new `HbfDec`.
150    ///
151    /// # Args
152    /// * `taps`: The FIR filter coefficients. Only the non-zero (odd) taps
153    ///   from oldest to one-before-center. Normalized such that center tap is 1.
154    pub fn new(taps: &'a [T; M]) -> Self {
155        Self {
156            even: [T::zero(); N],
157            odd: SymFir::new(taps),
158        }
159    }
160}
161
162trait Half {
163    fn half(self) -> Self;
164}
165
166macro_rules! impl_half_f {
167    ($($t:ty)+) => {$(
168        impl Half for $t {
169            fn half(self) -> Self {
170                0.5 * self
171            }
172        }
173    )+}
174}
175impl_half_f!(f32 f64);
176
177macro_rules! impl_half_i {
178    ($($t:ty)+) => {$(
179        impl Half for $t {
180            fn half(self) -> Self {
181                self >> 1
182            }
183        }
184    )+}
185}
186impl_half_i!(i8 i16 i32 i64 i128);
187
188impl<T: Copy + Zero + Add + Mul<Output = T> + Sum + Half, const M: usize, const N: usize> Filter
189    for HbfDec<'_, T, M, N>
190{
191    type Item = T;
192
193    #[inline]
194    fn block_size(&self) -> (usize, usize) {
195        (2, 2 * (N - (2 * M - 1)))
196    }
197
198    #[inline]
199    fn response_length(&self) -> usize {
200        2 * M - 1
201    }
202
203    fn process_block<'b>(
204        &mut self,
205        x: Option<&[Self::Item]>,
206        y: &'b mut [Self::Item],
207    ) -> &'b mut [Self::Item] {
208        let x = x.unwrap_or(y);
209        debug_assert_eq!(x.len() & 1, 0);
210        let k = x.len() / 2;
211        // load input
212        for (xi, (even, odd)) in x.chunks_exact(2).zip(
213            self.even[M - 1..][..k]
214                .iter_mut()
215                .zip(self.odd.buf_mut()[..k].iter_mut()),
216        ) {
217            *even = xi[0];
218            *odd = xi[1];
219        }
220        // compute output
221        for (yi, (even, odd)) in y[..k]
222            .iter_mut()
223            .zip(self.even[..k].iter().zip(self.odd.get()))
224        {
225            *yi = (*even + odd).half();
226        }
227        // keep state
228        self.even.copy_within(k..k + M - 1, 0);
229        self.odd.keep_state(k);
230        &mut y[..k]
231    }
232}
233
234/// Half band interpolator (interpolation rate 2)
235///
236/// The effective number of DSP taps is 4*M - 1.
237///
238/// M: number of taps
239/// N: state size: N = 2*M - 1 + input.len()
240#[derive(Clone, Debug, Copy)]
241pub struct HbfInt<'a, T, const M: usize, const N: usize> {
242    fir: SymFir<'a, T, M, N>,
243}
244
245impl<'a, T: Copy + Zero + Add + Mul<Output = T> + Sum, const M: usize, const N: usize>
246    HbfInt<'a, T, M, N>
247{
248    /// Non-zero (odd) taps from oldest to one-before-center.
249    /// Normalized such that center tap is 1.
250    pub fn new(taps: &'a [T; M]) -> Self {
251        Self {
252            fir: SymFir::new(taps),
253        }
254    }
255
256    /// Obtain a mutable reference to the input items buffer space
257    pub fn buf_mut(&mut self) -> &mut [T] {
258        self.fir.buf_mut()
259    }
260}
261
262impl<T: Copy + Zero + Add + Mul<Output = T> + Sum, const M: usize, const N: usize> Filter
263    for HbfInt<'_, T, M, N>
264{
265    type Item = T;
266
267    #[inline]
268    fn block_size(&self) -> (usize, usize) {
269        (2, 2 * (N - (2 * M - 1)))
270    }
271
272    #[inline]
273    fn response_length(&self) -> usize {
274        4 * M - 2
275    }
276
277    fn process_block<'b>(
278        &mut self,
279        x: Option<&[Self::Item]>,
280        y: &'b mut [Self::Item],
281    ) -> &'b mut [Self::Item] {
282        debug_assert_eq!(y.len() & 1, 0);
283        let k = y.len() / 2;
284        let x = x.unwrap_or(&y[..k]);
285        // load input
286        self.fir.buf_mut()[..k].copy_from_slice(x);
287        // compute output
288        for (yi, (even, &odd)) in y
289            .chunks_exact_mut(2)
290            .zip(self.fir.get().zip(self.fir.x[M..][..k].iter()))
291        {
292            // Choose the even item to be the interpolated one.
293            // The alternative would have the same response length
294            // but larger latency.
295            yi[0] = even; // interpolated
296            yi[1] = odd; // center tap: identity
297        }
298        // keep state
299        self.fir.keep_state(k);
300        y
301    }
302}
303
304/// Standard/optimal half-band filter cascade taps
305///
306/// * obtained with `2*signal.remez(4*n - 1, bands=(0, .5-df/2, .5+df/2, 1), desired=(1, 0), fs=2, grid_density=512)[:2*n:2]`
307/// * more than 98 dB stop band attenuation (>16 bit)
308/// * 0.4 pass band (relative to lowest sample rate)
309/// * less than 0.001 dB ripple
310/// * linear phase/flat group delay
311/// * rate change up to 2**5 = 32
312/// * lowest rate filter is at 0 index
313/// * use taps 0..n for 2**n interpolation/decimation
314#[allow(clippy::excessive_precision, clippy::type_complexity)]
315pub const HBF_TAPS_98: ([f32; 15], [f32; 6], [f32; 3], [f32; 3], [f32; 2]) = (
316    // n=15 coefficients (effective number of DSP taps 4*15-1 = 59), transition band width df=.2 fs
317    [
318        7.02144012e-05,
319        -2.43279582e-04,
320        6.35026936e-04,
321        -1.39782541e-03,
322        2.74613582e-03,
323        -4.96403839e-03,
324        8.41806912e-03,
325        -1.35827601e-02,
326        2.11004053e-02,
327        -3.19267647e-02,
328        4.77024289e-02,
329        -7.18014345e-02,
330        1.12942004e-01,
331        -2.03279594e-01,
332        6.33592923e-01,
333    ],
334    // 6, .47
335    [
336        -0.00086943,
337        0.00577837,
338        -0.02201674,
339        0.06357869,
340        -0.16627679,
341        0.61979312,
342    ],
343    // 3, .754
344    [0.01414651, -0.10439639, 0.59026742],
345    // 3, .877
346    [0.01227974, -0.09930782, 0.58702834],
347    // 2, .94
348    [-0.06291796, 0.5629161],
349);
350
351/// * 140 dB stopband, 2 µdB passband ripple, limited by f32 dynamic range
352/// * otherwise like [`HBF_TAPS_98`].
353#[allow(clippy::excessive_precision, clippy::type_complexity)]
354pub const HBF_TAPS: ([f32; 23], [f32; 9], [f32; 5], [f32; 4], [f32; 3]) = (
355    [
356        7.60376281e-07,
357        -3.77494189e-06,
358        1.26458572e-05,
359        -3.43188258e-05,
360        8.10687488e-05,
361        -1.72971471e-04,
362        3.40845058e-04,
363        -6.29522838e-04,
364        1.10128836e-03,
365        -1.83933298e-03,
366        2.95124925e-03,
367        -4.57290979e-03,
368        6.87374175e-03,
369        -1.00656254e-02,
370        1.44199841e-02,
371        -2.03025099e-02,
372        2.82462332e-02,
373        -3.91128510e-02,
374        5.44795655e-02,
375        -7.77002648e-02,
376        1.17523454e-01,
377        -2.06185386e-01,
378        6.34588718e-01,
379    ],
380    [
381        3.13788260e-05,
382        -2.90598691e-04,
383        1.46009063e-03,
384        -5.22455620e-03,
385        1.48913004e-02,
386        -3.62276956e-02,
387        8.02305192e-02,
388        -1.80019379e-01,
389        6.25149012e-01,
390    ],
391    [
392        7.62032287e-04,
393        -7.64759816e-03,
394        3.85545008e-02,
395        -1.39896080e-01,
396        6.08227193e-01,
397    ],
398    [
399        -2.65761488e-03,
400        2.49805823e-02,
401        -1.21497065e-01,
402        5.99174082e-01,
403    ],
404    [1.18773514e-02, -9.81294960e-02, 5.86252153e-01],
405);
406
407/// Passband width in units of lowest sample rate
408pub const HBF_PASSBAND: f32 = 0.4;
409
410/// Max low-rate block size (HbfIntCascade input, HbfDecCascade output)
411pub const HBF_CASCADE_BLOCK: usize = 1 << 6;
412
413/// Half-band decimation filter cascade with optimal taps
414///
415/// See [HBF_TAPS].
416/// Only in-place processing is implemented.
417/// Supports rate changes of 1, 2, 4, 8, and 16.
418#[derive(Copy, Clone, Debug)]
419pub struct HbfDecCascade {
420    depth: usize,
421    stages: (
422        HbfDec<
423            'static,
424            f32,
425            { HBF_TAPS.0.len() },
426            { 2 * HBF_TAPS.0.len() - 1 + HBF_CASCADE_BLOCK },
427        >,
428        HbfDec<
429            'static,
430            f32,
431            { HBF_TAPS.1.len() },
432            { 2 * HBF_TAPS.1.len() - 1 + HBF_CASCADE_BLOCK * 2 },
433        >,
434        HbfDec<
435            'static,
436            f32,
437            { HBF_TAPS.2.len() },
438            { 2 * HBF_TAPS.2.len() - 1 + HBF_CASCADE_BLOCK * 4 },
439        >,
440        HbfDec<
441            'static,
442            f32,
443            { HBF_TAPS.3.len() },
444            { 2 * HBF_TAPS.3.len() - 1 + HBF_CASCADE_BLOCK * 8 },
445        >,
446    ),
447}
448
449impl Default for HbfDecCascade {
450    fn default() -> Self {
451        Self {
452            depth: 0,
453            stages: (
454                HbfDec::new(&HBF_TAPS.0),
455                HbfDec::new(&HBF_TAPS.1),
456                HbfDec::new(&HBF_TAPS.2),
457                HbfDec::new(&HBF_TAPS.3),
458            ),
459        }
460    }
461}
462
463impl HbfDecCascade {
464    /// Set cascade depth
465    ///
466    /// Sets the number of HBF filter stages to apply.
467    #[inline]
468    pub fn set_depth(&mut self, n: usize) {
469        assert!(n <= 4);
470        self.depth = n;
471    }
472
473    /// Cascade depth
474    ///
475    /// The number of HBF filter stages to apply.
476    #[inline]
477    pub fn depth(&self) -> usize {
478        self.depth
479    }
480}
481
482impl Filter for HbfDecCascade {
483    type Item = f32;
484
485    #[inline]
486    fn block_size(&self) -> (usize, usize) {
487        (
488            1 << self.depth,
489            match self.depth {
490                0 => usize::MAX,
491                1 => self.stages.0.block_size().1,
492                2 => self.stages.1.block_size().1,
493                3 => self.stages.2.block_size().1,
494                _ => self.stages.3.block_size().1,
495            },
496        )
497    }
498
499    #[inline]
500    fn response_length(&self) -> usize {
501        let mut n = 0;
502        if self.depth > 3 {
503            n = n / 2 + self.stages.3.response_length();
504        }
505        if self.depth > 2 {
506            n = n / 2 + self.stages.2.response_length();
507        }
508        if self.depth > 1 {
509            n = n / 2 + self.stages.1.response_length();
510        }
511        if self.depth > 0 {
512            n = n / 2 + self.stages.0.response_length();
513        }
514        n
515    }
516
517    fn process_block<'a>(
518        &mut self,
519        x: Option<&[Self::Item]>,
520        mut y: &'a mut [Self::Item],
521    ) -> &'a mut [Self::Item] {
522        if x.is_some() {
523            unimplemented!(); // TODO: pair of intermediate buffers
524        }
525        let n = y.len();
526
527        if self.depth > 3 {
528            y = self.stages.3.process_block(None, y);
529        }
530        if self.depth > 2 {
531            y = self.stages.2.process_block(None, y);
532        }
533        if self.depth > 1 {
534            y = self.stages.1.process_block(None, y);
535        }
536        if self.depth > 0 {
537            y = self.stages.0.process_block(None, y);
538        }
539        debug_assert_eq!(y.len(), n >> self.depth);
540        y
541    }
542}
543
544/// Half-band interpolation filter cascade with optimal taps.
545///
546/// This is a no_alloc version without trait objects.
547/// The price to pay is fixed and maximal memory usage independent
548/// of block size and cascade length.
549///
550/// See [HBF_TAPS].
551/// Only in-place processing is implemented.
552/// Supports rate changes of 1, 2, 4, 8, and 16.
553#[derive(Copy, Clone, Debug)]
554pub struct HbfIntCascade {
555    depth: usize,
556    stages: (
557        HbfInt<
558            'static,
559            f32,
560            { HBF_TAPS.0.len() },
561            { 2 * HBF_TAPS.0.len() - 1 + HBF_CASCADE_BLOCK },
562        >,
563        HbfInt<
564            'static,
565            f32,
566            { HBF_TAPS.1.len() },
567            { 2 * HBF_TAPS.1.len() - 1 + HBF_CASCADE_BLOCK * 2 },
568        >,
569        HbfInt<
570            'static,
571            f32,
572            { HBF_TAPS.2.len() },
573            { 2 * HBF_TAPS.2.len() - 1 + HBF_CASCADE_BLOCK * 4 },
574        >,
575        HbfInt<
576            'static,
577            f32,
578            { HBF_TAPS.3.len() },
579            { 2 * HBF_TAPS.3.len() - 1 + HBF_CASCADE_BLOCK * 8 },
580        >,
581    ),
582}
583
584impl Default for HbfIntCascade {
585    fn default() -> Self {
586        Self {
587            depth: 4,
588            stages: (
589                HbfInt::new(&HBF_TAPS.0),
590                HbfInt::new(&HBF_TAPS.1),
591                HbfInt::new(&HBF_TAPS.2),
592                HbfInt::new(&HBF_TAPS.3),
593            ),
594        }
595    }
596}
597
598impl HbfIntCascade {
599    /// Set cascade depth
600    ///
601    /// Sets the number of HBF filter stages to apply.
602    pub fn set_depth(&mut self, n: usize) {
603        assert!(n <= 4);
604        self.depth = n;
605    }
606
607    /// Cascade depth
608    ///
609    /// The number of HBF filter stages to apply.
610    pub fn depth(&self) -> usize {
611        self.depth
612    }
613}
614
615impl Filter for HbfIntCascade {
616    type Item = f32;
617
618    #[inline]
619    fn block_size(&self) -> (usize, usize) {
620        (
621            1 << self.depth,
622            match self.depth {
623                0 => usize::MAX,
624                1 => self.stages.0.block_size().1,
625                2 => self.stages.1.block_size().1,
626                3 => self.stages.2.block_size().1,
627                _ => self.stages.3.block_size().1,
628            },
629        )
630    }
631
632    #[inline]
633    fn response_length(&self) -> usize {
634        let mut n = 0;
635        if self.depth > 0 {
636            n = 2 * n + self.stages.0.response_length();
637        }
638        if self.depth > 1 {
639            n = 2 * n + self.stages.1.response_length();
640        }
641        if self.depth > 2 {
642            n = 2 * n + self.stages.2.response_length();
643        }
644        if self.depth > 3 {
645            n = 2 * n + self.stages.3.response_length();
646        }
647        n
648    }
649
650    fn process_block<'a>(
651        &mut self,
652        x: Option<&[Self::Item]>,
653        y: &'a mut [Self::Item],
654    ) -> &'a mut [Self::Item] {
655        if x.is_some() {
656            unimplemented!(); // TODO: one intermediate buffer and `y`
657        }
658        // TODO: use buf_mut() and write directly into next filters' input buffer
659
660        let mut n = y.len() >> self.depth;
661        if self.depth > 0 {
662            n = self.stages.0.process_block(None, &mut y[..2 * n]).len();
663        }
664        if self.depth > 1 {
665            n = self.stages.1.process_block(None, &mut y[..2 * n]).len();
666        }
667        if self.depth > 2 {
668            n = self.stages.2.process_block(None, &mut y[..2 * n]).len();
669        }
670        if self.depth > 3 {
671            n = self.stages.3.process_block(None, &mut y[..2 * n]).len();
672        }
673        debug_assert_eq!(n, y.len());
674        &mut y[..n]
675    }
676}
677
678#[cfg(test)]
679mod test {
680    use super::*;
681    use rustfft::{num_complex::Complex, FftPlanner};
682
683    #[test]
684    fn test() {
685        let mut h = HbfDec::<_, 1, 5>::new(&[0.5]);
686        assert_eq!(h.process_block(None, &mut []), &[]);
687
688        let mut x = [1.0; 8];
689        assert_eq!((2, x.len()), h.block_size());
690        let x = h.process_block(None, &mut x);
691        assert_eq!(x, [0.75, 1.0, 1.0, 1.0]);
692
693        let mut h = HbfDec::<_, { HBF_TAPS.3.len() }, 11>::new(&HBF_TAPS.3);
694        let mut x: Vec<_> = (0..8).map(|i| i as f32).collect();
695        assert_eq!((2, x.len()), h.block_size());
696        let x = h.process_block(None, &mut x);
697        println!("{:?}", x);
698    }
699
700    #[test]
701    fn decim() {
702        let mut h = HbfDecCascade::default();
703        h.set_depth(4);
704        assert_eq!(
705            h.block_size(),
706            (1 << h.depth(), HBF_CASCADE_BLOCK << h.depth())
707        );
708        let mut x: Vec<_> = (0..2 << h.depth()).map(|i| i as f32).collect();
709        let x = h.process_block(None, &mut x);
710        println!("{:?}", x);
711    }
712
713    #[test]
714    fn response_length_dec() {
715        let mut h = HbfDecCascade::default();
716        h.set_depth(4);
717        let mut y: Vec<f32> = (0..1 << 10).map(|_| rand::random()).collect();
718        h.process_block(None, &mut y);
719        let mut y = vec![0.0; 1 << 10];
720        let z = h.process_block(None, &mut y);
721        let n = h.response_length();
722        assert!(z[n - 1] != 0.0);
723        assert_eq!(z[n], 0.0);
724    }
725
726    #[test]
727    fn interp() {
728        let mut h = HbfIntCascade::default();
729        h.set_depth(4);
730        assert_eq!(
731            h.block_size(),
732            (1 << h.depth(), HBF_CASCADE_BLOCK << h.depth())
733        );
734        let k = h.block_size().0;
735        let r = h.response_length();
736        let mut x = vec![0.0; (r + 1 + k - 1) / k * k];
737        x[0] = 1.0;
738        let x = h.process_block(None, &mut x);
739        println!("{:?}", x); // interpolator impulse response
740        assert!(x[r] != 0.0);
741        assert_eq!(x[r + 1..], vec![0.0; x.len() - r - 1]);
742
743        let g = (1 << h.depth()) as f32;
744        let mut y = Vec::from_iter(x.iter().map(|&x| Complex {
745            re: x as f64 / g as f64,
746            im: 0.0,
747        }));
748        // pad
749        y.resize(5 << 10, Complex::default());
750        FftPlanner::new().plan_fft_forward(y.len()).process(&mut y);
751        // transfer function
752        let p = Vec::from_iter(y.iter().map(|y| 10.0 * y.norm_sqr().log10()));
753        let f = p.len() as f32 / g;
754        // pass band ripple
755        let p_pass = p[..(f * HBF_PASSBAND).floor() as _]
756            .iter()
757            .fold(0.0, |m, p| p.abs().max(m));
758        assert!(p_pass < 2e-6, "{p_pass}");
759        // stop band attenuation
760        let p_stop = p[(f * (1.0 - HBF_PASSBAND)).ceil() as _..p.len() / 2]
761            .iter()
762            .fold(-200.0, |m, p| p.max(m));
763        assert!(p_stop < -140.0, "{p_stop}");
764    }
765
766    /// small 32 block size, single stage, 3 mul (11 tap) decimator
767    /// 3.5 insn per input item, > 1 GS/s per core on Skylake
768    #[test]
769    #[ignore]
770    fn insn_dec() {
771        const N: usize = HBF_TAPS.4.len();
772        assert_eq!(N, 3);
773        let mut h = HbfDec::<_, N, { 2 * N - 1 + (1 << 4) }>::new(&HBF_TAPS.4);
774        let mut x = [9.0; 1 << 5];
775        for _ in 0..1 << 25 {
776            h.process_block(None, &mut x);
777        }
778    }
779
780    /// 1k block size, single stage, 23 mul (91 tap) decimator
781    /// 4.9 insn: > 1 GS/s
782    #[test]
783    #[ignore]
784    fn insn_dec2() {
785        const N: usize = HBF_TAPS.0.len();
786        assert_eq!(N, 23);
787        const M: usize = 1 << 10;
788        let mut h = HbfDec::<_, N, { 2 * N - 1 + M }>::new(&HBF_TAPS.0);
789        let mut x = [9.0; M];
790        for _ in 0..1 << 20 {
791            h.process_block(None, &mut x);
792        }
793    }
794
795    /// full block size full decimator cascade (depth 4, 1024 items per input block)
796    /// 4.1 insn: > 1 GS/s
797    #[test]
798    #[ignore]
799    fn insn_casc() {
800        let mut x = [9.0; 1 << 10];
801        let mut h = HbfDecCascade::default();
802        h.set_depth(4);
803        for _ in 0..1 << 20 {
804            h.process_block(None, &mut x);
805        }
806    }
807
808    // // sdr crate, setup like insn_dec2()
809    // // 187 insn
810    // #[test]
811    // #[ignore]
812    // fn insn_sdr() {
813    //     use sdr::fir;
814    //     const N: usize = HBF_TAPS.0.len();
815    //     const M: usize = 1 << 10;
816    //     let mut taps = [0.0f64; { 4 * N - 1 }];
817    //     let (old, new) = taps.split_at_mut(2 * N - 1);
818    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
819    //         old.iter_mut()
820    //             .step_by(2)
821    //             .zip(new.iter_mut().rev().step_by(2)),
822    //     ) {
823    //         *old = (*tap * 0.5).into();
824    //         *new = *old;
825    //     }
826    //     taps[2 * N - 1] = 0.5;
827    //     let mut h = fir::FIR::new(&taps, 2, 1);
828    //     let x = [9.0; M];
829    //     // let mut h1 = HbfDec::<N, { 2 * N - 1 + M }>::new(&HBF_TAPS.0);
830    //     // let mut y1 = [0.0; M / 2];
831    //     for _ in 0..1 << 16 {
832    //         let _y = h.process(&x);
833    //         // h1.process_block(Some(&x), &mut y1);
834    //         // assert_eq!(y1.len(), y.len());
835    //         // assert!(y1.iter().zip(y.iter()).all(|(y1, y)| (y1 - y).abs() < 1e-6));
836    //     }
837    // }
838
839    // // // futuredsp crate, setup like insn_dec2()
840    // // // 315 insn
841    // #[test]
842    // #[ignore]
843    // fn insn_futuredsp() {
844    //     use futuredsp::{fir::PolyphaseResamplingFirKernel, UnaryKernel};
845    //     const N: usize = HBF_TAPS.0.len();
846    //     const M: usize = 1 << 10;
847    //     let mut taps = [0.0f32; { 4 * N - 1 }];
848    //     let (old, new) = taps.split_at_mut(2 * N - 1);
849    //     for (tap, (old, new)) in HBF_TAPS.0.iter().zip(
850    //         old.iter_mut()
851    //             .step_by(2)
852    //             .zip(new.iter_mut().rev().step_by(2)),
853    //     ) {
854    //         *old = *tap * 0.5;
855    //         *new = *old;
856    //     }
857    //     taps[2 * N - 1] = 0.5;
858    //     let x = [9.0f32; M];
859    //     let mut y = [0.0f32; M];
860    //     let fir = PolyphaseResamplingFirKernel::<_, _, _, _>::new(1, 2, taps);
861    //     for _ in 0..1 << 14 {
862    //         fir.work(&x, &mut y);
863    //     }
864    // }
865}