Skip to main content

grib/encoder/
complex.rs

1use grib_template_helpers::WriteToBuffer;
2
3use crate::{
4    SimplePackingStrategy, WriteGrib2DataSections,
5    def::grib2::template::param_set::{ComplexPacking, SimplePacking},
6    encoder::{Encode, bitmap::Bitmap, helpers::BitsRequired, writer},
7};
8
9/// Strategies applied when performing complex packing on numerical sequences.
10/// Complex packing is a method that divides a sequence of numbers into groups
11/// and efficiently compresses each group to improve the overall compression
12/// ratio of the data.
13#[derive(Debug, PartialEq, Eq, Clone)]
14pub enum ComplexPackingStrategy {
15    /// A strategy that pre-reads a specified number of elements to determine
16    /// whether to add an element to the current group.
17    LookAhead(usize),
18}
19
20#[derive(Debug, PartialEq, Eq, Clone)]
21pub enum SpatialDifferencingOption {
22    None,
23}
24
25pub(crate) struct Encoder<'a> {
26    data: &'a [f64],
27    simple_packing_strategy: SimplePackingStrategy,
28    complex_packing_strategy: ComplexPackingStrategy,
29}
30
31impl<'a> Encoder<'a> {
32    pub(crate) fn new(
33        data: &'a [f64],
34        simple_packing_strategy: SimplePackingStrategy,
35        complex_packing_strategy: ComplexPackingStrategy,
36    ) -> Self {
37        Self {
38            data,
39            simple_packing_strategy,
40            complex_packing_strategy,
41        }
42    }
43}
44
45impl<'a> Encode for Encoder<'a> {
46    type Output = Encoded;
47
48    fn encode(&self) -> Self::Output {
49        match self.complex_packing_strategy {
50            ComplexPackingStrategy::LookAhead(num) => {
51                let (mut simple, scaled, bitmap) = match self.simple_packing_strategy {
52                    SimplePackingStrategy::Decimal(decimal) => {
53                        super::determine_simple_packing_params(self.data, decimal)
54                    }
55                };
56                let (complex, coded) = if simple.num_bits == 0 {
57                    let len = self.data.len();
58                    (
59                        ComplexPacking::for_unique_values(len),
60                        CodedValues::Unique(len),
61                    )
62                } else {
63                    let exp = 2_f64.powf(simple.exp as f64);
64                    let integers = scaled
65                        .iter()
66                        .map(|value| ((value - simple.ref_val as f64) / exp).round() as u32)
67                        .collect::<Vec<_>>();
68                    let num_bits = integers.iter().max().unwrap().bits_required();
69                    simple.num_bits = num_bits;
70                    let groups = Groups::from_values(&integers, num);
71                    (
72                        ComplexPacking::from(&groups),
73                        CodedValues::NonUnique(groups),
74                    )
75                };
76                Encoded::new(simple, complex, coded, bitmap)
77            }
78        }
79    }
80}
81
82impl ComplexPacking {
83    fn for_unique_values(len: usize) -> Self {
84        let len = len as u32;
85        Self {
86            group_splitting_method: 1,
87            missing_value_management: 0,
88            primary_missing_value: 0xffffffff,
89            secondary_missing_value: 0xffffffff,
90            num_groups: 1,
91            group_width_ref: 0,
92            num_group_width_bits: 0,
93            group_len_ref: len,
94            group_len_inc: 1,
95            group_len_last: len,
96            num_group_len_bits: 0,
97        }
98    }
99}
100
101#[derive(Debug)]
102pub(crate) struct Encoded {
103    simple: SimplePacking,
104    complex: ComplexPacking,
105    coded: CodedValues,
106    bitmap: Bitmap,
107}
108
109impl Encoded {
110    fn new(
111        simple: SimplePacking,
112        complex: ComplexPacking,
113        coded: CodedValues,
114        bitmap: Bitmap,
115    ) -> Self {
116        Self {
117            simple,
118            complex,
119            coded,
120            bitmap,
121        }
122    }
123
124    pub(crate) fn params(&self) -> (&SimplePacking, &ComplexPacking) {
125        (&self.simple, &self.complex)
126    }
127}
128
129impl WriteGrib2DataSections for Encoded {
130    fn section5_len(&self) -> usize {
131        47
132    }
133
134    fn write_section5(&self, buf: &mut [u8]) -> Result<usize, &'static str> {
135        let len = self.section5_len();
136        if buf.len() < len {
137            return Err("destination buffer is too small");
138        }
139
140        let mut pos = 0;
141        pos += (len as u32).write_to_buffer(&mut buf[pos..])?; // header.len
142        pos += 5_u8.write_to_buffer(&mut buf[pos..])?; // header.sect_num
143        pos += (self.coded.num_values() as u32).write_to_buffer(&mut buf[pos..])?; // payload.num_encoded_points
144        pos += 2_u16.write_to_buffer(&mut buf[pos..])?; // payload.template_num
145        pos += self.simple.write_to_buffer(&mut buf[pos..])?;
146        pos += 0_u8.write_to_buffer(&mut buf[pos..])?; // payload.template.orig_field_type
147        pos += self.complex.write_to_buffer(&mut buf[pos..])?;
148
149        Ok(pos)
150    }
151
152    fn section6_len(&self) -> usize {
153        let bitmap_size = if self.bitmap.has_nan() {
154            self.bitmap.num_bytes_required()
155        } else {
156            0
157        };
158        6 + bitmap_size
159    }
160
161    fn write_section6(&self, buf: &mut [u8]) -> Result<usize, &'static str> {
162        let len = self.section6_len();
163        if buf.len() < len {
164            return Err("destination buffer is too small");
165        }
166
167        let mut pos = 0;
168        pos += (len as u32).write_to_buffer(&mut buf[pos..])?;
169        pos += 6_u8.write_to_buffer(&mut buf[pos..])?;
170        if self.bitmap.has_nan() {
171            pos += 0_u8.write_to_buffer(&mut buf[pos..])?;
172            pos += self.bitmap.write_to_buffer(&mut buf[pos..])?;
173        } else {
174            pos += 255_u8.write_to_buffer(&mut buf[pos..])?;
175        }
176
177        Ok(pos)
178    }
179
180    fn section7_len(&self) -> usize {
181        let len = match &self.coded {
182            CodedValues::NonUnique(Groups(inner)) => {
183                let num_groups = self.complex.num_groups as usize;
184                let bits_refs = self.simple.num_bits as usize * num_groups;
185                let bits_widths = self.complex.num_group_width_bits as usize * num_groups;
186                let bits_lengths = self.complex.num_group_len_bits as usize * num_groups;
187                let bits_values: usize = inner.iter().map(|g| g.len() * g.width as usize).sum();
188                bits_refs.div_ceil(8)
189                    + bits_widths.div_ceil(8)
190                    + bits_lengths.div_ceil(8)
191                    + bits_values.div_ceil(8)
192            }
193            CodedValues::Unique(_) => 0,
194        };
195        5 + len
196    }
197
198    fn write_section7(&self, buf: &mut [u8]) -> Result<usize, &'static str> {
199        let len = self.section7_len();
200        if buf.len() < len {
201            return Err("destination buffer is too small");
202        }
203
204        let mut pos = 0;
205        pos += (len as u32).write_to_buffer(&mut buf[pos..])?;
206        pos += 7_u8.write_to_buffer(&mut buf[pos..])?;
207        match &self.coded {
208            CodedValues::NonUnique(Groups(inner)) => {
209                if self.simple.num_bits != 0 {
210                    let refs = inner.iter().map(|g| g.ref_val).collect::<Vec<_>>();
211                    let nbitwise = writer::NBitwise::new(&refs, self.simple.num_bits as usize);
212                    pos += nbitwise.write_to_buffer(&mut buf[pos..])?;
213                }
214
215                if self.complex.num_group_width_bits != 0 {
216                    let widths = inner
217                        .iter()
218                        .map(|g| (g.width - self.complex.group_width_ref) as u32)
219                        .collect::<Vec<_>>();
220                    let nbitwise =
221                        writer::NBitwise::new(&widths, self.complex.num_group_width_bits as usize);
222                    pos += nbitwise.write_to_buffer(&mut buf[pos..])?;
223                }
224
225                if self.complex.group_len_inc != 0 && self.complex.num_group_len_bits != 0 {
226                    let lengths = inner
227                        .iter()
228                        .take(self.complex.num_groups as usize - 1)
229                        .map(|g| {
230                            (g.len() as u32 - self.complex.group_len_ref)
231                                / self.complex.group_len_inc as u32
232                        })
233                        .chain(std::iter::once(0))
234                        .collect::<Vec<_>>();
235                    let nbitwise =
236                        writer::NBitwise::new(&lengths, self.complex.num_group_len_bits as usize);
237                    pos += nbitwise.write_to_buffer(&mut buf[pos..])?;
238                }
239
240                let mut start_offset_bits = 0;
241                for group in inner {
242                    if group.width != 0 {
243                        let nbitwise = writer::NBitwise::new(&group.values, group.width as usize)
244                            .with_offset_bits(start_offset_bits);
245                        nbitwise.write_to_buffer(&mut buf[pos..])?;
246                        let (pos_shifted, new_offset) = nbitwise.new_pos();
247                        pos += pos_shifted;
248                        start_offset_bits = new_offset;
249                    }
250                }
251                if start_offset_bits != 0 {
252                    pos += 1;
253                }
254            }
255            CodedValues::Unique(_) => {}
256        }
257
258        Ok(pos)
259    }
260}
261
262#[derive(Debug)]
263enum CodedValues {
264    NonUnique(Groups),
265    Unique(usize),
266}
267
268impl CodedValues {
269    pub(crate) fn num_values(&self) -> usize {
270        match self {
271            Self::NonUnique(vec) => vec.num_values(),
272            Self::Unique(size) => *size,
273        }
274    }
275}
276
277#[derive(Debug, PartialEq, Eq)]
278struct Groups(Vec<Group>);
279
280impl Groups {
281    fn from_values(values: &[u32], num_lookahead: usize) -> Self {
282        let mut groups = Vec::new();
283        let mut start = 0;
284
285        while start < values.len() {
286            let mut end = start + 1;
287
288            let v = values[start];
289            let (mut min, mut max) = (v, v);
290            let mut width = 0;
291
292            while end < values.len() {
293                let v = values[end];
294                let (new_min, new_max) = (min.min(v), max.max(v));
295                let new_width = (new_max - new_min).bits_required();
296
297                let len = end - start;
298                let cost_extend = group_cost(len + 1, new_width);
299                let cost_keep = group_cost(len, width)
300                    + new_group_cost_estimated(&values[end..], num_lookahead);
301                if cost_keep < cost_extend {
302                    break;
303                }
304
305                min = new_min;
306                max = new_max;
307                width = new_width;
308                end += 1;
309            }
310
311            groups.push(Group::from_values(&values[start..end]));
312            start = end;
313        }
314
315        Self(groups)
316    }
317
318    fn optimal_length_params(&self) -> Option<OptimalLengthParams> {
319        let Self(inner) = self;
320        let num_lengths = inner.len() - 1; // the last group is treated separately
321        if num_lengths == 0 {
322            None
323        } else {
324            let lengths = inner
325                .iter()
326                .take(num_lengths)
327                .map(|g| g.values.len())
328                .collect::<Vec<_>>();
329            Some(OptimalLengthParams::from(&lengths[..]))
330        }
331    }
332
333    fn num_values(&self) -> usize {
334        let Self(inner) = self;
335        inner.iter().map(|g| g.values.len()).sum()
336    }
337}
338
339fn group_cost(len: usize, width: u8) -> usize {
340    len * width as usize
341}
342
343fn new_group_cost_estimated(values: &[u32], num_lookahead: usize) -> usize {
344    if values.is_empty() {
345        return 0;
346    }
347
348    let lookahead = values.iter().take(num_lookahead);
349
350    let (mut min, mut max) = (u32::MAX, u32::MIN);
351    let mut len = 0;
352
353    for &v in lookahead {
354        (min, max) = (min.min(v), max.max(v));
355        len += 1;
356    }
357    let width = (max - min).bits_required();
358    group_cost(len, width)
359}
360
361impl From<&Groups> for ComplexPacking {
362    fn from(value: &Groups) -> Self {
363        let Groups(inner) = value;
364        let group_width_ref = inner.iter().map(|g| g.width).min().unwrap();
365        let max_width = inner.iter().map(|g| g.width).max().unwrap();
366        let num_group_width_bits = (max_width - group_width_ref).bits_required();
367        let (group_len_ref, group_len_inc, num_group_len_bits) =
368            if let Some(length_params) = value.optimal_length_params() {
369                (
370                    length_params.ref_ as u32,
371                    length_params.inc as u8,
372                    length_params.num_bits,
373                )
374            } else {
375                (0, 0, 0)
376            };
377        Self {
378            group_splitting_method: 1,
379            missing_value_management: 0,
380            primary_missing_value: 0xffffffff,
381            secondary_missing_value: 0xffffffff,
382            num_groups: inner.len() as u32,
383            group_width_ref,
384            num_group_width_bits,
385            group_len_ref,
386            group_len_inc,
387            group_len_last: inner.last().unwrap().values.len() as u32,
388            num_group_len_bits,
389        }
390    }
391}
392
393#[derive(Debug, PartialEq, Eq)]
394struct Group {
395    pub ref_val: u32,
396    pub width: u8,
397    pub values: Vec<u32>,
398}
399
400impl Group {
401    fn from_values(values: &[u32]) -> Self {
402        let ref_val = *values.iter().min().unwrap();
403        let mut max_diff = u32::MIN;
404        let diffs = values
405            .iter()
406            .map(|v| {
407                let diff = v - ref_val;
408                max_diff = max_diff.max(diff);
409                diff
410            })
411            .collect();
412        let width = max_diff.bits_required();
413
414        Group {
415            ref_val,
416            width,
417            values: diffs,
418        }
419    }
420
421    fn len(&self) -> usize {
422        self.values.len()
423    }
424}
425
426#[derive(Debug, PartialEq, Eq)]
427struct OptimalLengthParams {
428    ref_: usize,
429    inc: usize,
430    num_bits: u8,
431    total_bits: usize,
432}
433
434impl OptimalLengthParams {
435    fn new(ref_: usize, inc: usize, num_bits: u8, total_bits: usize) -> Self {
436        Self {
437            ref_,
438            inc,
439            num_bits,
440            total_bits,
441        }
442    }
443}
444
445impl From<&[usize]> for OptimalLengthParams {
446    fn from(value: &[usize]) -> Self {
447        let ref_ = value.iter().min().unwrap();
448        let diffs = value.iter().map(|&l| l - ref_).collect::<Vec<_>>();
449        let gcd_ = diffs.iter().copied().reduce(gcd).unwrap_or(0);
450
451        if gcd_ == 0 {
452            return Self::new(*ref_, 0, 0, 0);
453        }
454
455        let max_code = diffs.iter().map(|d| d / gcd_).max().unwrap();
456        let num_bits = max_code.bits_required();
457        let total_bits = num_bits as usize * value.len();
458
459        Self::new(*ref_, gcd_, num_bits, total_bits)
460    }
461}
462
463fn gcd(m: usize, n: usize) -> usize {
464    let (m, n) = if m > n { (m, n) } else { (n, m) };
465    if n == 0 { m } else { gcd(n, m % n) }
466}
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471
472    #[test]
473    fn grouping() {
474        let mut values = (0_u32..24).collect::<Vec<_>>();
475        values[10] = 64;
476        values[21] = 128;
477        values[22] = 256;
478        let actual = Groups::from_values(&values, 4);
479        let expected = Groups(vec![
480            Group {
481                ref_val: 0,
482                width: 4,
483                values: vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
484            },
485            Group {
486                ref_val: 64,
487                width: 0,
488                values: vec![0],
489            },
490            Group {
491                ref_val: 11,
492                width: 4,
493                values: vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
494            },
495            Group {
496                ref_val: 128,
497                width: 8,
498                values: vec![0, 128],
499            },
500            Group {
501                ref_val: 23,
502                width: 0,
503                values: vec![0],
504            },
505        ]);
506        assert_eq!(actual, expected);
507    }
508
509    macro_rules! test_optimal_length_params {
510        ($((
511            $name:ident,
512            $input:expr,
513            $expected:expr,
514        ),)*) => ($(
515            #[test]
516            fn $name() {
517                let lengths = $input;
518                let actual = OptimalLengthParams::from(&lengths[..]);
519                let expected = $expected;
520                assert_eq!(actual, expected);
521            }
522        )*);
523    }
524
525    test_optimal_length_params! {
526        (
527            optimal_length_params_for_all_zero,
528            vec![0, 0, 0, 0, 0],
529            OptimalLengthParams::new(0, 0, 0, 0),
530        ),
531        (
532            optimal_length_params_for_all_same_nonzero,
533            vec![5, 5, 5, 5, 5],
534            OptimalLengthParams::new(5, 0, 0, 0),
535        ),
536        (
537            optimal_length_params_for_gcd_being_one,
538            vec![26, 24, 20, 14, 13],
539            OptimalLengthParams::new(13, 1, 4, 20),
540        ),
541        (
542            optimal_length_params_for_gcd_being_integer_other_than_zero_and_one,
543            vec![13, 19, 22, 16, 25],
544            OptimalLengthParams::new(13, 3, 3, 15),
545        ),
546    }
547
548    macro_rules! grib2_coded_values_roundtrip_tests {
549        ($(($name:ident, $input:expr),)*) => ($(
550            #[test]
551            fn $name() -> Result<(), Box<dyn std::error::Error>> {
552                let values = $input;
553                let encoder = Encoder::new(
554                    &values,
555                    SimplePackingStrategy::Decimal(0),
556                    ComplexPackingStrategy::LookAhead(4),
557                );
558                let encoded = encoder.encode();
559                let mut sect5 = vec![0; encoded.section5_len()];
560                let pos = encoded.write_section5(&mut sect5)?;
561                assert_eq!(pos, sect5.len());
562                let mut sect6 = vec![0; encoded.section6_len()];
563                let pos = encoded.write_section6(&mut sect6)?;
564                assert_eq!(pos, sect6.len());
565                let mut sect7 = vec![0; encoded.section7_len()];
566                let pos = encoded.write_section7(&mut sect7)?;
567                assert_eq!(pos, sect7.len());
568                let decoder = crate::Grib2SubmessageDecoder::new(values.len(), sect5, sect6, sect7)?;
569                let actual = decoder.dispatch()?.collect::<Vec<_>>();
570                let expected = values.iter().map(|val| *val as f32).collect::<Vec<_>>();
571                assert_eq!(actual.len(), expected.len());
572                actual
573                    .iter()
574                    .zip(expected.iter())
575                    .all(|(a, b)| (a.is_nan() && b.is_nan()) || (a == b));
576                Ok(())
577            }
578        )*);
579    }
580
581    grib2_coded_values_roundtrip_tests! {
582        (
583            grib2_coded_values_roundtrip_test_with_nonunique_values,
584            (2..11).map(|val| val as f64).collect::<Vec<_>>()
585        ),
586        (
587            grib2_coded_values_roundtrip_test_with_nonunique_values_and_group_len_inc_being_0,
588            (2..5).map(|val| val as f64).collect::<Vec<_>>()
589        ),
590        (
591            grib2_coded_values_roundtrip_test_with_unique_values,
592            vec![10.0_f64; 256]
593        ),
594        (
595            grib2_coded_values_roundtrip_test_with_zero_only_groups,
596            vec![0, 0, 0, 100, 10, 2, 2, 1]
597                .into_iter()
598                .flat_map(|val| [val as f64; 8])
599                .collect::<Vec<_>>()
600        ),
601        (
602            grib2_coded_values_roundtrip_test_with_body_data_affected_by_offsets,
603            vec![0.; 32]
604                .into_iter()
605                .chain([7., 15., 90.].into_iter())
606                .chain([0.; 32].into_iter())
607                .chain([114., 104., 92., 225.].into_iter())
608                .chain([0.; 32].into_iter())
609                .chain([114., 104., 92., 225.].into_iter())
610                .collect::<Vec<_>>()
611        ),
612    }
613}