libwreport  3.34
input.h
1 #ifndef WREPORT_BUFR_INPUT_H
2 #define WREPORT_BUFR_INPUT_H
3 
#include <wreport/error.h>
#include <wreport/var.h>
#include <wreport/bulletin.h>
#include <functional>
#include <string>
#include <utility>
9 
10 namespace wreport {
11 struct Bulletin;
12 
13 namespace bulletin {
14 struct AssociatedField;
15 }
16 
17 namespace bufr {
18 
20 {
21  Bulletin& out;
22  unsigned subset_count;
23  DispatchToSubsets(Bulletin& out, unsigned subset_count) : out(out), subset_count(subset_count) {}
24 
25  void add_missing(Varinfo info)
26  {
27  for (unsigned i = 0; i < subset_count; ++i)
28  out.subsets[i].store_variable_undef(info);
29  }
30  void add_same(const Var& var)
31  {
32  for (unsigned i = 0; i < subset_count; ++i)
33  out.subsets[i].store_variable(Var(var));
34  }
35  void add_var(unsigned subset, Var&& var)
36  {
37  out.subsets[subset].store_variable(var);
38  }
39 };
40 
41 
45 class Input
46 {
47 protected:
52  void scan_section_length(unsigned sec_no);
53 
54 public:
56  const uint8_t* data;
57 
59  size_t data_len;
60 
68  const char* fname = nullptr;
69 
77  size_t start_offset = 0;
78 
80  unsigned s4_cursor = 0;
81 
83  uint8_t pbyte = 0;
84 
86  int pbyte_len = 0;
87 
89  unsigned sec[6];
90 
91 
98  Input(const std::string& in);
99 
108 
120  void scan_other_sections(bool has_optional);
121 
123  unsigned offset() const { return s4_cursor; }
124 
126  unsigned bits_left() const { return (data_len - s4_cursor) * 8 + pbyte_len; }
127 
129  inline unsigned read_byte(unsigned pos) const
130  {
131  return (unsigned)data[pos];
132  }
133 
135  inline unsigned read_byte(unsigned section, unsigned pos) const
136  {
137  return (unsigned)data[sec[section] + pos];
138  }
139 
141  unsigned read_number(unsigned pos, unsigned byte_len) const
142  {
143  unsigned res = 0;
144  for (unsigned i = 0; i < byte_len; ++i)
145  {
146  res <<= 8;
147  res |= data[pos + i];
148  }
149  return res;
150  }
151 
156  inline unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
157  {
158  return read_number(sec[section] + pos, byte_len);
159  }
160 
165  uint32_t get_bits(unsigned n)
166  {
167  uint32_t result = 0;
168 
169  if (s4_cursor == data_len)
170  parse_error("end of buffer while looking for %d bits of bit-packed data", n);
171 
172  // TODO: review and benchmark and possibly simplify
173  // (a possible alternative approach is to keep a current bitmask that
174  // starts at 0x80 and is shifted right by 1 at each read until it
175  // reaches 0, and get rid of pbyte_len)
176  for (unsigned i = 0; i < n; i++)
177  {
178  if (pbyte_len == 0)
179  {
180  pbyte_len = 8;
181  pbyte = data[s4_cursor++];
182  }
183  result <<= 1;
184  if (pbyte & 0x80)
185  result |= 1;
186  pbyte <<= 1;
187  pbyte_len--;
188  }
189 
190  return result;
191  }
192 
196  void skip_bits(unsigned n)
197  {
198  if (s4_cursor == data_len)
199  parse_error("end of buffer while looking for %d bits of bit-packed data", n);
200 
201  for (unsigned i = 0; i < n; i++)
202  {
203  if (pbyte_len == 0)
204  {
205  pbyte_len = 8;
206  pbyte = data[s4_cursor++];
207  }
208  pbyte <<= 1;
209  pbyte_len--;
210  }
211  }
212 
214  void debug_dump_next_bits(const char* desc, unsigned count, const std::vector<unsigned>& groups={}) const;
215 
220  void debug_find_sequence(const char* pattern) const;
221 
223  void parse_error(const char* fmt, ...) const WREPORT_THROWF_ATTRS(2, 3);
224 
226  void parse_error(unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(3, 4);
227 
229  void parse_error(unsigned section, unsigned pos, const char* fmt, ...) const WREPORT_THROWF_ATTRS(4, 5);
230 
243  void check_available_data(unsigned pos, size_t datalen, const char* expected);
244 
259  void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char* expected);
260 
275  void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char* expected);
276 
289  void decode_compressed_number(Var& dest, uint32_t base, unsigned diffbits);
290 
299  void decode_number(Var& dest);
300 
304  bool decode_compressed_base(Varinfo info, uint32_t& base, uint32_t& diffbits);
305 
310  void decode_compressed_number(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
311 
312  void decode_string(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
313 
314  void decode_compressed_number(Varinfo info, unsigned subsets, DispatchToSubsets& dest);
315 
320  void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField& afield, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
321 
333  void decode_compressed_semantic_number(Var& dest, unsigned subsets);
334 
351  bool decode_string(unsigned bit_len, char* str, size_t& len);
352 
364  void decode_string(Var& dest);
365 
377  void decode_string(Var& dest, unsigned subsets);
378 
383  void decode_string(Varinfo info, unsigned subsets, std::function<void(unsigned, Var&&)> dest);
384 
396  void decode_binary(Var& dest);
397 
405  std::string decode_uncompressed_bitmap(unsigned size);
406 
420  std::string decode_compressed_bitmap(unsigned size);
421 };
422 
423 }
424 }
425 #endif
Storage for the decoded data of a BUFR or CREX message.
Definition: bulletin.h:30
std::vector< Subset > subsets
Decoded variables.
Definition: bulletin.h:122
A physical variable.
Definition: var.h:25
Binary buffer with bit-level read operations.
Definition: input.h:46
size_t data_len
Input buffer size.
Definition: input.h:59
unsigned read_byte(unsigned pos) const
Read a byte value at offset pos.
Definition: input.h:129
Input(const std::string &in)
Wrap a string into an Input.
uint32_t get_bits(unsigned n)
Get the integer value of the next 'n' bits from the decode input; n must be <= 32.
Definition: input.h:165
void check_available_section_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the given section in the input buffer contains at least datalen characters after offset po...
void decode_compressed_number(Var &dest, uint32_t base, unsigned diffbits)
Decode a compressed number as described by dest.info(), and set it as value for dest.
void debug_dump_next_bits(const char *desc, unsigned count, const std::vector< unsigned > &groups={}) const
Dump to stderr 'count' bits of 'buf', starting at the 'ofs-th' bit.
uint8_t pbyte
Byte we are currently decoding.
Definition: input.h:83
std::string decode_uncompressed_bitmap(unsigned size)
Decode an uncompressed bitmap of size bits.
void scan_other_sections(bool has_optional)
Scan the message filling in the sec[] array of section start offsets of all sections from 2 on.
unsigned bits_left() const
Return the number of bits left in the message to be decoded.
Definition: input.h:126
void scan_lead_sections()
Scan the message filling in the sec[] array of start offsets of sections 0 and 1.
void scan_section_length(unsigned sec_no)
Scan length of section sec_no, filling in the start of the next section in sec[sec_no + 1].
unsigned offset() const
Return the current decoding byte offset.
Definition: input.h:123
unsigned s4_cursor
Offset of the byte we are currently decoding.
Definition: input.h:80
void check_available_data(unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos; throw error_parse ...
unsigned read_number(unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos.
Definition: input.h:141
void decode_compressed_number_af(Varinfo info, const bulletin::AssociatedField &afield, unsigned subsets, std::function< void(unsigned, Var &&)> dest)
Decode a number as described by info from a compressed bufr with subsets subsets, and send the result...
void debug_find_sequence(const char *pattern) const
Match the given pattern as regexp on the still unread input bitstream, with bits converted to a strin...
unsigned sec[6]
Offsets of the start of BUFR sections.
Definition: input.h:89
size_t start_offset
File offset of the start of the message.
Definition: input.h:77
void check_available_message_data(unsigned section, unsigned pos, size_t datalen, const char *expected)
Check that the input buffer contains at least datalen characters after offset pos in section section;...
unsigned read_byte(unsigned section, unsigned pos) const
Read a byte value at offset pos inside section section.
Definition: input.h:135
unsigned read_number(unsigned section, unsigned pos, unsigned byte_len) const
Read a big endian integer value byte_len bytes long, at offset pos inside section section.
Definition: input.h:156
const uint8_t * data
Input buffer.
Definition: input.h:56
void skip_bits(unsigned n)
Skip the next n bits.
Definition: input.h:196
const char * fname
Input file name (optional).
Definition: input.h:68
void decode_binary(Var &dest)
Decode a generic binary value as-is, as described by dest.info(), and set it as value for dest.
bool decode_compressed_base(Varinfo info, uint32_t &base, uint32_t &diffbits)
Decode the base value for a variable in a compressed BUFR.
std::string decode_compressed_bitmap(unsigned size)
Decode a "compressed" bitmap of size bits.
void decode_compressed_semantic_number(Var &dest, unsigned subsets)
Decode a number as described by dest.info(), and set it as value for dest.
int pbyte_len
Bits left in pbyte to decode.
Definition: input.h:86
void parse_error(const char *fmt,...) const WREPORT_THROWF_ATTRS(2, 3)
Throw an error_parse at the current decoding location.
void decode_number(Var &dest)
Decode a number as described by dest.info(), and set it as value for dest.
wreport exceptions.
#define WREPORT_THROWF_ATTRS(a, b)
Tell the compiler that a function always throws and expects printf-style arguments.
Definition: error.h:56
String functions.
Definition: benchmark.h:13
Information about a variable.
Definition: varinfo.h:139
Definition: input.h:20
Definition: associated_fields.h:13