OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_block_decoder.cpp
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_block_decoder.cpp
34 // Author: Aous Naman
35 // Date: 28 August 2019
36 //***************************************************************************/
37 
38 //***************************************************************************/
43 #include <cassert>
44 #include <cstring>
45 #include "ojph_block_decoder.h"
46 #include "ojph_arch.h"
47 #include "ojph_message.h"
48 
49 namespace ojph {
50  namespace local {
51 
52  //************************************************************************/
// Lookup tables used to decode VLC codewords; both are filled in by
// vlc_init_tables(). Each has 1024 entries indexed by (c_q << 7) | cwd,
// where c_q is the quad context and cwd holds 7 bits of VLC bitstream.
// vlc_tbl0 serves the initial quad rows.
69  static ui16 vlc_tbl0[1024] = { 0 };
// vlc_tbl1 serves the non-initial quad rows.
73  static ui16 vlc_tbl1[1024] = { 0 };
75 
76  //************************************************************************/
// State of the MEL bitstream decoder: where to read next, a bit buffer, and
// a small queue of decoded runs.
// NOTE(review): the constructor initialises `tmp` and `runs`, and the mel_*
// functions use them, but their declarations are not visible in this listing
// (listing lines 89 and 97 are absent) -- confirm against the original file.
83  struct dec_mel_st {
84  dec_mel_st() : data(NULL), tmp(0), bits(0), size(0), unstuff(false),
85  k(0), num_runs(0), runs(0)
86  {}
87  // data decoding machinery
88  ui8* data; // next byte to read from the MEL segment
90  int bits; // number of valid bits currently held in tmp
91  int size; // number of bytes still to be read
92  bool unstuff; // true if the next byte read needs unstuffing
93  int k; // MEL state; used as an index into the exponent table (0..12)
94 
95  // queue of decoded runs
96  int num_runs; // number of runs currently stored in runs
98  };
99 
100  //************************************************************************/
// Refills melp->tmp with up to 32 bits from the MEL segment, unstuffing as
// it goes: a byte that follows a 0xFF byte contributes only 7 bits.
// Returns immediately if tmp already holds more than 32 bits. Once the
// segment is exhausted, 0xFF bytes are fed in instead. The last byte counted
// by melp->size gets its 4 LSBs set because MEL and VLC segments can overlap.
// The byte read first ends up in bits 0-7 of val and is the first one moved
// into t; the result is placed in tmp just below the bits already there.
// NOTE(review): the 32-bit load only agrees with the byte-wise tail path on
// a little-endian host -- confirm the supported targets.
112  static inline
113  void mel_read(dec_mel_st *melp)
114  {
115  if (melp->bits > 32) //there are enough bits in the tmp variable
116  return; // return without reading new data
117 
118  ui32 val = 0xFFFFFFFF; // feed in 0xFF if buffer is exhausted
119  if (melp->size > 4) { // if there is data in the MEL segment
120  val = *(ui32*)melp->data; // read 32 bits from MEL data
121  melp->data += 4; // advance pointer
122  melp->size -= 4; // reduce counter
123  }
124  else if (melp->size > 0)
125  { // 4 or less
126  int i = 0;
127  while (melp->size > 1) {
128  ui32 v = *melp->data++; // read one byte at a time
129  ui32 m = ~(0xFFu << i); // mask of location
130  val = (val & m) | (v << i);// put one byte in its correct location
131  --melp->size;
132  i += 8;
133  }
134  // size equal to 1
135  ui32 v = *melp->data++; // the last counted byte is treated differently
136  v |= 0xF; // MEL and VLC segments can overlap
137  ui32 m = ~(0xFFu << i);
138  val = (val & m) | (v << i);
139  --melp->size;
140  }
141 
142  // next we unstuff them before adding them to the buffer
143  int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
144  // the previously read byte requires
145  // unstuffing
146 
147  // data is unstuffed and accumulated in t
148  // bits has the number of bits in t
149  ui32 t = val & 0xFF;
150  bool unstuff = ((val & 0xFF) == 0xFF); // true if we need unstuffing
151  bits -= unstuff; // there is one less bit in t if unstuffing is needed
152  t = t << (8 - unstuff); // move up to make room for the next byte
153 
154  //this is a repeat of the above
155  t |= (val>>8) & 0xFF;
156  unstuff = (((val >> 8) & 0xFF) == 0xFF);
157  bits -= unstuff;
158  t = t << (8 - unstuff);
159 
160  t |= (val>>16) & 0xFF;
161  unstuff = (((val >> 16) & 0xFF) == 0xFF);
162  bits -= unstuff;
163  t = t << (8 - unstuff);
164 
165  t |= (val>>24) & 0xFF;
166  melp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // kept for the next call
167 
168  // move t to tmp, and push the result all the way up, so we read from
169  // the MSB
170  melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
171  melp->bits += bits; //increment the number of bits in tmp
172  }
173 
174  //************************************************************************/
// Decodes MEL codewords from melp->tmp and queues the resulting runs in
// melp->runs, 7 bits per entry, until either fewer than 6 bits remain in
// tmp or 8 runs are queued. mel_read is called first if tmp holds fewer
// than 6 bits.
// Each queued value is (count << 1) | flag, where flag is 1 when the stretch
// of zero events terminates with a one and 0 when it does not.
189  static inline
190  void mel_decode(dec_mel_st *melp)
191  {
192  static const int mel_exp[13] = { //MEL exponents
193  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
194  };
195 
196  if (melp->bits < 6) // if there are less than 6 bits in tmp
197  mel_read(melp); // then read from the MEL bitstream
198  // 6 bits is the largest decodable MEL cwd
199 
200  //repeat so long that there is enough decodable bits in tmp,
201  // and the runs store is not full (num_runs < 8)
202  while (melp->bits >= 6 && melp->num_runs < 8)
203  {
204  int eval = mel_exp[melp->k]; // number of bits associated with state
205  int run = 0;
206  if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
207  { //one is found
208  run = 1 << eval;
209  run--; // consecutive runs of 0 events - 1
210  melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
211  melp->tmp <<= 1; // consume one bit from tmp
212  melp->bits -= 1;
213  run = run << 1; // a stretch of zeros not terminating in one
214  }
215  else
216  { //0 is found
217  run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
218  melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
219  melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
220  melp->bits -= eval + 1;
221  run = (run << 1) + 1; // a stretch of zeros terminating with one
222  }
223  eval = melp->num_runs * 7; // 7 bits per run
224  melp->runs &= ~((ui64)0x3F << eval); // 6 bits are sufficient
225  melp->runs |= ((ui64)run) << eval; // store the value in runs
226  melp->num_runs++; // increment count
227  }
228  }
229 
230  //************************************************************************/
// Initialises the MEL decoder state.
//   melp  decoder state to reset
//   bbuf  start of the coded data
//   lcup  length of the cleanup segment; the MEL data starts at
//         bbuf + lcup - scup
//   scup  length of the MEL+VLC part
// After resetting the state, 1 to 4 bytes are read one at a time (with
// unstuffing) so that the read pointer reaches a multiple-of-4 address, and
// the bits read are pushed to the top of tmp.
240  static inline
241  void mel_init(dec_mel_st *melp, ui8* bbuf, int lcup, int scup)
242  {
243  melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
244  melp->bits = 0; // 0 bits in tmp
245  melp->tmp = 0; //
246  melp->unstuff = false; // no unstuffing
247  melp->size = scup - 1; // size is the length of MEL+VLC-1
248  melp->k = 0; // 0 for state
249  melp->num_runs = 0; // num_runs is 0
250  melp->runs = 0; //
251 
252  //This code is borrowed; original is for a different architecture
253  //These few lines take care of the case where data is not at a multiple
254  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MEL segment
255  int num = 4 - (int)(intptr_t(melp->data) & 0x3);
256  for (int i = 0; i < num; ++i) { // this code is similar to mel_read
257  assert(melp->unstuff == false || melp->data[0] <= 0x8F);
258  ui64 d = (melp->size > 0) ? *melp->data : 0xFF;//if buffer is consumed
259  //set data to 0xFF
260  if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
261  // see the standard
262  melp->data += melp->size-- > 0; //increment if the end is not reached
263  int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
264  melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
265  melp->bits += d_bits; //increment tmp by number of bits
266  melp->unstuff = ((d & 0xFF) == 0xFF); //true if next byte needs
267  //unstuffing
268  }
269  melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
270  // is the MSB
271  }
272 
273  //************************************************************************/
// Returns the next run from the queue in melp->runs (7 bits per entry),
// calling mel_decode first if the queue is empty.
// NOTE(review): the declarator line (listing line 280) is missing from this
// listing. The body uses `melp` and returns an int, and this file calls
// mel_get_run(&mel), so it is presumably
// `int mel_get_run(dec_mel_st *melp)` -- confirm against the original file.
279  static inline
281  {
282  if (melp->num_runs == 0) //if no runs, decode more bit from MEL segment
283  mel_decode(melp);
284 
285  int t = melp->runs & 0x7F; //retrieve one run
286  melp->runs >>= 7; // remove the retrieved run
287  melp->num_runs--;
288  return t; // return run
289  }
290 
291  //************************************************************************/
// State of a bitstream that is read backwards (towards lower addresses);
// used by the rev_* functions in this file.
// NOTE(review): the constructor initialises `data`, `tmp` and `bits`, but
// their declarations are not visible in this listing (listing lines 299-301
// and 304 are absent) -- confirm against the original file.
295  struct rev_struct {
296  rev_struct() : data(NULL), tmp(0), bits(0), size(0), unstuff(false)
297  {}
298  //storage
302  int size; // number of bytes still to be read
303  bool unstuff; // true if the last byte read was > 0x8F
305  };
306 
307  //************************************************************************/
// Reads up to 4 bytes from the VLC segment, walking backwards through
// memory, and places them in vlcp->tmp above the bits already held there.
// Returns immediately if tmp already holds more than 32 bits. A byte
// contributes only 7 bits when the previously read byte was > 0x8F and the
// low 7 bits of this byte are all ones. Positions for which no bytes remain
// are filled with zeros.
// NOTE(review): the 32-bit load only agrees with the byte-wise tail path on
// a little-endian host -- confirm the supported targets.
327  inline void rev_read(rev_struct *vlcp)
328  {
329  //process 4 bytes at a time
330  if (vlcp->bits > 32) // if there are more than 32 bits in tmp, then
331  return; // reading 32 bits can overflow vlcp->tmp
332  ui32 val = 0;
333  //the next line (the if statement) needs to be tested first
334  if (vlcp->size > 3) // if there are more than 3 bytes left in VLC
335  {
336  // (vlcp->data - 3) move pointer back to read 32 bits at once
337  val = *(ui32*)(vlcp->data - 3); // then read 32 bits
338  vlcp->data -= 4; // move data pointer back by 4
339  vlcp->size -= 4; // reduce available byte by 4
340  }
341  else if (vlcp->size > 0)
342  { // 3 or less
343  int i = 24;
344  while (vlcp->size > 0) {
345  ui32 v = *vlcp->data--; // read one byte at a time
346  val |= (v << i); // put byte in its correct location
347  --vlcp->size;
348  i -= 8;
349  }
350  }
351 
352  //accumulate in tmp, number of bits in tmp are stored in bits
353  ui32 tmp = val >> 24; //start with the MSB byte
354  ui32 bits;
355 
356  // test unstuff (previous byte is >0x8F), and this byte is 0x7F
357  bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
358  bool unstuff = (val >> 24) > 0x8F; //this is for the next byte
359 
360  tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
361  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
362  unstuff = ((val >> 16) & 0xFF) > 0x8F;
363 
364  tmp |= ((val >> 8) & 0xFF) << bits;
365  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
366  unstuff = ((val >> 8) & 0xFF) > 0x8F;
367 
368  tmp |= (val & 0xFF) << bits;
369  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
370  unstuff = (val & 0xFF) > 0x8F;
371 
372  // now move the read and unstuffed bits into vlcp->tmp
373  vlcp->tmp |= (ui64)tmp << vlcp->bits;
374  vlcp->bits += bits;
375  vlcp->unstuff = unstuff; // this for the next read
376  }
377 
378  //************************************************************************/
// Initialises the backward reader for the VLC segment.
//   vlcp  reader state to set up
//   data  start of the coded data
//   lcup  length of the cleanup segment; reading starts at data[lcup - 2]
//   scup  length of the MEL+VLC part; at most scup - 2 further bytes are read
// Only the upper 4 bits of the first byte are taken. Then up to 4 more bytes
// (fewer if less data is available) are read one at a time so that later
// 32-bit reads are aligned, and rev_read tops the buffer up.
392  inline void rev_init(rev_struct *vlcp, ui8* data, int lcup, int scup)
393  {
394  //first byte has only the upper 4 bits
395  vlcp->data = data + lcup - 2;
396 
397  //size can not be larger than this, in fact it should be smaller
398  vlcp->size = scup - 2;
399 
400  ui32 d = *vlcp->data--; // read one byte (this is a half byte)
401  vlcp->tmp = d >> 4; // both initialize and set
402  vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
403  vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
404 
405  //This code is designed for an architecture that read address should
406  // align to the read size (address multiple of 4 if read size is 4)
407  //These few lines take care of the case where data is not at a multiple
408  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream.
409  // To read 32 bits, read from (vlcp->data - 3)
410  int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
411  int tnum = num < vlcp->size ? num : vlcp->size; // do not read past size
412  for (int i = 0; i < tnum; ++i) {
413  ui64 d;
414  d = *vlcp->data--; // read one byte and move read pointer
415  //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
416  ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
417  vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
418  vlcp->bits += d_bits;
419  vlcp->unstuff = d > 0x8F; // for next byte
420  }
421  vlcp->size -= tnum;
422  rev_read(vlcp); // read another 32 bits
423  }
424 
425  //************************************************************************/
432  inline ui32 rev_fetch(rev_struct *vlcp)
433  {
434  if (vlcp->bits < 32) // if there are less then 32 bits, read more
435  {
436  rev_read(vlcp); // read 32 bits, but unstuffing might reduce this
437  if (vlcp->bits < 32)// if there is still space in vlcp->tmp for 32 bits
438  rev_read(vlcp); // read another 32
439  }
440  return (ui32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
441  }
442 
443  //************************************************************************/
449  inline ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
450  {
451  assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
452  vlcp->tmp >>= num_bits; // remove bits
453  vlcp->bits -= num_bits; // decrement the number of bits
454  return (ui32)vlcp->tmp;
455  }
456 
457  //************************************************************************/
// Reads up to 4 bytes backwards from the stream described by mrp and places
// them in mrp->tmp above the bits already held there. Returns immediately if
// tmp already holds more than 32 bits. A byte contributes only 7 bits when
// the previously read byte was > 0x8F and the low 7 bits of this byte are
// all ones. Positions for which no bytes remain are filled with zeros, and
// are still counted in mrp->bits.
// NOTE(review): the 32-bit load only agrees with the byte-wise tail path on
// a little-endian host -- confirm the supported targets.
468  inline void rev_read_mrp(rev_struct *mrp)
469  {
470  //process 4 bytes at a time
471  if (mrp->bits > 32)
472  return;
473  ui32 val = 0;
474  if (mrp->size > 3) // If there are more than 3 bytes
475  { // (mrp->data - 3) move pointer back to read 32 bits at once
476  val = *(ui32*)(mrp->data - 3); // read 32 bits
477  mrp->data -= 4; // move back pointer
478  mrp->size -= 4; // reduce count
479  }
480  else if (mrp->size > 0)
481  {
482  int i = 24;
483  while (mrp->size > 0) {
484  ui32 v = *mrp->data--; // read one byte at a time
485  val |= (v << i); // put byte in its correct location
486  --mrp->size;
487  i -= 8;
488  }
489  }
490 
491  //accumulate in tmp, and keep count in bits
492  ui32 bits, tmp = val >> 24;
493 
494  //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
495  bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
496  bool unstuff = (val >> 24) > 0x8F;
497 
498  //process the next byte
499  tmp |= ((val >> 16) & 0xFF) << bits;
500  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
501  unstuff = ((val >> 16) & 0xFF) > 0x8F;
502 
503  tmp |= ((val >> 8) & 0xFF) << bits;
504  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
505  unstuff = ((val >> 8) & 0xFF) > 0x8F;
506 
507  tmp |= (val & 0xFF) << bits;
508  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
509  unstuff = (val & 0xFF) > 0x8F;
510 
511  mrp->tmp |= (ui64)tmp << mrp->bits; // move data to mrp->tmp
512  mrp->bits += bits;
513  mrp->unstuff = unstuff; // next byte
514  }
515 
516  //************************************************************************/
// Initialises a backward reader over the len2 bytes that follow the first
// lcup bytes of data.
//   mrp   reader state to set up
//   data  start of the coded data
//   lcup  number of bytes that precede the segment being read
//   len2  length of the segment; reading starts at its last byte,
//         data[lcup + len2 - 1]
// unstuff starts as true. 1 to 4 bytes are then read one at a time (zeros
// once the segment is exhausted) so that later 32-bit reads are aligned, and
// rev_read_mrp tops the buffer up.
531  inline void rev_init_mrp(rev_struct *mrp, ui8* data, int lcup, int len2)
532  {
533  mrp->data = data + lcup + len2 - 1;
534  mrp->size = len2;
535  mrp->unstuff = true;
536  mrp->bits = 0;
537  mrp->tmp = 0;
538 
539  //This code is designed for an architecture that read address should
540  // align to the read size (address multiple of 4 if read size is 4)
541  //These few lines take care of the case where data is not at a multiple
542  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MRP stream
543  int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
544  for (int i = 0; i < num; ++i) {
545  ui64 d;
546  //read a byte, 0 if no more data
547  d = (mrp->size-- > 0) ? *mrp->data-- : 0;
548  //check if unstuffing is needed
549  ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
550  mrp->tmp |= d << mrp->bits; // move data to mrp->tmp
551  mrp->bits += d_bits;
552  mrp->unstuff = d > 0x8F; // for next byte
553  }
554  rev_read_mrp(mrp);
555  }
556 
557  //************************************************************************/
// Returns the head (lowest 32 bits) of mrp->tmp without consuming anything,
// calling rev_read_mrp up to twice when fewer than 32 bits are buffered.
// NOTE(review): the declarator line (listing line 564) is missing from this
// listing. By analogy with rev_fetch it is presumably
// `inline ui32 rev_fetch_mrp(rev_struct *mrp)` -- confirm against the
// original file.
565  {
566  if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
567  {
568  rev_read_mrp(mrp); // read 30-32 bits from mrp
569  if (mrp->bits < 32) // if there is a space of 32 bits
570  rev_read_mrp(mrp); // read more
571  }
572  return (ui32)mrp->tmp; // return the head of mrp->tmp
573  }
574 
575  //************************************************************************/
581  inline ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
582  {
583  assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
584  mrp->tmp >>= num_bits; // discard the lowest num_bits bits
585  mrp->bits -= num_bits;
586  return (ui32)mrp->tmp; // return data after consumption
587  }
588 
589  //************************************************************************/
// Fills vlc_tbl0 and vlc_tbl1 from the source tables in table0.h and
// table1.h. For every index i in 0..1023, i >> 7 is taken as the context and
// i & 0x7F as 7 bits of bitstream; if a source entry has that context and its
// codeword equals the low cwd_len bits of those 7 bits, the table entry is
// set to
//   (e_k << 12) | (e_1 << 8) | (rho << 4) | (u_off << 3) | cwd_len.
// Indices that match no source entry keep their previous value. Always
// returns true.
594  static bool vlc_init_tables()
595  {
596  const bool debug = false; //useful for checking
597 
598  //Data in the table is arranged in this format (taken from the standard)
599  // c_q is the context for a quad
600  // rho is the significance pattern for a quad
601  // u_off indicates if u value is 0 (u_off is 0), or communicated
602  // e_k, e_1 EMB patterns
603  // cwd VLC codeword
604  // cwd_len VLC codeword length
605  struct vlc_src_table { int c_q, rho, u_off, e_k, e_1, cwd, cwd_len; };
606  // initial quad rows
607  vlc_src_table tbl0[] = {
608  #include "table0.h"
609  };
610  // number of entries in the table
611  size_t tbl0_size = sizeof(tbl0) / sizeof(vlc_src_table);
612 
613  // non-initial quad rows
614  vlc_src_table tbl1[] = {
615  #include "table1.h"
616  };
617  // number of entries in the table
618  size_t tbl1_size = sizeof(tbl1) / sizeof(vlc_src_table);
619 
620  if (debug) memset(vlc_tbl0, 0, sizeof(vlc_tbl0)); //unnecessary
621 
622  // this is to convert table entries into values for decoder look up
623  // There can be at most 1024 possibilities, not all of them are valid.
624  //
625  for (int i = 0; i < 1024; ++i)
626  {
627  int cwd = i & 0x7F; // from i extract codeword
628  int c_q = i >> 7; // from i extract context
629  // See if this case exist in the table, if so then set the entry in
630  // vlc_tbl0
631  for (size_t j = 0; j < tbl0_size; ++j)
632  if (tbl0[j].c_q == c_q) // this is an and operation
633  if (tbl0[j].cwd == (cwd & ((1 << tbl0[j].cwd_len) - 1)))
634  {
635  if (debug) assert(vlc_tbl0[i] == 0);
636  // Put this entry into the table
637  vlc_tbl0[i] = (ui16)((tbl0[j].rho << 4) | (tbl0[j].u_off << 3)
638  | (tbl0[j].e_k << 12) | (tbl0[j].e_1 << 8) | tbl0[j].cwd_len);
639  }
640  }
641 
642  if (debug) memset(vlc_tbl1, 0, sizeof(vlc_tbl1)); //unnecessary
643 
644  // this is the same as above but for non-initial rows
645  for (int i = 0; i < 1024; ++i)
646  {
647  int cwd = i & 0x7F; //7 bits
648  int c_q = i >> 7;
649  for (size_t j = 0; j < tbl1_size; ++j)
650  if (tbl1[j].c_q == c_q) // this is an and operation
651  if (tbl1[j].cwd == (cwd & ((1 << tbl1[j].cwd_len) - 1)))
652  {
653  if (debug) assert(vlc_tbl1[i] == 0);
654  vlc_tbl1[i] = (ui16)((tbl1[j].rho << 4) | (tbl1[j].u_off << 3)
655  | (tbl1[j].e_k << 12) | (tbl1[j].e_1 << 8) | tbl1[j].cwd_len);
656  }
657  }
658 
659  return true;
660  }
661 
662  //************************************************************************/
// Decodes the U-VLC codewords of a quad pair in the initial line.
//   vlc   head of the VLC bitstream; bits are consumed from the LSB
//   mode  0: both u_off are 0
//         1: only the first quad has a u value; 2: only the second quad
//         3: both u_off are 1 and the MEL event is 0
//         4: both u_off are 1 and the MEL event is 1
//   u     receives two values, u[0] for the first quad and u[1] for the
//         second; each already includes kappa (1 for the initial line)
// Returns the number of bits consumed from vlc. For any other mode, u is left
// untouched and 0 is returned.
673  inline ui32 decode_init_uvlc(ui32 vlc, ui32 mode, ui32 *u)
674  {
675  //table stores possible decoding three bits from vlc
676  // there are 8 entries for xx1, x10, 100, 000, where x means do not care
677  // table value is made up of
678  // 2 bits in the LSB for prefix length
679  // 3 bits for suffix length
680  // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
681  static const ui8 dec[8] = { // the index is the prefix codeword
682  3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
683  1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
684  2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
685  1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
686  3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
687  1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
688  2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
689  1 | (0 << 2) | (1 << 5) //111 == xx1, prefix codeword "1"
690  };
691 
692  ui32 consumed_bits = 0;
693  if (mode == 0) // both u_off are 0
694  {
695  u[0] = u[1] = 1; //Kappa is 1 for initial line
696  }
697  else if (mode <= 2) // u_off are either 01 or 10
698  {
699  ui32 d = dec[vlc & 0x7]; //look at the least significant 3 bits
700  vlc >>= d & 0x3; //prefix length
701  consumed_bits += d & 0x3;
702 
703  ui32 suffix_len = ((d >> 2) & 0x7);
704  consumed_bits += suffix_len;
705 
706  d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
707  u[0] = (mode == 1) ? d + 1 : 1; // kappa is 1 for initial line
708  u[1] = (mode == 1) ? 1 : d + 1; // kappa is 1 for initial line
709  }
710  else if (mode == 3) // both u_off are 1, and MEL event is 0
711  {
712  ui32 d1 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
713  vlc >>= d1 & 0x3; // Consume bits
714  consumed_bits += d1 & 0x3;
715 
716  if ((d1 & 0x3) > 2) // first prefix is 3 bits long
717  {
718  //u_{q_2} prefix
719  u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
720  ++consumed_bits;
721  vlc >>= 1;
722 
723  ui32 suffix_len = ((d1 >> 2) & 0x7);
724  consumed_bits += suffix_len;
725  d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
726  u[0] = d1 + 1; //Kappa is 1 for initial line
727  }
728  else
729  {
730  ui32 d2 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
731  vlc >>= d2 & 0x3; // Consume bits
732  consumed_bits += d2 & 0x3;
733 
734  ui32 suffix_len = ((d1 >> 2) & 0x7);
735  consumed_bits += suffix_len;
736 
737  d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
738  u[0] = d1 + 1; //Kappa is 1 for initial line
739  vlc >>= suffix_len;
740 
741  suffix_len = ((d2 >> 2) & 0x7);
742  consumed_bits += suffix_len;
743 
744  d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
745  u[1] = d2 + 1; //Kappa is 1 for initial line
746  }
747  }
748  else if (mode == 4) // both u_off are 1, and MEL event is 1
749  {
750  ui32 d1 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
751  vlc >>= d1 & 0x3; // Consume bits
752  consumed_bits += d1 & 0x3;
753 
754  ui32 d2 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
755  vlc >>= d2 & 0x3; // Consume bits
756  consumed_bits += d2 & 0x3;
757 
758  ui32 suffix_len = ((d1 >> 2) & 0x7);
759  consumed_bits += suffix_len;
760 
761  d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
762  u[0] = d1 + 3; // add 2+kappa
763  vlc >>= suffix_len;
764 
765  suffix_len = ((d2 >> 2) & 0x7);
766  consumed_bits += suffix_len;
767 
768  d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
769  u[1] = d2 + 3; // add 2+kappa
770  }
771  return consumed_bits;
772  }
773 
774  //************************************************************************/
// Decodes the U-VLC codewords of a quad pair in a non-initial line.
//   vlc   head of the VLC bitstream; bits are consumed from the LSB
//   mode  0: both u_off are 0
//         1: only the first quad has a u value; 2: only the second quad
//         3: both u_off are 1
//   u     receives two values, u[0] for the first quad and u[1] for the
//         second; each is the decoded u value plus 1 (for kappa)
// Returns the number of bits consumed from vlc. For any other mode, u is left
// untouched and 0 is returned.
783  inline ui32 decode_noninit_uvlc(ui32 vlc, ui32 mode, ui32 *u)
784  {
785  //table stores possible decoding three bits from vlc
786  // there are 8 entries for xx1, x10, 100, 000, where x means do not care
787  // table value is made up of
788  // 2 bits in the LSB for prefix length
789  // 3 bits for suffix length
790  // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
791  static const ui8 dec[8] = {
792  3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
793  1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
794  2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
795  1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
796  3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
797  1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
798  2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
799  1 | (0 << 2) | (1 << 5) //111 == xx1, prefix codeword "1"
800  };
801 
802  ui32 consumed_bits = 0;
803  if (mode == 0)
804  {
805  u[0] = u[1] = 1; //for kappa
806  }
807  else if (mode <= 2) //u_off are either 01 or 10
808  {
809  ui32 d = dec[vlc & 0x7]; //look at the least significant 3 bits
810  vlc >>= d & 0x3; //prefix length
811  consumed_bits += d & 0x3;
812 
813  ui32 suffix_len = ((d >> 2) & 0x7);
814  consumed_bits += suffix_len;
815 
816  d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
817  u[0] = (mode == 1) ? d + 1 : 1; //for kappa
818  u[1] = (mode == 1) ? 1 : d + 1; //for kappa
819  }
820  else if (mode == 3) // both u_off are 1
821  {
822  ui32 d1 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
823  vlc >>= d1 & 0x3; // Consume bits
824  consumed_bits += d1 & 0x3;
825 
826  ui32 d2 = dec[vlc & 0x7]; // LSBs of VLC are prefix codeword
827  vlc >>= d2 & 0x3; // Consume bits
828  consumed_bits += d2 & 0x3;
829 
830  ui32 suffix_len = ((d1 >> 2) & 0x7);
831  consumed_bits += suffix_len;
832 
833  d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
834  u[0] = d1 + 1; //1 for kappa
835  vlc >>= suffix_len;
836 
837  suffix_len = ((d2 >> 2) & 0x7);
838  consumed_bits += suffix_len;
839 
840  d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
841  u[1] = d2 + 1; //1 for kappa
842  }
843  return consumed_bits;
844  }
845 
846 
847  //************************************************************************/
852 
853  //************************************************************************/
// State of a bitstream that is read forwards (towards higher addresses);
// used by the frwd_* functions in this file.
// NOTE(review): those functions also use members `tmp` and `bits`, whose
// declarations are not visible in this listing (listing lines 859-860 are
// absent) -- confirm against the original file.
857  struct frwd_struct {
858  const ui8* data; // next byte to read
861  bool unstuff; // true if the last byte read was 0xFF
862  int size; // number of bytes still to be read
863  };
864 
865  //************************************************************************/
// Reads 4 bytes forwards from the stream described by msp and places them in
// msp->tmp above the bits already held there; a byte that follows a 0xFF
// byte contributes only 7 bits. Positions for which no bytes remain are
// filled with 0xFF bytes when X != 0 and with zeros when X == 0.
// The caller must ensure msp->bits <= 32 (asserted).
// NOTE(review): the declarator line (listing line 884) is missing from this
// listing. The body has no return statement and is invoked in this file as
// frwd_read<X>(msp), so it is presumably
// `void frwd_read(frwd_struct *msp)` -- confirm against the original file.
// NOTE(review): the 32-bit load only agrees with the byte-wise tail path on
// a little-endian host -- confirm the supported targets.
883  template<int X>
885  {
886  assert(msp->bits <= 32); // assert that there is a space for 32 bits
887 
888  ui32 val = 0;
889  if (msp->size > 3) {
890  val = *(ui32*)msp->data; // read 32 bits
891  msp->data += 4; // increment pointer
892  msp->size -= 4; // reduce size
893  }
894  else if (msp->size > 0)
895  {
896  int i = 0;
897  val = X != 0 ? 0xFFFFFFFFu : 0; // fill value for the missing bytes
898  while (msp->size > 0) {
899  ui32 v = *msp->data++; // read one byte at a time
900  ui32 m = ~(0xFFu << i); // mask of location
901  val = (val & m) | (v << i);// put one byte in its correct location
902  --msp->size;
903  i += 8;
904  }
905  }
906  else
907  val = X != 0 ? 0xFFFFFFFFu : 0; // nothing left; use the fill value
908 
909  // we accumulate in t and keep a count of the number of bits in bits
910  ui32 bits = 8 - msp->unstuff;
911  ui32 t = val & 0xFF;
912  bool unstuff = ((val & 0xFF) == 0xFF); // Do we need unstuffing next?
913 
914  t |= ((val >> 8) & 0xFF) << bits;
915  bits += 8 - unstuff;
916  unstuff = (((val >> 8) & 0xFF) == 0xFF);
917 
918  t |= ((val >> 16) & 0xFF) << bits;
919  bits += 8 - unstuff;
920  unstuff = (((val >> 16) & 0xFF) == 0xFF);
921 
922  t |= ((val >> 24) & 0xFF) << bits;
923  bits += 8 - unstuff;
924  msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
925 
926  msp->tmp |= ((ui64)t) << msp->bits; // move data to msp->tmp
927  msp->bits += bits;
928  }
929 
930  //************************************************************************/
// Initialises a forward reader over size bytes starting at data.
//   X     template parameter: value substituted for each byte once the
//         stream is exhausted
//   msp   reader state to set up
//   data  first byte of the stream
//   size  number of bytes in the stream
// 1 to 4 bytes are read one at a time (with unstuffing) so that the read
// pointer reaches a multiple-of-4 address, then frwd_read<X> tops the
// buffer up.
939  template<int X>
940  void frwd_init(frwd_struct *msp, const ui8* data, int size)
941  {
942  msp->data = data;
943  msp->tmp = 0;
944  msp->bits = 0;
945  msp->unstuff = false;
946  msp->size = size;
947 
948  //This code is designed for an architecture that read address should
949  // align to the read size (address multiple of 4 if read size is 4)
950  //These few lines take care of the case where data is not at a multiple
951  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the bitstream
952  int num = 4 - (int)(intptr_t(msp->data) & 0x3);
953  for (int i = 0; i < num; ++i)
954  {
955  ui64 d;
956  //read a byte if the buffer is not exhausted, otherwise set it to X
957  d = msp->size-- > 0 ? *msp->data++ : X;
958  msp->tmp |= (d << msp->bits); // store data in msp->tmp
959  msp->bits += 8 - msp->unstuff; // number of bits added to msp->tmp
960  msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
961  }
962  frwd_read<X>(msp); // read 32 bits more
963  }
964 
965  //************************************************************************/
971  inline void frwd_advance(frwd_struct *msp, ui32 num_bits)
972  {
973  assert(num_bits <= msp->bits);
974  msp->tmp >>= num_bits; // consume num_bits
975  msp->bits -= num_bits;
976  }
977 
978  //************************************************************************/
// Returns the head (lowest 32 bits) of msp->tmp without consuming anything,
// calling frwd_read<X> up to twice when fewer than 32 bits are buffered.
// NOTE(review): the declarator line (listing line 986) is missing from this
// listing. By analogy with rev_fetch it is presumably
// `ui32 frwd_fetch(frwd_struct *msp)` -- confirm against the original file.
985  template<int X>
987  {
988  if (msp->bits < 32)
989  {
990  frwd_read<X>(msp);
991  if (msp->bits < 32) //need to test
992  frwd_read<X>(msp);
993  }
994  return (ui32)msp->tmp;
995  }
996 
997  //************************************************************************/
1014  bool ojph_decode_codeblock(ui8* coded_data, ui32* decoded_data,
1015  ui32 missing_msbs, ui32 num_passes,
1016  ui32 lengths1, ui32 lengths2,
1017  ui32 width, ui32 height, ui32 stride,
1018  bool stripe_causal)
1019  {
1020  static bool insufficient_precision = false;
1021  static bool modify_code = false;
1022  static bool truncate_spp_mrp = false;
1023 
1024  /* sigma1 and sigma2 contains significant (i.e., non-zero) pixel
1025  * locations. The buffers are used interchangeably, because we need
1026  * more than 4 rows of significance information at a given time.
1027  * Each 32 bits contain significance information for 4 rows of 8
1028  * columns each. If we denote 32 bits by 0xaaaaaaaa, then each "a" is
1029  * called a nibble and has significance information for 4 rows.
1030  * The least significant nibble has information for the first column,
1031  * and so on. The nibble's LSB is for the first row, and so on.
1032  * Since, at most, we can have 1024 columns in a quad, we need 128
1033  * entries; we added 1 for convenience when propagation of significance
1034  * goes outside the structure
1035  */
1036  ui32 sigma1[129] = { 0 }, sigma2[129] = { 0 };
1037  // mbr arrangement is similar to sigma; mbr contains locations
1038  // that become significant during significance propagation pass
1039  ui32 mbr1[129] = { 0 }, mbr2[129] = { 0 };
1040  //a pointer to sigma
1041  ui32 *sip = sigma1; //pointers to arrays to be used interchangeably
1042  ui32 sip_shift = 0; //the amount of shift needed for sigma
1043 
1044  if (num_passes > 1 && lengths2 == 0)
1045  {
1046  OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
1047  "one coding pass, but zero length for "
1048  "2nd and potential 3rd pass.\n");
1049  num_passes = 1;
1050  }
1051 
1052  if (num_passes > 3)
1053  {
1054  OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
1055  "This codeblocks has %d passes.\n",
1056  num_passes);
1057  return false;
1058  }
1059 
1060  if (missing_msbs > 30) // p < 0
1061  {
1062  if (insufficient_precision == false)
1063  {
1064  insufficient_precision = true;
1065  OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
1066  "codeblock. This message will not be "
1067  "displayed again.\n");
1068  }
1069  return false;
1070  }
1071  else if (missing_msbs == 30) // p == 0
1072  { // not enough precision to decode and set the bin center to 1
1073  if (modify_code == false) {
1074  modify_code = true;
1075  OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
1076  "pass. The code can be modified to support "
1077  "this case. This message will not be "
1078  "displayed again.\n");
1079  }
1080  return false; // 32 bits are not enough to decode this
1081  }
1082  else if (missing_msbs == 29) // if p is 1, then num_passes must be 1
1083  {
1084  if (num_passes > 1) {
1085  num_passes = 1;
1086  if (truncate_spp_mrp == false) {
1087  truncate_spp_mrp = true;
1088  OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
1089  "nor MagRef passes; both will be skipped. "
1090  "This message will not be displayed "
1091  "again.\n");
1092  }
1093  }
1094  }
1095  ui32 p = 30 - missing_msbs; // The least significant bitplane for CUP
1096  // There is a way to handle the case of p == 0, but a different path
1097  // is required
1098 
1099  if (lengths1 < 2)
1100  {
1101  OJPH_WARN(0x00010006, "Wrong codeblock length.\n");
1102  return false;
1103  }
1104 
1105  // read scup and fix the bytes there
1106  int lcup, scup;
1107  lcup = (int)lengths1; // length of CUP
1108  //scup is the length of MEL + VLC
1109  scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
1110  if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
1111  return false;
1112 
1113  // init structures
1114  dec_mel_st mel;
1115  mel_init(&mel, coded_data, lcup, scup);
1116  rev_struct vlc;
1117  rev_init(&vlc, coded_data, lcup, scup);
1118  frwd_struct magsgn;
1119  frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1120  frwd_struct sigprop;
1121  if (num_passes > 1) // needs to be tested
1122  frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1123  rev_struct magref;
1124  if (num_passes > 2)
1125  rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
1126 
1135  ui8 *lsp, line_state[514]; //enough for 1024, max block width, + 2 extra
1136 
1137  //initial 2 lines
1139  lsp = line_state; // point to line state
1140  lsp[0] = 0; // for initial row of quad, we set to 0
1141  int run = mel_get_run(&mel); // decode runs of events from MEL bitstrm
1142  // data represented as runs of 0 events
1143  // See mel_decode description
1144  ui32 vlc_val; // fetched data from VLC bitstream
1145  ui32 qinf[2] = { 0 }; // quad info decoded from VLC bitstream
1146  ui32 c_q = 0; // context for quad q
1147  ui32* sp = decoded_data; // decoded codeblock samples
1148 
1149  for (ui32 x = 0; x < width; x += 4) // one iteration per quad pair
1150  {
1151  // decode VLC
1153 
1154  //first quad
1155  // Get the head of the VLC bitstream. One fetch is enough for two
1156  // quads, since the largest VLC code is 7 bits, and maximum number of
1157  // bits used for u is 8. Therefore for two quads we need 30 bits
1158  // (if we include unstuffing, then 32 bits are enough, since we have
1159  // a maximum of one stuffing per two bytes)
1160  vlc_val = rev_fetch(&vlc);
1161 
1162  //decode VLC using the context c_q and the head of the VLC bitstream
1163  qinf[0] = vlc_tbl0[ (c_q << 7) | (vlc_val & 0x7F) ];
1164 
1165  if (c_q == 0) // if zero context, we need to use one MEL event
1166  {
1167  run -= 2; //the number of 0 events is multiplied by 2, so subtract 2
1168 
1169  // Is the run terminated in 1? if so, use decoded VLC code,
1170  // otherwise, discard decoded data, since we will decode again
1171  // using a different context
1172  qinf[0] = (run == -1) ? qinf[0] : 0;
1173 
1174  // is run -1 or -2? this means a run has been consumed
1175  if (run < 0)
1176  run = mel_get_run(&mel); // get another run
1177  }
1178 
1179  // prepare context for the next quad; eqn. 1 in ITU T.814
1180  c_q = ((qinf[0] & 0x10) >> 4) | ((qinf[0] & 0xE0) >> 5);
1181 
1182  //remove data from vlc stream (0 bits are removed if qinf is not used)
1183  vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
1184 
1185  //update sigma
1186  // The update depends on the value of x; consider one ui32
1187  // if x is 0, 8, 16 and so on, then this line update c locations
1188  // nibble (4 bits) number 0 1 2 3 4 5 6 7
1189  // LSB c c 0 0 0 0 0 0
1190  // c c 0 0 0 0 0 0
1191  // 0 0 0 0 0 0 0 0
1192  // 0 0 0 0 0 0 0 0
1193  // if x is 4, 12, 20, then this line update locations c
1194  // nibble (4 bits) number 0 1 2 3 4 5 6 7
1195  // LSB 0 0 0 0 c c 0 0
1196  // 0 0 0 0 c c 0 0
1197  // 0 0 0 0 0 0 0 0
1198  // 0 0 0 0 0 0 0 0
1199  *sip |= (((qinf[0] & 0x30)>>4) | ((qinf[0] & 0xC0)>>2)) << sip_shift;
1200 
1201  //second quad
1202  qinf[1] = 0;
1203  if (x + 2 < width) // do not run if codeblock is narrower
1204  {
1205  //decode VLC using the context c_q and the head of the VLC bitstream
1206  qinf[1] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F)];
1207 
1208  // if context is zero, use one MEL event
1209  if (c_q == 0) //zero context
1210  {
1211  run -= 2; //subtract 2, since events number is multiplied by 2
1212 
1213  // if event is 0, discard decoded qinf
1214  qinf[1] = (run == -1) ? qinf[1] : 0;
1215 
1216  if (run < 0) // have we consumed all events in a run
1217  run = mel_get_run(&mel); // if yes, then get another run
1218  }
1219 
1220  //prepare context for the next quad, eqn. 1 in ITU T.814
1221  c_q = ((qinf[1] & 0x10) >> 4) | ((qinf[1] & 0xE0) >> 5);
1222 
1223  //remove data from vlc stream, if qinf is not used, cwdlen is 0
1224  vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
1225  }
1226 
1227  //update sigma
1228  // The update depends on the value of x; consider one ui32
1229  // if x is 0, 8, 16 and so on, then this line update c locations
1230  // nibble (4 bits) number 0 1 2 3 4 5 6 7
1231  // LSB 0 0 c c 0 0 0 0
1232  // 0 0 c c 0 0 0 0
1233  // 0 0 0 0 0 0 0 0
1234  // 0 0 0 0 0 0 0 0
1235  // if x is 4, 12, 20, then this line update locations c
1236  // nibble (4 bits) number 0 1 2 3 4 5 6 7
1237  // LSB 0 0 0 0 0 0 c c
1238  // 0 0 0 0 0 0 c c
1239  // 0 0 0 0 0 0 0 0
1240  // 0 0 0 0 0 0 0 0
1241  *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0)<<2))) << (4+sip_shift);
1242 
1243  sip += x & 0x7 ? 1 : 0; // move sigma pointer to next entry
1244  sip_shift ^= 0x10; // increment/decrement sip_shift by 16
1245 
1246  // retrieve u
1248  ui32 U_q[2]; // u values for the quad pair
1249 
1250  // uvlc_mode is made up of u_offset bits from the quad pair
1251  ui32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
1252  if (uvlc_mode == 3) // if both u_offset are set, get an event from
1253  { // the MEL run of events
1254  run -= 2; //subtract 2, since events number is multiplied by 2
1255  uvlc_mode += (run == -1) ? 1 : 0; //increment uvlc_mode if event is 1
1256  if (run < 0) // if run is consumed (run is -1 or -2), get another run
1257  run = mel_get_run(&mel);
1258  }
1259  //decode uvlc_mode to get u for both quads
1260  ui32 consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
1261  if (U_q[0] > missing_msbs + 2 || U_q[1] > missing_msbs + 2)
1262  {
1263  OJPH_WARN(0x00010007, "Malformed codeblock bitstream. Uq is larger "
1264  "than missing_msbs + 2.\n");
1265  return false;
1266  }
1267 
1268  //consume u bits in the VLC code
1269  vlc_val = rev_advance(&vlc, consumed_bits);
1270 
1271  //decode magsgn and update line_state
1273  ui32 m_n, v_n;
1274  ui32 ms_val;
1275 
1276  //We obtain a mask for the samples locations that needs evaluation
1277  ui32 locs = 0xFF;
1278  if (x + 4 > width) locs >>= (x + 4 - width) << 1; // limits width
1279  locs = height > 1 ? locs : (locs & 0x55); // limits height
1280 
1281  if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs)
1282  {
1283  OJPH_WARN(0x00010008, "Malformed codeblock bitstream. Decoded VLC "
1284  "code indicates that there are significant samples "
1285  "outside the codeblock area.\n");
1286  return false;
1287  }
1288 
1289  //first quad, starting at first sample in quad and moving on
1290  if (qinf[0] & 0x10) //is it signifcant? (sigma_n)
1291  {
1292  ms_val = frwd_fetch<0xFF>(&magsgn); //get 32 bits of magsgn data
1293  m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
1294  // to read from bitstream), using EMB e_k
1295  frwd_advance(&magsgn, m_n); //consume m_n
1296  ui32 val = ms_val << 31; //get sign bit
1297  v_n = ms_val & ((1U << m_n) - 1); //keep only m_n bits
1298  v_n |= ((qinf[0] & 0x100) >> 8) << m_n; //add EMB e_1 as MSB
1299  v_n |= 1; //add center of bin
1300  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1301  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1302  sp[0] = val | ((v_n + 2) << (p - 1));
1303  }
1304  else if (locs & 0x1) // if this is inside the codeblock, set the
1305  sp[0] = 0; // sample to zero
1306 
1307  if (qinf[0] & 0x20) //sigma_n
1308  {
1309  ms_val = frwd_fetch<0xFF>(&magsgn); //get 32 bits
1310  m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
1311  frwd_advance(&magsgn, m_n); //consume m_n
1312  ui32 val = ms_val << 31; //get sign bit
1313  v_n = ms_val & ((1U << m_n) - 1); //keep only m_n bits
1314  v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
1315  v_n |= 1; //bin center
1316  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1317  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1318  sp[stride] = val | ((v_n + 2) << (p - 1));
1319 
1320  //update line_state: bit 7 (\sigma^N), and E^N
1321  ui32 t = lsp[0] & 0x7F; //keep E^NW
1322  v_n = 32 - count_leading_zeros(v_n);
1323  lsp[0] = (ui8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N)|\sigma^N
1324  }
1325  else if (locs & 0x2) // if this is inside the codeblock, set the
1326  sp[stride] = 0; // sample to zero
1327 
1328  ++lsp; // move to next quad information
1329  ++sp; // move to next column of samples
1330 
1331  //this is similar to the above two samples
1332  if (qinf[0] & 0x40)
1333  {
1334  ms_val = frwd_fetch<0xFF>(&magsgn);
1335  m_n = U_q[0] - ((qinf[0] >> 14) & 1);
1336  frwd_advance(&magsgn, m_n);
1337  ui32 val = ms_val << 31;
1338  v_n = ms_val & ((1U << m_n) - 1);
1339  v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
1340  v_n |= 1;
1341  sp[0] = val | ((v_n + 2) << (p - 1));
1342  }
1343  else if (locs & 0x4)
1344  sp[0] = 0;
1345 
1346  lsp[0] = 0;
1347  if (qinf[0] & 0x80)
1348  {
1349  ms_val = frwd_fetch<0xFF>(&magsgn);
1350  m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
1351  frwd_advance(&magsgn, m_n);
1352  ui32 val = ms_val << 31;
1353  v_n = ms_val & ((1U << m_n) - 1);
1354  v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
1355  v_n |= 1; //center of bin
1356  sp[stride] = val | ((v_n + 2) << (p - 1));
1357 
1358  //line_state: bit 7 (\sigma^NW), and E^NW for next quad
1359  lsp[0] = (ui8)(0x80 | (32 - count_leading_zeros(v_n)));
1360  }
1361  else if (locs & 0x8) // if inside set to 0
1362  sp[stride] = 0;
1363 
1364  ++sp; //move to next column
1365 
1366  //second quad
1367  if (qinf[1] & 0x10)
1368  {
1369  ms_val = frwd_fetch<0xFF>(&magsgn);
1370  m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
1371  frwd_advance(&magsgn, m_n);
1372  ui32 val = ms_val << 31;
1373  v_n = ms_val & ((1U << m_n) - 1);
1374  v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
1375  v_n |= 1;
1376  sp[0] = val | ((v_n + 2) << (p - 1));
1377  }
1378  else if (locs & 0x10)
1379  sp[0] = 0;
1380 
1381  if (qinf[1] & 0x20)
1382  {
1383  ms_val = frwd_fetch<0xFF>(&magsgn);
1384  m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
1385  frwd_advance(&magsgn, m_n);
1386  ui32 val = ms_val << 31;
1387  v_n = ms_val & ((1U << m_n) - 1);
1388  v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
1389  v_n |= 1;
1390  sp[stride] = val | ((v_n + 2) << (p - 1));
1391 
1392  //update line_state: bit 7 (\sigma^N), and E^N
1393  ui32 t = lsp[0] & 0x7F; //E^NW
1394  v_n = 32 - count_leading_zeros(v_n); //E^N
1395  lsp[0] = (ui8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N)|\sigma^N
1396  }
1397  else if (locs & 0x20)
1398  sp[stride] = 0; //no need to update line_state
1399 
1400  ++lsp; //move line state to next quad
1401  ++sp; //move to next sample
1402 
1403  if (qinf[1] & 0x40)
1404  {
1405  ms_val = frwd_fetch<0xFF>(&magsgn);
1406  m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
1407  frwd_advance(&magsgn, m_n);
1408  ui32 val = ms_val << 31;
1409  v_n = ms_val & ((1U << m_n) - 1);
1410  v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
1411  v_n |= 1;
1412  sp[0] = val | ((v_n + 2) << (p - 1));
1413  }
1414  else if (locs & 0x40)
1415  sp[0] = 0;
1416 
1417  lsp[0] = 0;
1418  if (qinf[1] & 0x80)
1419  {
1420  ms_val = frwd_fetch<0xFF>(&magsgn);
1421  m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
1422  frwd_advance(&magsgn, m_n);
1423  ui32 val = ms_val << 31;
1424  v_n = ms_val & ((1U << m_n) - 1);
1425  v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
1426  v_n |= 1; //center of bin
1427  sp[stride] = val | ((v_n + 2) << (p - 1));
1428 
1429  //line_state: bit 7 (\sigma^NW), and E^NW for next quad
1430  lsp[0] = (ui8)(0x80 | (32 - count_leading_zeros(v_n)));
1431  }
1432  else if (locs & 0x80)
1433  sp[stride] = 0;
1434 
1435  ++sp;
1436  }
1437 
1438  //non-initial lines
1440  for (ui32 y = 2; y < height; /*done at the end of loop*/)
1441  {
1442  sip_shift ^= 0x2; // shift sigma to the upper half of the nibble
1443  sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
1444  ui32 *sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
1445 
1446  lsp = line_state;
1447  ui8 ls0 = lsp[0]; // read the line state value
1448  lsp[0] = 0; // and set it to zero
1449  sp = decoded_data + y * stride; // generated samples
1450  c_q = 0; // context
1451  for (ui32 x = 0; x < width; x += 4)
1452  {
1453  // decode vlc
1455 
1456  //first quad
1457  // get context, eqn. 2 ITU T.814
1458  // c_q has \sigma^W | \sigma^SW
1459  c_q |= (ls0 >> 7); //\sigma^NW | \sigma^N
1460  c_q |= (lsp[1] >> 5) & 0x4; //\sigma^NE | \sigma^NF
1461 
1462  //the following is very similar to previous code, so please refer to
1463  // that
1464  vlc_val = rev_fetch(&vlc);
1465  qinf[0] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
1466  if (c_q == 0) //zero context
1467  {
1468  run -= 2;
1469  qinf[0] = (run == -1) ? qinf[0] : 0;
1470  if (run < 0)
1471  run = mel_get_run(&mel);
1472  }
1473  //prepare context for the next quad, \sigma^W | \sigma^SW
1474  c_q = ((qinf[0] & 0x40) >> 5) | ((qinf[0] & 0x80) >> 6);
1475 
1476  //remove data from vlc stream
1477  vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
1478 
1479  //update sigma
1480  // The update depends on the value of x and y; consider one ui32
1481  // if x is 0, 8, 16 and so on, and y is 2, 6, etc., then this
1482  // line update c locations
1483  // nibble (4 bits) number 0 1 2 3 4 5 6 7
1484  // LSB 0 0 0 0 0 0 0 0
1485  // 0 0 0 0 0 0 0 0
1486  // c c 0 0 0 0 0 0
1487  // c c 0 0 0 0 0 0
1488  *sip |= (((qinf[0]&0x30) >> 4) | ((qinf[0]&0xC0) >> 2)) << sip_shift;
1489 
1490  //second quad
1491  qinf[1] = 0;
1492  if (x + 2 < width)
1493  {
1494  c_q |= (lsp[1] >> 7);
1495  c_q |= (lsp[2] >> 5) & 0x4;
1496  qinf[1] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
1497  if (c_q == 0) //zero context
1498  {
1499  run -= 2;
1500  qinf[1] = (run == -1) ? qinf[1] : 0;
1501  if (run < 0)
1502  run = mel_get_run(&mel);
1503  }
1504  //prepare context for the next quad
1505  c_q = ((qinf[1] & 0x40) >> 5) | ((qinf[1] & 0x80) >> 6);
1506  //remove data from vlc stream
1507  vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
1508  }
1509 
1510  //update sigma
1511  *sip |= (((qinf[1]&0x30) | ((qinf[1]&0xC0) << 2))) << (4+sip_shift);
1512 
1513  sip += x & 0x7 ? 1 : 0;
1514  sip_shift ^= 0x10;
1515 
1516  //retrieve u
1518  ui32 U_q[2];
1519  ui32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
1520  ui32 consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
1521  vlc_val = rev_advance(&vlc, consumed_bits);
1522 
1523  //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
1524  if ((qinf[0] & 0xF0) & ((qinf[0] & 0xF0) - 1)) // is \gamma_q 1?
1525  {
1526  ui32 E = (ls0 & 0x7Fu);
1527  E = E > (lsp[1] & 0x7Fu) ? E : (lsp[1]&0x7Fu); //max(E, E^NE, E^NF)
1528  //since U_q already has u_q + 1, we subtract 2 instead of 1
1529  U_q[0] += E > 2 ? E - 2 : 0;
1530  }
1531 
1532  if ((qinf[1] & 0xF0) & ((qinf[1] & 0xF0) - 1)) //is \gamma_q 1?
1533  {
1534  ui32 E = (lsp[1] & 0x7Fu);
1535  E = E > (lsp[2] & 0x7Fu) ? E : (lsp[2]&0x7Fu); //max(E, E^NE, E^NF)
1536  //since U_q already has u_q + 1, we subtract 2 instead of 1
1537  U_q[1] += E > 2 ? E - 2 : 0;
1538  }
1539 
1540  if (U_q[0] > missing_msbs + 2 || U_q[1] > missing_msbs + 2)
1541  {
1542  OJPH_WARN(0x00010007, "Malformed codeblock bitstream. Uq is "
1543  "larger than missing_msbs + 2\n");
1544  return false;
1545  }
1546 
1547  ls0 = lsp[2]; //for next double quad
1548  lsp[1] = lsp[2] = 0;
1549 
1550  //decode magsgn and update line_state
1552  ui32 m_n, v_n;
1553  ui32 ms_val;
1554 
1555  //locations where samples need update
1556  ui32 locs = 0xFF;
1557  if (x + 4 > width) locs >>= (x + 4 - width) << 1;
1558  locs = y + 2 <= height ? locs : (locs & 0x55);
1559 
1560  if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs)
1561  {
1562  OJPH_WARN(0x00010008, "Malformed codeblock bitstream. Decoded VLC "
1563  "code indicates that there are significant samples "
1564  "outside the codeblock area.\n");
1565  return false;
1566  }
1567 
1568  if (qinf[0] & 0x10) //sigma_n
1569  {
1570  ms_val = frwd_fetch<0xFF>(&magsgn);
1571  m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
1572  frwd_advance(&magsgn, m_n);
1573  ui32 val = ms_val << 31;
1574  v_n = ms_val & ((1U << m_n) - 1);
1575  v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
1576  v_n |= 1; //center of bin
1577  sp[0] = val | ((v_n + 2) << (p - 1));
1578  }
1579  else if (locs & 0x1)
1580  sp[0] = 0;
1581 
1582  if (qinf[0] & 0x20) //sigma_n
1583  {
1584  ms_val = frwd_fetch<0xFF>(&magsgn);
1585  m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
1586  frwd_advance(&magsgn, m_n);
1587  ui32 val = ms_val << 31;
1588  v_n = ms_val & ((1U << m_n) - 1);
1589  v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
1590  v_n |= 1; //center of bin
1591  sp[stride] = val | ((v_n + 2) << (p - 1));
1592 
1593  //update line_state: bit 7 (\sigma^N), and E^N
1594  ui32 t = lsp[0] & 0x7F; //E^NW
1595  v_n = 32 - count_leading_zeros(v_n);
1596  lsp[0] = (ui8)(0x80 | (t > v_n ? t : v_n));
1597  }
1598  else if (locs & 0x2)
1599  sp[stride] = 0; // no need to update line_state
1600 
1601  ++lsp;
1602  ++sp;
1603 
1604  if (qinf[0] & 0x40) //sigma_n
1605  {
1606  ms_val = frwd_fetch<0xFF>(&magsgn);
1607  m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
1608  frwd_advance(&magsgn, m_n);
1609  ui32 val = ms_val << 31;
1610  v_n = ms_val & ((1U << m_n) - 1);
1611  v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
1612  v_n |= 1; //center of bin
1613  sp[0] = val | ((v_n + 2) << (p - 1));
1614  }
1615  else if (locs & 0x4)
1616  sp[0] = 0;
1617 
1618  if (qinf[0] & 0x80) //sigma_n
1619  {
1620  ms_val = frwd_fetch<0xFF>(&magsgn);
1621  m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
1622  frwd_advance(&magsgn, m_n);
1623  ui32 val = ms_val << 31;
1624  v_n = ms_val & ((1U << m_n) - 1);
1625  v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
1626  v_n |= 1; //center of bin
1627  sp[stride] = val | ((v_n + 2) << (p - 1));
1628 
1629  //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
1630  lsp[0] = (ui8)(0x80 | (32 - count_leading_zeros(v_n)));
1631  }
1632  else if (locs & 0x8)
1633  {
1634  sp[stride] = 0;
1635  // lsp[0] = 0; needs testing
1636  }
1637 
1638  ++sp;
1639 
1640  if (qinf[1] & 0x10) //sigma_n
1641  {
1642  ms_val = frwd_fetch<0xFF>(&magsgn);
1643  m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
1644  frwd_advance(&magsgn, m_n);
1645  ui32 val = ms_val << 31;
1646  v_n = ms_val & ((1U << m_n) - 1);
1647  v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
1648  v_n |= 1; //center of bin
1649  sp[0] = val | ((v_n + 2) << (p - 1));
1650  }
1651  else if (locs & 0x10)
1652  sp[0] = 0;
1653 
1654  if (qinf[1] & 0x20) //sigma_n
1655  {
1656  ms_val = frwd_fetch<0xFF>(&magsgn);
1657  m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
1658  frwd_advance(&magsgn, m_n);
1659  ui32 val = ms_val << 31;
1660  v_n = ms_val & ((1U << m_n) - 1);
1661  v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
1662  v_n |= 1; //center of bin
1663  sp[stride] = val | ((v_n + 2) << (p - 1));
1664 
1665  //update line_state: bit 7 (\sigma^N), and E^N
1666  ui32 t = lsp[0] & 0x7F; //E^NW
1667  v_n = 32 - count_leading_zeros(v_n);
1668  lsp[0] = (ui8)(0x80 | (t > v_n ? t : v_n));
1669  }
1670  else if (locs & 0x20)
1671  sp[stride] = 0; //no need to update line_state
1672 
1673  ++lsp;
1674  ++sp;
1675 
1676  if (qinf[1] & 0x40) //sigma_n
1677  {
1678  ms_val = frwd_fetch<0xFF>(&magsgn);
1679  m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
1680  frwd_advance(&magsgn, m_n);
1681  ui32 val = ms_val << 31;
1682  v_n = ms_val & ((1U << m_n) - 1);
1683  v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
1684  v_n |= 1; //center of bin
1685  sp[0] = val | ((v_n + 2) << (p - 1));
1686  }
1687  else if (locs & 0x40)
1688  sp[0] = 0;
1689 
1690  if (qinf[1] & 0x80) //sigma_n
1691  {
1692  ms_val = frwd_fetch<0xFF>(&magsgn);
1693  m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
1694  frwd_advance(&magsgn, m_n);
1695  ui32 val = ms_val << 31;
1696  v_n = ms_val & ((1U << m_n) - 1);
1697  v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
1698  v_n |= 1; //center of bin
1699  sp[stride] = val | ((v_n + 2) << (p - 1));
1700 
1701  //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
1702  lsp[0] = (ui8)(0x80 | (32 - count_leading_zeros(v_n)));
1703  }
1704  else if (locs & 0x80)
1705  {
1706  sp[stride] = 0;
1707  // lsp[0] = 0; needs testing
1708  }
1709 
1710  ++sp;
1711  }
1712 
1713  y += 2;
1714  if (num_passes > 1 && (y & 3) == 0) //executed at multiples of 4
1715  { // This is for SPP and potentially MRP
1716 
1717  if (num_passes > 2) //do MRP
1718  {
1719  // select the current stripe
1720  ui32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
1721  // the address of the data that needs updating
1722  ui32 *dpp = decoded_data + (y - 4) * stride;
1723  ui32 half = 1 << (p - 2); // half the center of the bin
1724  for (ui32 i = 0; i < width; i += 8)
1725  {
1726  //Process one entry from sigma array at a time
1727  // Each nibble (4 bits) in the sigma array represents 4 rows,
1728  // and the 32 bits contain 8 columns
1729  ui32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
1730  ui32 sig = *cur_sig++; // 32 bit that will be processed now
1731  ui32 col_mask = 0xFu; // a mask for a column in sig
1732  ui32 *dp = dpp + i; // next column in decode samples
1733  if (sig) // if any of the 32 bits are set
1734  {
1735  for (int j = 0; j < 8; ++j, dp++) //one column at a time
1736  {
1737  if (sig & col_mask) // lowest nibble
1738  {
1739  ui32 sample_mask = 0x11111111u & col_mask; //LSB
1740 
1741  if (sig & sample_mask) //if LSB is set
1742  {
1743  assert(dp[0] != 0); // decoded value cannot be zero
1744  ui32 sym = cwd & 1; // get it value
1745  // remove center of bin if sym is 0
1746  dp[0] ^= (1 - sym) << (p - 1);
1747  dp[0] |= half; // put half the center of bin
1748  cwd >>= 1; //consume word
1749  }
1750  sample_mask += sample_mask; //next row
1751 
1752  if (sig & sample_mask)
1753  {
1754  assert(dp[stride] != 0);
1755  ui32 sym = cwd & 1;
1756  dp[stride] ^= (1 - sym) << (p - 1);
1757  dp[stride] |= half;
1758  cwd >>= 1;
1759  }
1760  sample_mask += sample_mask;
1761 
1762  if (sig & sample_mask)
1763  {
1764  assert(dp[2 * stride] != 0);
1765  ui32 sym = cwd & 1;
1766  dp[2 * stride] ^= (1 - sym) << (p - 1);
1767  dp[2 * stride] |= half;
1768  cwd >>= 1;
1769  }
1770  sample_mask += sample_mask;
1771 
1772  if (sig & sample_mask)
1773  {
1774  assert(dp[3 * stride] != 0);
1775  ui32 sym = cwd & 1;
1776  dp[3 * stride] ^= (1 - sym) << (p - 1);
1777  dp[3 * stride] |= half;
1778  cwd >>= 1;
1779  }
1780  sample_mask += sample_mask;
1781  }
1782  col_mask <<= 4; //next column
1783  }
1784  }
1785  // consume data according to the number of bits set
1786  rev_advance_mrp(&magref, population_count(sig));
1787  }
1788  }
1789 
1790  if (y >= 4) // update mbr array at the end of each stripe
1791  {
1792  //generate mbr corresponding to a stripe
1793  ui32 *sig = y & 0x4 ? sigma1 : sigma2;
1794  ui32 *mbr = y & 0x4 ? mbr1 : mbr2;
1795 
1796  //data is processed in patches of 8 columns, each
1797  // each 32 bits in sigma1 or mbr1 represent 4 rows
1798 
1799  ui32 prev = 0; // previous columns
1800  for (ui32 i = 0; i < width; i += 8, mbr++, sig++)
1801  {
1802  //integrate horizontally
1803  mbr[0] = sig[0]; //start with significant samples
1804  mbr[0] |= prev >> 28; //for first column, left neighbors
1805  mbr[0] |= sig[0] << 4; //left neighbors
1806  mbr[0] |= sig[0] >> 4; //right neighbors
1807  mbr[0] |= sig[1] << 28; //for last column, right neighbors
1808  prev = sig[0]; // for next group of columns
1809 
1810  //integrate vertically
1811  ui32 t = mbr[0], z = mbr[0];
1812  z |= (t & 0x77777777) << 1; //above neighbors
1813  z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
1814  mbr[0] = z & ~sig[0]; //remove already significance samples
1815  }
1816  }
1817 
1818  if (y >= 8) //wait until 8 rows has been processed
1819  {
1820  ui32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
1821 
1822  // add membership from the next stripe, obtained above
1823  cur_sig = y & 0x4 ? sigma2 : sigma1;
1824  cur_mbr = y & 0x4 ? mbr2 : mbr1;
1825  nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
1826  ui32 prev = 0; // the columns before these group of 8 columns
1827  for (ui32 i = 0; i < width; i+=8, cur_mbr++, cur_sig++, nxt_sig++)
1828  {
1829  ui32 t = nxt_sig[0];
1830  t |= prev >> 28; //for first column, left neighbors
1831  t |= nxt_sig[0] << 4; //left neighbors
1832  t |= nxt_sig[0] >> 4; //right neighbors
1833  t |= nxt_sig[1] << 28; //for last column, right neighbors
1834  prev = nxt_sig[0]; // for next group of columns
1835 
1836  if (stripe_causal == false)
1837  cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
1838  cur_mbr[0] &= ~cur_sig[0]; //remove already significance samples
1839  }
1840 
1841  //find new locations and get signs
1842  cur_sig = y & 0x4 ? sigma2 : sigma1;
1843  cur_mbr = y & 0x4 ? mbr2 : mbr1;
1844  nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
1845  nxt_mbr = y & 0x4 ? mbr1 : mbr2; //future samples
1846  ui32 val = 3u << (p - 2); // sample values for newly discovered
1847  // signficant samples including the bin center
1848  for (ui32 i = 0; i < width;
1849  i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
1850  {
1851  ui32 mbr = *cur_mbr;
1852  ui32 new_sig = 0;
1853  if (mbr) //are there any samples that might be significant
1854  {
1855  for (ui32 n = 0; n < 8; n += 4)
1856  {
1857  ui32 cwd = frwd_fetch<0>(&sigprop); //get 32 bits
1858  ui32 cnt = 0;
1859 
1860  ui32 *dp = decoded_data + (y - 8) * stride;
1861  dp += i + n; //address for decoded samples
1862 
1863  ui32 col_mask = 0xFu << (4 * n); //a mask to select a column
1864 
1865  ui32 inv_sig = ~cur_sig[0]; // insignificant samples
1866 
1867  //find the last sample we operate on
1868  ui32 end = n + 4 + i < width ? n + 4 : width - i;
1869 
1870  for (ui32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
1871  {
1872  if ((col_mask & mbr) == 0) //no samples need checking
1873  continue;
1874 
1875  //scan mbr to find a new signficant sample
1876  ui32 sample_mask = 0x11111111u & col_mask; // LSB
1877  if (mbr & sample_mask)
1878  {
1879  assert(dp[0] == 0); // the sample must have been 0
1880  if (cwd & 1) //if this sample has become significant
1881  { // must propagate it to nearby samples
1882  new_sig |= sample_mask; // new significant samples
1883  ui32 t = 0x32u << (j * 4);// propagation to neighbors
1884  mbr |= t & inv_sig; //remove already signifcant samples
1885  }
1886  cwd >>= 1; ++cnt; //consume bit and increment number of
1887  //consumed bits
1888  }
1889 
1890  sample_mask += sample_mask; // next row
1891  if (mbr & sample_mask)
1892  {
1893  assert(dp[stride] == 0);
1894  if (cwd & 1)
1895  {
1896  new_sig |= sample_mask;
1897  ui32 t = 0x74u << (j * 4);
1898  mbr |= t & inv_sig;
1899  }
1900  cwd >>= 1; ++cnt;
1901  }
1902 
1903  sample_mask += sample_mask;
1904  if (mbr & sample_mask)
1905  {
1906  assert(dp[2 * stride] == 0);
1907  if (cwd & 1)
1908  {
1909  new_sig |= sample_mask;
1910  ui32 t = 0xE8u << (j * 4);
1911  mbr |= t & inv_sig;
1912  }
1913  cwd >>= 1; ++cnt;
1914  }
1915 
1916  sample_mask += sample_mask;
1917  if (mbr & sample_mask)
1918  {
1919  assert(dp[3 * stride] == 0);
1920  if (cwd & 1)
1921  {
1922  new_sig |= sample_mask;
1923  ui32 t = 0xC0u << (j * 4);
1924  mbr |= t & inv_sig;
1925  }
1926  cwd >>= 1; ++cnt;
1927  }
1928  }
1929 
1930  //obtain signs here
1931  if (new_sig & (0xFFFFu << (4 * n))) //if any
1932  {
1933  ui32 *dp = decoded_data + (y - 8) * stride;
1934  dp += i + n; // decoded samples address
1935  ui32 col_mask = 0xFu << (4 * n); //mask to select a column
1936 
1937  for (ui32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
1938  {
1939  if ((col_mask & new_sig) == 0) //if non is signficant
1940  continue;
1941 
1942  //scan 4 signs
1943  ui32 sample_mask = 0x11111111u & col_mask;
1944  if (new_sig & sample_mask)
1945  {
1946  assert(dp[0] == 0);
1947  dp[0] |= ((cwd & 1) << 31) | val; //put value and sign
1948  cwd >>= 1; ++cnt; //consume bit and increment number
1949  //of consumed bits
1950  }
1951 
1952  sample_mask += sample_mask;
1953  if (new_sig & sample_mask)
1954  {
1955  assert(dp[stride] == 0);
1956  dp[stride] |= ((cwd & 1) << 31) | val;
1957  cwd >>= 1; ++cnt;
1958  }
1959 
1960  sample_mask += sample_mask;
1961  if (new_sig & sample_mask)
1962  {
1963  assert(dp[2 * stride] == 0);
1964  dp[2 * stride] |= ((cwd & 1) << 31) | val;
1965  cwd >>= 1; ++cnt;
1966  }
1967 
1968  sample_mask += sample_mask;
1969  if (new_sig & sample_mask)
1970  {
1971  assert(dp[3 * stride] == 0);
1972  dp[3 * stride] |= ((cwd & 1) << 31) | val;
1973  cwd >>= 1; ++cnt;
1974  }
1975  }
1976 
1977  }
1978  frwd_advance(&sigprop, cnt); //consume the bits from bitstrm
1979  cnt = 0;
1980 
1981  //update the next 8 columns
1982  if (n == 4)
1983  {
1984  //horizontally
1985  ui32 t = new_sig >> 28;
1986  t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
1987  cur_mbr[1] |= t & ~cur_sig[1];
1988  }
1989  }
1990  }
1991  //update the next stripe (vertically propagation)
1992  new_sig |= cur_sig[0];
1993  ui32 u = (new_sig & 0x88888888) >> 3;
1994  ui32 t = u | (u << 4) | (u >> 4); //left and right neighbors
1995  if (i > 0)
1996  nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
1997  nxt_mbr[0] |= t & ~nxt_sig[0];
1998  nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
1999  }
2000 
2001  //clear current sigma
2002  //mbr need not be cleared because it is overwritten
2003  cur_sig = y & 0x4 ? sigma2 : sigma1;
2004  memset(cur_sig, 0, (((width + 7) >> 3) + 1) << 2);
2005  }
2006  }
2007  }
2008 
2009  //terminating
2010  if (num_passes > 1) {
2011 
2012  if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2))
2013  {//do magref
2014  ui32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
2015  ui32 *dpp = decoded_data + (height & 0xFFFFFFFCu) * stride;
2016  ui32 half = 1 << (p - 2);
2017  for (ui32 i = 0; i < width; i += 8)
2018  {
2019  ui32 cwd = rev_fetch_mrp(&magref);
2020  ui32 sig = *cur_sig++;
2021  ui32 col_mask = 0xF;
2022  ui32 *dp = dpp + i;
2023  if (sig)
2024  {
2025  for (int j = 0; j < 8; ++j, dp++)
2026  {
2027  if (sig & col_mask)
2028  {
2029  ui32 sample_mask = 0x11111111u & col_mask;
2030 
2031  if (sig & sample_mask)
2032  {
2033  assert(dp[0] != 0);
2034  ui32 sym = cwd & 1;
2035  dp[0] ^= (1 - sym) << (p - 1);
2036  dp[0] |= half;
2037  cwd >>= 1;
2038  }
2039  sample_mask += sample_mask;
2040 
2041  if (sig & sample_mask)
2042  {
2043  assert(dp[stride] != 0);
2044  ui32 sym = cwd & 1;
2045  dp[stride] ^= (1 - sym) << (p - 1);
2046  dp[stride] |= half;
2047  cwd >>= 1;
2048  }
2049  sample_mask += sample_mask;
2050 
2051  if (sig & sample_mask)
2052  {
2053  assert(dp[2 * stride] != 0);
2054  ui32 sym = cwd & 1;
2055  dp[2 * stride] ^= (1 - sym) << (p - 1);
2056  dp[2 * stride] |= half;
2057  cwd >>= 1;
2058  }
2059  sample_mask += sample_mask;
2060 
2061  if (sig & sample_mask)
2062  {
2063  assert(dp[3 * stride] != 0);
2064  ui32 sym = cwd & 1;
2065  dp[3 * stride] ^= (1 - sym) << (p - 1);
2066  dp[3 * stride] |= half;
2067  cwd >>= 1;
2068  }
2069  sample_mask += sample_mask;
2070  }
2071  col_mask <<= 4;
2072  }
2073  }
2074  rev_advance_mrp(&magref, population_count(sig));
2075  }
2076  }
2077 
2078  //do the last incomplete stripe
2079  // for cases of (height & 3) == 0 and 3
2080  // they should have been processed previously
2081  if ((height & 3) == 1 || (height & 3) == 2)
2082  {
2083  //generate mbr of first stripe
2084  ui32 *sig = height & 0x4 ? sigma2 : sigma1;
2085  ui32 *mbr = height & 0x4 ? mbr2 : mbr1;
2086  //integrate horizontally
2087  ui32 prev = 0;
2088  for (ui32 i = 0; i < width; i += 8, mbr++, sig++)
2089  {
2090  mbr[0] = sig[0];
2091  mbr[0] |= prev >> 28; //for first column, left neighbors
2092  mbr[0] |= sig[0] << 4; //left neighbors
2093  mbr[0] |= sig[0] >> 4; //left neighbors
2094  mbr[0] |= sig[1] << 28; //for last column, right neighbors
2095  prev = sig[0];
2096 
2097  //integrate vertically
2098  ui32 t = mbr[0], z = mbr[0];
2099  z |= (t & 0x77777777) << 1; //above neighbors
2100  z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
2101  mbr[0] = z & ~sig[0]; //remove already significance samples
2102  }
2103  }
2104 
2105  ui32 st = height;
2106  st -= height > 6 ? (((height + 1) & 3) + 3) : height;
2107  for (ui32 y = st; y < height; y += 4)
2108  {
2109  ui32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
2110 
2111  ui32 pattern = 0xFFFFFFFFu; // a pattern needed samples
2112  if (height - y == 3)
2113  pattern = 0x77777777u;
2114  else if (height - y == 2)
2115  pattern = 0x33333333u;
2116  else if (height - y == 1)
2117  pattern = 0x11111111u;
2118 
2119  //add membership from the next stripe, obtained above
2120  if (height - y > 4)
2121  {
2122  cur_sig = y & 0x4 ? sigma2 : sigma1;
2123  cur_mbr = y & 0x4 ? mbr2 : mbr1;
2124  nxt_sig = y & 0x4 ? sigma1 : sigma2;
2125  ui32 prev = 0;
2126  for (ui32 i = 0; i<width; i += 8, cur_mbr++, cur_sig++, nxt_sig++)
2127  {
2128  ui32 t = nxt_sig[0];
2129  t |= prev >> 28; //for first column, left neighbors
2130  t |= nxt_sig[0] << 4; //left neighbors
2131  t |= nxt_sig[0] >> 4; //left neighbors
2132  t |= nxt_sig[1] << 28; //for last column, right neighbors
2133  prev = nxt_sig[0];
2134 
2135  if (stripe_causal == false)
2136  cur_mbr[0] |= (t & 0x11111111u) << 3;
2137  //remove already significant samples
2138  cur_mbr[0] &= ~cur_sig[0];
2139  }
2140  }
2141 
2142  //find new locations and get signs
2143  cur_sig = y & 0x4 ? sigma2 : sigma1;
2144  cur_mbr = y & 0x4 ? mbr2 : mbr1;
2145  nxt_sig = y & 0x4 ? sigma1 : sigma2;
2146  nxt_mbr = y & 0x4 ? mbr1 : mbr2;
2147  ui32 val = 3u << (p - 2);
2148  for (ui32 i = 0; i < width; i += 8,
2149  cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
2150  {
2151  ui32 mbr = *cur_mbr & pattern; //skip unneeded samples
2152  ui32 new_sig = 0;
2153  if (mbr)
2154  {
2155  for (ui32 n = 0; n < 8; n += 4)
2156  {
2157  ui32 cwd = frwd_fetch<0>(&sigprop);
2158  ui32 cnt = 0;
2159 
2160  ui32 *dp = decoded_data + y * stride;
2161  dp += i + n;
2162 
2163  ui32 col_mask = 0xFu << (4 * n);
2164 
2165  ui32 inv_sig = ~cur_sig[0] & pattern;
2166 
2167  ui32 end = n + 4 + i < width ? n + 4 : width - i;
2168  for (ui32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
2169  {
2170  if ((col_mask & mbr) == 0)
2171  continue;
2172 
2173  //scan 4 mbr
2174  ui32 sample_mask = 0x11111111u & col_mask;
2175  if (mbr & sample_mask)
2176  {
2177  assert(dp[0] == 0);
2178  if (cwd & 1)
2179  {
2180  new_sig |= sample_mask;
2181  ui32 t = 0x32u << (j * 4);
2182  mbr |= t & inv_sig;
2183  }
2184  cwd >>= 1; ++cnt;
2185  }
2186 
2187  sample_mask += sample_mask;
2188  if (mbr & sample_mask)
2189  {
2190  assert(dp[stride] == 0);
2191  if (cwd & 1)
2192  {
2193  new_sig |= sample_mask;
2194  ui32 t = 0x74u << (j * 4);
2195  mbr |= t & inv_sig;
2196  }
2197  cwd >>= 1; ++cnt;
2198  }
2199 
2200  sample_mask += sample_mask;
2201  if (mbr & sample_mask)
2202  {
2203  assert(dp[2 * stride] == 0);
2204  if (cwd & 1)
2205  {
2206  new_sig |= sample_mask;
2207  ui32 t = 0xE8u << (j * 4);
2208  mbr |= t & inv_sig;
2209  }
2210  cwd >>= 1; ++cnt;
2211  }
2212 
2213  sample_mask += sample_mask;
2214  if (mbr & sample_mask)
2215  {
2216  assert(dp[3 * stride] == 0);
2217  if (cwd & 1)
2218  {
2219  new_sig |= sample_mask;
2220  ui32 t = 0xC0u << (j * 4);
2221  mbr |= t & inv_sig;
2222  }
2223  cwd >>= 1; ++cnt;
2224  }
2225  }
2226 
2227  //signs here
2228  if (new_sig & (0xFFFFu << (4 * n)))
2229  {
2230  ui32 *dp = decoded_data + y * stride;
2231  dp += i + n;
2232  ui32 col_mask = 0xFu << (4 * n);
2233 
2234  for (ui32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
2235  {
2236  if ((col_mask & new_sig) == 0)
2237  continue;
2238 
2239  //scan 4 signs
2240  ui32 sample_mask = 0x11111111u & col_mask;
2241  if (new_sig & sample_mask)
2242  {
2243  assert(dp[0] == 0);
2244  dp[0] |= ((cwd & 1) << 31) | val;
2245  cwd >>= 1; ++cnt;
2246  }
2247 
2248  sample_mask += sample_mask;
2249  if (new_sig & sample_mask)
2250  {
2251  assert(dp[stride] == 0);
2252  dp[stride] |= ((cwd & 1) << 31) | val;
2253  cwd >>= 1; ++cnt;
2254  }
2255 
2256  sample_mask += sample_mask;
2257  if (new_sig & sample_mask)
2258  {
2259  assert(dp[2 * stride] == 0);
2260  dp[2 * stride] |= ((cwd & 1) << 31) | val;
2261  cwd >>= 1; ++cnt;
2262  }
2263 
2264  sample_mask += sample_mask;
2265  if (new_sig & sample_mask)
2266  {
2267  assert(dp[3 * stride] == 0);
2268  dp[3 * stride] |= ((cwd & 1) << 31) | val;
2269  cwd >>= 1; ++cnt;
2270  }
2271  }
2272 
2273  }
2274  frwd_advance(&sigprop, cnt);
2275  cnt = 0;
2276 
2277  //update next columns
2278  if (n == 4)
2279  {
2280  //horizontally
2281  ui32 t = new_sig >> 28;
2282  t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
2283  cur_mbr[1] |= t & ~cur_sig[1];
2284  }
2285  }
2286  }
2287  //propagate down (vertically propagation)
2288  new_sig |= cur_sig[0];
2289  ui32 u = (new_sig & 0x88888888) >> 3;
2290  ui32 t = u | (u << 4) | (u >> 4);
2291  if (i > 0)
2292  nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
2293  nxt_mbr[0] |= t & ~nxt_sig[0];
2294  nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
2295  }
2296  }
2297  }
2298  return true;
2299  }
2300  }
2301 }
static bool vlc_tables_initialized
Initializes VLC tables vlc_tbl0 and vlc_tbl1.
static bool vlc_init_tables()
Initializes vlc_tbl0 and vlc_tbl1 tables, from table0.h and table1.h.
static ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial rows of quads
static ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
void frwd_advance(frwd_struct *msp, ui32 num_bits)
Consume num_bits bits from the bitstream of frwd_struct.
ui32 decode_init_uvlc(ui32 vlc, ui32 mode, ui32 *u)
Decode initial UVLC to get the u value (or u_q)
void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initializes the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
ui32 frwd_fetch(frwd_struct *msp)
Fetches 32 bits from the frwd_struct bitstream.
void frwd_init(frwd_struct *msp, const ui8 *data, int size)
Initialize frwd_struct struct and reads some bytes.
ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no runs stored, the MEL segment is decoded.
ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initializes a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read addre...
ui32 decode_noninit_uvlc(ui32 vlc, ui32 mode, ui32 *u)
Decode non-initial UVLC to get the u value (or u_q)
ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
void frwd_read(frwd_struct *msp)
Reads and unstuffs 32 bits from a forward-growing bitstream.
bool ojph_decode_codeblock(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement pa...
void rev_read(rev_struct *vlcp)
Reads and unstuffs data from a backward-growing segment.
ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes the rev_struct structure for the MRP segment, and reads a number of bytes such that the next 32 b...
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
uint64_t ui64
Definition: ojph_defs.h:56
uint16_t ui16
Definition: ojph_defs.h:52
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:70
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:90
uint32_t ui32
Definition: ojph_defs.h:54
uint8_t ui8
Definition: ojph_defs.h:50
#define OJPH_WARN(t,...)
Definition: ojph_message.h:128
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui8 * data
the address of data (or bitstream)
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
State structure for reading and unstuffing of forward-growing bitstreams; these are: MagSgn and SPP b...
ui64 tmp
temporary buffer of read data
ui32 bits
number of bits stored in tmp
bool unstuff
true if a bit needs to be unstuffed from next byte
const ui8 * data
pointer to bitstream
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui8 * data
pointer to where to read data
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui64 tmp
temporary buffer of read data