OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_colour_avx2.cpp
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_colour_avx2.cpp
34 // Author: Aous Naman
35 // Date: 11 October 2019
36 //***************************************************************************/
37 
38 #include <cmath>
39 
40 #include "ojph_defs.h"
41 #include "ojph_arch.h"
42 #include "ojph_colour.h"
43 
44 #ifdef OJPH_COMPILER_MSVC
45 #include <intrin.h>
46 #else
47 #include <x86intrin.h>
48 #endif
49 
50 namespace ojph {
51  namespace local {
52 
54  void avx2_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift,
55  ui32 width)
56  {
57  __m256i sh = _mm256_set1_epi32(shift);
58  for (int i = (width + 7) >> 3; i > 0; --i, sp+=8, dp+=8)
59  {
60  __m256i s = _mm256_loadu_si256((__m256i*)sp);
61  s = _mm256_add_epi32(s, sh);
62  _mm256_storeu_si256((__m256i*)dp, s);
63  }
64  }
65 
67  void avx2_rct_forward(const si32 *r, const si32 *g, const si32 *b,
68  si32 *y, si32 *cb, si32 *cr, ui32 repeat)
69  {
70  for (int i = (repeat + 7) >> 3; i > 0; --i)
71  {
72  __m256i mr = _mm256_load_si256((__m256i*)r);
73  __m256i mg = _mm256_load_si256((__m256i*)g);
74  __m256i mb = _mm256_load_si256((__m256i*)b);
75  __m256i t = _mm256_add_epi32(mr, mb);
76  t = _mm256_add_epi32(t, _mm256_slli_epi32(mg, 1));
77  _mm256_store_si256((__m256i*)y, _mm256_srai_epi32(t, 2));
78  t = _mm256_sub_epi32(mb, mg);
79  _mm256_store_si256((__m256i*)cb, t);
80  t = _mm256_sub_epi32(mr, mg);
81  _mm256_store_si256((__m256i*)cr, t);
82 
83  r += 8; g += 8; b += 8;
84  y += 8; cb += 8; cr += 8;
85  }
86  }
87 
89  void avx2_rct_backward(const si32 *y, const si32 *cb, const si32 *cr,
90  si32 *r, si32 *g, si32 *b, ui32 repeat)
91  {
92  for (int i = (repeat + 7) >> 3; i > 0; --i)
93  {
94  __m256i my = _mm256_load_si256((__m256i*)y);
95  __m256i mcb = _mm256_load_si256((__m256i*)cb);
96  __m256i mcr = _mm256_load_si256((__m256i*)cr);
97 
98  __m256i t = _mm256_add_epi32(mcb, mcr);
99  t = _mm256_sub_epi32(my, _mm256_srai_epi32(t, 2));
100  _mm256_store_si256((__m256i*)g, t);
101  __m256i u = _mm256_add_epi32(mcb, t);
102  _mm256_store_si256((__m256i*)b, u);
103  u = _mm256_add_epi32(mcr, t);
104  _mm256_store_si256((__m256i*)r, u);
105 
106  y += 8; cb += 8; cr += 8;
107  r += 8; g += 8; b += 8;
108  }
109  }
110 
111  }
112 }
void avx2_rct_forward(const si32 *r, const si32 *g, const si32 *b, si32 *y, si32 *cb, si32 *cr, ui32 repeat)
void avx2_rct_backward(const si32 *y, const si32 *cb, const si32 *cr, si32 *r, si32 *g, si32 *b, ui32 repeat)
void avx2_cnvrt_si32_to_si32_shftd(const si32 *sp, si32 *dp, int shift, ui32 width)
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54