diff options
-rw-r--r-- | src/resources/dye/dyepalette_replaceacolor.cpp | 7 | ||||
-rw-r--r-- | src/resources/dye/dyepalette_replaceaoglcolor.cpp | 6 | ||||
-rw-r--r-- | src/resources/dye/dyepalette_replacescolor.cpp | 17 | ||||
-rw-r--r-- | src/resources/dye/dyepalette_replacesoglcolor.cpp | 16 |
4 files changed, 16 insertions, 30 deletions
diff --git a/src/resources/dye/dyepalette_replaceacolor.cpp b/src/resources/dye/dyepalette_replaceacolor.cpp index 7c24d6c1f..898928128 100644 --- a/src/resources/dye/dyepalette_replaceacolor.cpp +++ b/src/resources/dye/dyepalette_replaceacolor.cpp @@ -138,11 +138,13 @@ void DyePalette::replaceAColorDefault(uint32_t *restrict pixels, } #ifdef SIMD_SUPPORTED +/* static void print256(const char *const text, const __m256i &val); static void print256(const char *const text, const __m256i &val) { printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]); } +*/ __attribute__ ((target ("avx2"))) void DyePalette::replaceAColorSimd(uint32_t *restrict pixels, @@ -159,7 +161,7 @@ void DyePalette::replaceAColorSimd(uint32_t *restrict pixels, for (int ptr = 0; ptr < bufEnd; ptr += 8) { - //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); +// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&pixels[ptr])); std::vector<DyeColor>::const_iterator it = mColors.begin(); @@ -178,8 +180,7 @@ void DyePalette::replaceAColorSimd(uint32_t *restrict pixels, ++ it; } - //print256("res ", base); - //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); +// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base); } diff --git a/src/resources/dye/dyepalette_replaceaoglcolor.cpp b/src/resources/dye/dyepalette_replaceaoglcolor.cpp index 8ed7af643..869f54205 100644 --- a/src/resources/dye/dyepalette_replaceaoglcolor.cpp +++ b/src/resources/dye/dyepalette_replaceaoglcolor.cpp @@ -146,11 +146,13 @@ void DyePalette::replaceAOGLColorDefault(uint32_t *restrict pixels, #ifdef SIMD_SUPPORTED +/* static void print256(const char *const text, const __m256i &val); static void print256(const char *const text, const __m256i &val) { printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]); } +*/ __attribute__ ((target ("avx2"))) void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels, @@ -167,7 +169,7 @@ void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels, pixels != p_end; ++pixels) { - //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); +// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pixels)); std::vector<DyeColor>::const_iterator it = mColors.begin(); @@ -186,7 +188,7 @@ void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels, ++ it; } - //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); +// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); _mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), base); } } diff --git a/src/resources/dye/dyepalette_replacescolor.cpp b/src/resources/dye/dyepalette_replacescolor.cpp index 02412b186..0076484c0 100644 --- a/src/resources/dye/dyepalette_replacescolor.cpp +++ b/src/resources/dye/dyepalette_replacescolor.cpp @@ -134,11 +134,13 @@ void DyePalette::replaceSColorDefault(uint32_t *restrict pixels, } #ifdef SIMD_SUPPORTED +/* static void print256(const char *const text, const __m256i &val); static void print256(const char *const text, const __m256i &val) { printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]); } +*/ __attribute__ ((target ("avx2"))) void DyePalette::replaceSColorSimd(uint32_t *restrict pixels, @@ -156,44 +158,33 @@ void DyePalette::replaceSColorSimd(uint32_t *restrict pixels, for (int ptr = 0; ptr < bufEnd; ptr += 8) { __m256i mask = _mm256_set1_epi32(0xffffff00); - //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); +// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&pixels[ptr])); - //print256("mask ", mask); std::vector<DyeColor>::const_iterator it = mColors.begin(); while (it != it_end) { - //print256("base ", base); const DyeColor &col = *it; ++ it; const DyeColor &col2 = *it; __m256i base2 = _mm256_and_si256(mask, base); - //print256("base2 ", base2); __m256i newMask = _mm256_set1_epi32(col2.valueS); - //print256("newMask ", newMask); __m256i cmpMask = _mm256_set1_epi32(col.valueS); - //print256("cmpMask ", cmpMask); __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask); - //print256("cmpRes ", cmpRes); cmpRes = _mm256_and_si256(mask, cmpRes); - //print256("cmpRes ", cmpRes); __m256i srcAnd = _mm256_andnot_si256(cmpRes, base); - //print256("srcAnd ", srcAnd); __m256i dstAnd = _mm256_and_si256(cmpRes, newMask); - //print256("dstAnd ", dstAnd); base = _mm256_or_si256(srcAnd, dstAnd); ++ it; } - //print256("res ", base); - //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); +// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base); } // complete end without simd for (int ptr = bufSize - mod; ptr < bufSize; ptr ++) { -// logger->log("past"); uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]); #if SDL_BYTEORDER == SDL_BIG_ENDIAN const unsigned int data = pixels[ptr] & 0x00ffffff; diff --git a/src/resources/dye/dyepalette_replacesoglcolor.cpp b/src/resources/dye/dyepalette_replacesoglcolor.cpp index 899286240..ceffd664e 100644 --- a/src/resources/dye/dyepalette_replacesoglcolor.cpp +++ b/src/resources/dye/dyepalette_replacesoglcolor.cpp @@ -142,11 +142,13 @@ void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels, } #ifdef SIMD_SUPPORTED +/* static void print256(const char *const text, const __m256i &val); static void print256(const char *const text, const __m256i &val) { printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]); } +*/ __attribute__ ((target ("avx2"))) void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels, @@ -164,37 +166,27 @@ void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels, pixels += 8) { __m256i mask = _mm256_set1_epi32(0x00ffffff); - //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); +// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels)); __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pixels)); - //print256("mask ", mask); std::vector<DyeColor>::const_iterator it = mColors.begin(); while (it != it_end) { - //print256("base ", base); const DyeColor &col = *it; ++ it; const DyeColor &col2 = *it; __m256i base2 = _mm256_and_si256(mask, base); - //print256("base2 ", base2); __m256i newMask = _mm256_set1_epi32(col2.valueSOgl); - //print256("newMask ", newMask); __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl); - //print256("cmpMask ", cmpMask); __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask); - //print256("cmpRes ", cmpRes); cmpRes = _mm256_and_si256(mask, cmpRes); - //print256("cmpRes ", cmpRes); __m256i srcAnd = _mm256_andnot_si256(cmpRes, base); - //print256("srcAnd ", srcAnd); __m256i dstAnd = _mm256_and_si256(cmpRes, newMask); - //print256("dstAnd ", dstAnd); base = _mm256_or_si256(srcAnd, dstAnd); ++ it; } - //print256("res ", base); - //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); +// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base); _mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), base); } } |