summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/resources/dye/dyepalette_replaceacolor.cpp 7
-rw-r--r-- src/resources/dye/dyepalette_replaceaoglcolor.cpp 6
-rw-r--r-- src/resources/dye/dyepalette_replacescolor.cpp 17
-rw-r--r-- src/resources/dye/dyepalette_replacesoglcolor.cpp 16
4 files changed, 16 insertions, 30 deletions
diff --git a/src/resources/dye/dyepalette_replaceacolor.cpp b/src/resources/dye/dyepalette_replaceacolor.cpp
index 7c24d6c1f..898928128 100644
--- a/src/resources/dye/dyepalette_replaceacolor.cpp
+++ b/src/resources/dye/dyepalette_replaceacolor.cpp
@@ -138,11 +138,13 @@ void DyePalette::replaceAColorDefault(uint32_t *restrict pixels,
}
#ifdef SIMD_SUPPORTED
+/*
static void print256(const char *const text, const __m256i &val);
static void print256(const char *const text, const __m256i &val)
{
printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
}
+*/
__attribute__ ((target ("avx2")))
void DyePalette::replaceAColorSimd(uint32_t *restrict pixels,
@@ -159,7 +161,7 @@ void DyePalette::replaceAColorSimd(uint32_t *restrict pixels,
for (int ptr = 0; ptr < bufEnd; ptr += 8)
{
- //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
+// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
__m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]));
std::vector<DyeColor>::const_iterator it = mColors.begin();
@@ -178,8 +180,7 @@ void DyePalette::replaceAColorSimd(uint32_t *restrict pixels,
++ it;
}
- //print256("res ", base);
- //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
+// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
}
diff --git a/src/resources/dye/dyepalette_replaceaoglcolor.cpp b/src/resources/dye/dyepalette_replaceaoglcolor.cpp
index 8ed7af643..869f54205 100644
--- a/src/resources/dye/dyepalette_replaceaoglcolor.cpp
+++ b/src/resources/dye/dyepalette_replaceaoglcolor.cpp
@@ -146,11 +146,13 @@ void DyePalette::replaceAOGLColorDefault(uint32_t *restrict pixels,
#ifdef SIMD_SUPPORTED
+/*
static void print256(const char *const text, const __m256i &val);
static void print256(const char *const text, const __m256i &val)
{
printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
}
+*/
__attribute__ ((target ("avx2")))
void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels,
@@ -167,7 +169,7 @@ void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels,
pixels != p_end;
++pixels)
{
- //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
+// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
__m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pixels));
std::vector<DyeColor>::const_iterator it = mColors.begin();
@@ -186,7 +188,7 @@ void DyePalette::replaceAOGLColorSimd(uint32_t *restrict pixels,
++ it;
}
- //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
+// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
_mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), base);
}
}
diff --git a/src/resources/dye/dyepalette_replacescolor.cpp b/src/resources/dye/dyepalette_replacescolor.cpp
index 02412b186..0076484c0 100644
--- a/src/resources/dye/dyepalette_replacescolor.cpp
+++ b/src/resources/dye/dyepalette_replacescolor.cpp
@@ -134,11 +134,13 @@ void DyePalette::replaceSColorDefault(uint32_t *restrict pixels,
}
#ifdef SIMD_SUPPORTED
+/*
static void print256(const char *const text, const __m256i &val);
static void print256(const char *const text, const __m256i &val)
{
printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
}
+*/
__attribute__ ((target ("avx2")))
void DyePalette::replaceSColorSimd(uint32_t *restrict pixels,
@@ -156,44 +158,33 @@ void DyePalette::replaceSColorSimd(uint32_t *restrict pixels,
for (int ptr = 0; ptr < bufEnd; ptr += 8)
{
__m256i mask = _mm256_set1_epi32(0xffffff00);
- //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
+// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
__m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]));
- //print256("mask ", mask);
std::vector<DyeColor>::const_iterator it = mColors.begin();
while (it != it_end)
{
- //print256("base ", base);
const DyeColor &col = *it;
++ it;
const DyeColor &col2 = *it;
__m256i base2 = _mm256_and_si256(mask, base);
- //print256("base2 ", base2);
__m256i newMask = _mm256_set1_epi32(col2.valueS);
- //print256("newMask ", newMask);
__m256i cmpMask = _mm256_set1_epi32(col.valueS);
- //print256("cmpMask ", cmpMask);
__m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
- //print256("cmpRes ", cmpRes);
cmpRes = _mm256_and_si256(mask, cmpRes);
- //print256("cmpRes ", cmpRes);
__m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
- //print256("srcAnd ", srcAnd);
__m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
- //print256("dstAnd ", dstAnd);
base = _mm256_or_si256(srcAnd, dstAnd);
++ it;
}
- //print256("res ", base);
- //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
+// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
_mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
}
// complete end without simd
for (int ptr = bufSize - mod; ptr < bufSize; ptr ++)
{
-// logger->log("past");
uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
const unsigned int data = pixels[ptr] & 0x00ffffff;
diff --git a/src/resources/dye/dyepalette_replacesoglcolor.cpp b/src/resources/dye/dyepalette_replacesoglcolor.cpp
index 899286240..ceffd664e 100644
--- a/src/resources/dye/dyepalette_replacesoglcolor.cpp
+++ b/src/resources/dye/dyepalette_replacesoglcolor.cpp
@@ -142,11 +142,13 @@ void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
}
#ifdef SIMD_SUPPORTED
+/*
static void print256(const char *const text, const __m256i &val);
static void print256(const char *const text, const __m256i &val)
{
printf("%s 0x%016llx%016llx%016llx%016llx\n", text, val[0], val[1], val[2], val[3]);
}
+*/
__attribute__ ((target ("avx2")))
void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels,
@@ -164,37 +166,27 @@ void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels,
pixels += 8)
{
__m256i mask = _mm256_set1_epi32(0x00ffffff);
- //__m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
+// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(pixels));
__m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pixels));
- //print256("mask ", mask);
std::vector<DyeColor>::const_iterator it = mColors.begin();
while (it != it_end)
{
- //print256("base ", base);
const DyeColor &col = *it;
++ it;
const DyeColor &col2 = *it;
__m256i base2 = _mm256_and_si256(mask, base);
- //print256("base2 ", base2);
__m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
- //print256("newMask ", newMask);
__m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
- //print256("cmpMask ", cmpMask);
__m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
- //print256("cmpRes ", cmpRes);
cmpRes = _mm256_and_si256(mask, cmpRes);
- //print256("cmpRes ", cmpRes);
__m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
- //print256("srcAnd ", srcAnd);
__m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
- //print256("dstAnd ", dstAnd);
base = _mm256_or_si256(srcAnd, dstAnd);
++ it;
}
- //print256("res ", base);
- //_mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
+// _mm256_store_si256(reinterpret_cast<__m256i*>(pixels), base);
_mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), base);
}
}