summaryrefslogtreecommitdiff
path: root/src/resources/dye
diff options
context:
space:
mode:
author: Andrei Karas <akaras@inbox.ru> 2017-05-25 00:55:33 +0300
committer: Andrei Karas <akaras@inbox.ru> 2017-05-25 00:55:33 +0300
commit: 69aaf75b49cdf385b03469b82dd05480abf6e8b5 (patch)
tree: b2a14ea29c6bdd5b06f4812a378bd1f5325f206d /src/resources/dye
parent: 3480f3ad939e2ae5d0191ed739a57e834658c32e (diff)
download: plus-69aaf75b49cdf385b03469b82dd05480abf6e8b5.tar.gz
plus-69aaf75b49cdf385b03469b82dd05480abf6e8b5.tar.bz2
plus-69aaf75b49cdf385b03469b82dd05480abf6e8b5.tar.xz
plus-69aaf75b49cdf385b03469b82dd05480abf6e8b5.zip
Switch replaceSOGLColor to use a custom dispatcher.
Diffstat (limited to 'src/resources/dye')
-rw-r--r-- src/resources/dye/dye_unittest.cc 39
-rw-r--r-- src/resources/dye/dyepalette.cpp 12
-rw-r--r-- src/resources/dye/dyepalette.h 43
-rw-r--r-- src/resources/dye/dyepalette_replacesoglcolor.cpp 376
-rw-r--r-- src/resources/dye/dyepalette_replacesoglcolor_avx2.hpp 55
-rw-r--r-- src/resources/dye/dyepalette_replacesoglcolor_default.hpp 108
-rw-r--r-- src/resources/dye/dyepalette_replacesoglcolor_sse2.hpp 55
7 files changed, 350 insertions, 338 deletions
diff --git a/src/resources/dye/dye_unittest.cc b/src/resources/dye/dye_unittest.cc
index 5d2cfdb8d..6d6e6028e 100644
--- a/src/resources/dye/dye_unittest.cc
+++ b/src/resources/dye/dye_unittest.cc
@@ -69,7 +69,7 @@ TEST_CASE("Dye replaceSOGLColor 1 1", "")
DyePalette palette("#00ff00,000011", 6);
uint32_t data[1];
data[0] = buildHex(0x01, 0x02, 0x03, 0x10);
- palette.replaceSOGLColor(&data[0], 1);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 1);
REQUIRE(data[0] == buildHex(0x01, 0x02, 0x03, 0x10));
}
@@ -78,7 +78,7 @@ TEST_CASE("Dye replaceSOGLColor 1 2", "")
DyePalette palette("#01ff02,030411", 6);
uint32_t data[1];
data[0] = buildHex(0x20, 0x02, 0xff, 0x01);
- palette.replaceSOGLColor(&data[0], 1);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 1);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
}
@@ -87,7 +87,7 @@ TEST_CASE("Dye replaceSOGLColor 1 3", "")
DyePalette palette("#404040,200000,0100ee,102030", 6);
uint32_t data[1];
data[0] = buildHex(0x40, 0xee, 0x00, 0x01);
- palette.replaceSOGLColor(&data[0], 1);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 1);
REQUIRE(data[0] == buildHex(0x40, 0x30, 0x20, 0x10));
}
@@ -97,7 +97,7 @@ TEST_CASE("Dye replaceSOGLColor 2 1", "")
uint32_t data[2];
data[0] = buildHex(0x20, 0x02, 0xff, 0x01);
data[1] = buildHex(0x30, 0x02, 0xff, 0x01);
- palette.replaceSOGLColor(&data[0], 2);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 2);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
}
@@ -110,7 +110,7 @@ TEST_CASE("Dye replaceSOGLColor 4 1", "")
data[1] = buildHex(0x30, 0x02, 0xff, 0x01);
data[2] = buildHex(0x40, 0x02, 0xff, 0x01);
data[3] = buildHex(0x50, 0x02, 0xff, 0x02);
- palette.replaceSOGLColor(&data[0], 4);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 4);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
REQUIRE(data[2] == buildHex(0x40, 0x11, 0x04, 0x03));
@@ -129,7 +129,7 @@ TEST_CASE("Dye replaceSOGLColor 8 1", "")
data[5] = buildHex(0x30, 0x02, 0xff, 0x01);
data[6] = buildHex(0x40, 0x02, 0xff, 0x01);
data[7] = buildHex(0x60, 0x02, 0xff, 0x02);
- palette.replaceSOGLColor(&data[0], 8);
+ DYEPALETTE(palette, SOGLColor)(&data[0], 8);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
REQUIRE(data[2] == buildHex(0x40, 0x11, 0x04, 0x03));
@@ -216,29 +216,6 @@ TEST_CASE("Dye replaceSOGLColor 8 1 default", "")
REQUIRE(data[7] == buildHex(0x60, 0x02, 0xff, 0x02));
}
-TEST_CASE("Dye replaceSOGLColor 8 1 simd", "")
-{
- DyePalette palette("#01ff02,030411,01ee02,010203", 6);
- uint32_t data[8];
- data[0] = buildHex(0x20, 0x02, 0xff, 0x01);
- data[1] = buildHex(0x30, 0x02, 0xff, 0x01);
- data[2] = buildHex(0x40, 0x02, 0xff, 0x01);
- data[3] = buildHex(0x50, 0x02, 0xff, 0x02);
- data[4] = buildHex(0x20, 0x02, 0xff, 0x01);
- data[5] = buildHex(0x30, 0x02, 0xff, 0x01);
- data[6] = buildHex(0x40, 0x02, 0xff, 0x01);
- data[7] = buildHex(0x60, 0x02, 0xff, 0x02);
- palette.replaceSOGLColorSimd(&data[0], 8);
- REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
- REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
- REQUIRE(data[2] == buildHex(0x40, 0x11, 0x04, 0x03));
- REQUIRE(data[3] == buildHex(0x50, 0x02, 0xff, 0x02));
- REQUIRE(data[4] == buildHex(0x20, 0x11, 0x04, 0x03));
- REQUIRE(data[5] == buildHex(0x30, 0x11, 0x04, 0x03));
- REQUIRE(data[6] == buildHex(0x40, 0x11, 0x04, 0x03));
- REQUIRE(data[7] == buildHex(0x60, 0x02, 0xff, 0x02));
-}
-
TEST_CASE("Dye replaceSOGLColor 8 1 sse2", "")
{
DyePalette palette("#01ff02,030411,01ee02,010203", 6);
@@ -251,7 +228,7 @@ TEST_CASE("Dye replaceSOGLColor 8 1 sse2", "")
data[5] = buildHex(0x30, 0x02, 0xff, 0x01);
data[6] = buildHex(0x40, 0x02, 0xff, 0x01);
data[7] = buildHex(0x60, 0x02, 0xff, 0x02);
- palette.replaceSOGLColorSse2(&data[0], 8);
+ DYEPALETTE(palette, SOGLColorSse2)(&data[0], 8);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
REQUIRE(data[2] == buildHex(0x40, 0x11, 0x04, 0x03));
@@ -274,7 +251,7 @@ TEST_CASE("Dye replaceSOGLColor 8 1 avx2", "")
data[5] = buildHex(0x30, 0x02, 0xff, 0x01);
data[6] = buildHex(0x40, 0x02, 0xff, 0x01);
data[7] = buildHex(0x60, 0x02, 0xff, 0x02);
- palette.replaceSOGLColorAvx2(&data[0], 8);
+ DYEPALETTE(palette, SOGLColorAvx2)(&data[0], 8);
REQUIRE(data[0] == buildHex(0x20, 0x11, 0x04, 0x03));
REQUIRE(data[1] == buildHex(0x30, 0x11, 0x04, 0x03));
REQUIRE(data[2] == buildHex(0x40, 0x11, 0x04, 0x03));
diff --git a/src/resources/dye/dyepalette.cpp b/src/resources/dye/dyepalette.cpp
index 5a350c8e2..c341d140e 100644
--- a/src/resources/dye/dyepalette.cpp
+++ b/src/resources/dye/dyepalette.cpp
@@ -47,6 +47,9 @@
DyeFunctionPtr DyePalette::funcReplaceSColor = nullptr;
DyeFunctionPtr DyePalette::funcReplaceSColorSse2 = nullptr;
DyeFunctionPtr DyePalette::funcReplaceSColorAvx2 = nullptr;
+DyeFunctionPtr DyePalette::funcReplaceSOGLColor = nullptr;
+DyeFunctionPtr DyePalette::funcReplaceSOGLColorSse2 = nullptr;
+DyeFunctionPtr DyePalette::funcReplaceSOGLColorAvx2 = nullptr;
DyePalette::DyePalette(const std::string &restrict description,
const uint8_t blockSize) :
@@ -242,12 +245,18 @@ void DyePalette::initFunctions()
funcReplaceSColor = &DyePalette::replaceSColorAvx2;
funcReplaceSColorAvx2 = &DyePalette::replaceSColorAvx2;
funcReplaceSColorSse2 = &DyePalette::replaceSColorSse2;
+ funcReplaceSOGLColor = &DyePalette::replaceSOGLColorAvx2;
+ funcReplaceSOGLColorAvx2 = &DyePalette::replaceSOGLColorAvx2;
+ funcReplaceSOGLColorSse2 = &DyePalette::replaceSOGLColorSse2;
}
else if (flags & Cpu::FEATURE_SSE2)
{
funcReplaceSColor = &DyePalette::replaceSColorSse2;
funcReplaceSColorAvx2 = &DyePalette::replaceSColorSse2;
funcReplaceSColorSse2 = &DyePalette::replaceSColorSse2;
+ funcReplaceSOGLColor = &DyePalette::replaceSOGLColorSse2;
+ funcReplaceSOGLColorAvx2 = &DyePalette::replaceSOGLColorSse2;
+ funcReplaceSOGLColorSse2 = &DyePalette::replaceSOGLColorSse2;
}
else
#endif // SIMD_SUPPORTED
@@ -255,5 +264,8 @@ void DyePalette::initFunctions()
funcReplaceSColor = &DyePalette::replaceSColorDefault;
funcReplaceSColorAvx2 = &DyePalette::replaceSColorDefault;
funcReplaceSColorSse2 = &DyePalette::replaceSColorDefault;
+ funcReplaceSOGLColor = &DyePalette::replaceSOGLColorDefault;
+ funcReplaceSOGLColorAvx2 = &DyePalette::replaceSOGLColorDefault;
+ funcReplaceSOGLColorSse2 = &DyePalette::replaceSOGLColorDefault;
}
}
diff --git a/src/resources/dye/dyepalette.h b/src/resources/dye/dyepalette.h
index da165305e..4129928fa 100644
--- a/src/resources/dye/dyepalette.h
+++ b/src/resources/dye/dyepalette.h
@@ -155,53 +155,13 @@ class DyePalette final
/**
* replace colors for OpenGL for S dye.
*/
- void replaceSOGLColor(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
-
- /**
- * replace colors for OpenGL for S dye.
- */
void replaceSOGLColorDefault(uint32_t *restrict pixels,
const int bufSize) const restrict2;
-
- /**
- * replace colors for OpenGL for S dye.
- */
- FUNCTION_SIMD_DEFAULT
- void replaceSOGLColorSimd(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
-
- /**
- * replace colors for OpenGL for S dye.
- */
- FUNCTION_SIMD_DEFAULT
- void replaceSOGLColorSse2(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
-
- /**
- * replace colors for OpenGL for S dye.
- */
- FUNCTION_SIMD_DEFAULT
- void replaceSOGLColorAvx2(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
-
#ifdef SIMD_SUPPORTED
/**
* replace colors for OpenGL for S dye.
*/
__attribute__ ((target ("sse2")))
- void replaceSOGLColorSimd(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
- /**
- * replace colors for OpenGL for S dye.
- */
- __attribute__ ((target ("avx2")))
- void replaceSOGLColorSimd(uint32_t *restrict pixels,
- const int bufSize) const restrict2;
- /**
- * replace colors for OpenGL for S dye.
- */
- __attribute__ ((target ("sse2")))
void replaceSOGLColorSse2(uint32_t *restrict pixels,
const int bufSize) const restrict2;
/**
@@ -285,6 +245,9 @@ class DyePalette final
static DyeFunctionPtr funcReplaceSColor;
static DyeFunctionPtr funcReplaceSColorSse2;
static DyeFunctionPtr funcReplaceSColorAvx2;
+ static DyeFunctionPtr funcReplaceSOGLColor;
+ static DyeFunctionPtr funcReplaceSOGLColorSse2;
+ static DyeFunctionPtr funcReplaceSOGLColorAvx2;
#ifndef UNITTESTS
private:
diff --git a/src/resources/dye/dyepalette_replacesoglcolor.cpp b/src/resources/dye/dyepalette_replacesoglcolor.cpp
index 57ffd6b50..005523b4b 100644
--- a/src/resources/dye/dyepalette_replacesoglcolor.cpp
+++ b/src/resources/dye/dyepalette_replacesoglcolor.cpp
@@ -35,26 +35,98 @@
#include "debug.h"
-void DyePalette::replaceSOGLColor(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#ifdef SIMD_SUPPORTED
- if (bufSize >= 8)
- replaceSOGLColorSimd(pixels, bufSize);
- else
- replaceSOGLColorDefault(pixels, bufSize);
-#else // SIMD_SUPPORTED
-#include "resources/dye/dyepalette_replacesoglcolor_default.hpp"
-#endif // SIMD_SUPPORTED
-}
-
void DyePalette::replaceSOGLColorDefault(uint32_t *restrict pixels,
const int bufSize) const restrict2
{
-#include "resources/dye/dyepalette_replacesoglcolor_default.hpp"
-}
+ std::vector<DyeColor>::const_iterator it_end = mColors.end();
+ const size_t sz = mColors.size();
+ if (!sz || !pixels)
+ return;
+ if (sz % 2)
+ -- it_end;
+
+#ifdef ENABLE_CILKPLUS
+ cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (pixels[ptr]) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int data = (pixels[ptr]) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+
+#else // ENABLE_CILKPLUS
+
+ for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
+ pixels != p_end;
+ ++pixels)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (*pixels) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int data = (*pixels) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+#endif // ENABLE_CILKPLUS
+}
#ifdef SIMD_SUPPORTED
/*
@@ -66,54 +138,260 @@ static void print256(const char *const text, const __m256i &val)
*/
__attribute__ ((target ("sse2")))
-void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels,
+void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
const int bufSize) const restrict2
{
-#include "resources/dye/dyepalette_replacesoglcolor_sse2.hpp"
-}
+ std::vector<DyeColor>::const_iterator it_end = mColors.end();
+ const size_t sz = mColors.size();
+ if (!sz || !pixels)
+ return;
+ if (sz % 2)
+ -- it_end;
-__attribute__ ((target ("avx2")))
-void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#include "resources/dye/dyepalette_replacesoglcolor_avx2.hpp"
-}
+ if (bufSize >= 8)
+ {
+ for (int ptr = 0; ptr < bufSize; ptr += 4)
+ {
+ __m128i mask = _mm_set1_epi32(0x00ffffff);
+// __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
+// &pixels[ptr]));
+ __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
+ &pixels[ptr]));
-__attribute__ ((target ("sse2")))
-void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#include "resources/dye/dyepalette_replacesoglcolor_sse2.hpp"
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+ __m128i base2 = _mm_and_si128(mask, base);
+ __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
+ __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
+ __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
+ cmpRes = _mm_and_si128(mask, cmpRes);
+ __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
+ __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
+ base = _mm_or_si128(srcAnd, dstAnd);
+ ++ it;
+ }
+// _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
+ }
+ }
+ else
+ {
+#ifdef ENABLE_CILKPLUS
+ cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (pixels[ptr]) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int data = (pixels[ptr]) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+
+#else // ENABLE_CILKPLUS
+
+ for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
+ pixels != p_end;
+ ++pixels)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (*pixels) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int data = (*pixels) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+#endif // ENABLE_CILKPLUS
+ }
}
__attribute__ ((target ("avx2")))
void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
const int bufSize) const restrict2
{
-#include "resources/dye/dyepalette_replacesoglcolor_avx2.hpp"
-}
+ std::vector<DyeColor>::const_iterator it_end = mColors.end();
+ const size_t sz = mColors.size();
+ if (!sz || !pixels)
+ return;
+ if (sz % 2)
+ -- it_end;
-#endif // SIMD_SUPPORTED
+ if (bufSize >= 8)
+ {
+ for (int ptr = 0; ptr < bufSize; ptr += 8)
+ {
+ __m256i mask = _mm256_set1_epi32(0x00ffffff);
+// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
+// &pixels[ptr]));
+ __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
+ &pixels[ptr]));
-FUNCTION_SIMD_DEFAULT
-void DyePalette::replaceSOGLColorSimd(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#include "resources/dye/dyepalette_replacesoglcolor_default.hpp"
-}
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
-FUNCTION_SIMD_DEFAULT
-void DyePalette::replaceSOGLColorSse2(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#include "resources/dye/dyepalette_replacesoglcolor_default.hpp"
-}
+ __m256i base2 = _mm256_and_si256(mask, base);
+ __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
+ __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
+ __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
+ cmpRes = _mm256_and_si256(mask, cmpRes);
+ __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
+ __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
+ base = _mm256_or_si256(srcAnd, dstAnd);
+ ++ it;
+ }
+// _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
+ _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
+ }
+ }
+ else
+ {
+#ifdef ENABLE_CILKPLUS
+ cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (pixels[ptr]) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
-FUNCTION_SIMD_DEFAULT
-void DyePalette::replaceSOGLColorAvx2(uint32_t *restrict pixels,
- const int bufSize) const restrict2
-{
-#include "resources/dye/dyepalette_replacesoglcolor_default.hpp"
+ const unsigned int data = (pixels[ptr]) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+
+#else // ENABLE_CILKPLUS
+
+ for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
+ pixels != p_end;
+ ++pixels)
+ {
+ uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int data = (*pixels) & 0xffffff00;
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int data = (*pixels) & 0x00ffffff;
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ std::vector<DyeColor>::const_iterator it = mColors.begin();
+ while (it != it_end)
+ {
+ const DyeColor &col = *it;
+ ++ it;
+ const DyeColor &col2 = *it;
+
+#if SDL_BYTEORDER == SDL_BIG_ENDIAN
+ const unsigned int coldata = (col.value[0] << 24)
+ | (col.value[1] << 16) | (col.value[2] << 8);
+#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ const unsigned int coldata = (col.value[0])
+ | (col.value[1] << 8) | (col.value[2] << 16);
+#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
+
+ if (data == coldata)
+ {
+ p[0] = col2.value[0];
+ p[1] = col2.value[1];
+ p[2] = col2.value[2];
+ break;
+ }
+
+ ++ it;
+ }
+ }
+#endif // ENABLE_CILKPLUS
+ }
}
+#endif // SIMD_SUPPORTED
#endif // USE_OPENGL
diff --git a/src/resources/dye/dyepalette_replacesoglcolor_avx2.hpp b/src/resources/dye/dyepalette_replacesoglcolor_avx2.hpp
deleted file mode 100644
index 6e45f807c..000000000
--- a/src/resources/dye/dyepalette_replacesoglcolor_avx2.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * The ManaPlus Client
- * Copyright (C) 2011-2017 The ManaPlus Developers
- *
- * This file is part of The ManaPlus Client.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
- std::vector<DyeColor>::const_iterator it_end = mColors.end();
- const size_t sz = mColors.size();
- if (!sz || !pixels)
- return;
- if (sz % 2)
- -- it_end;
-
- for (int ptr = 0; ptr < bufSize; ptr += 8)
- {
- __m256i mask = _mm256_set1_epi32(0x00ffffff);
-// __m256i base = _mm256_load_si256(reinterpret_cast<__m256i*>(
-// &pixels[ptr]));
- __m256i base = _mm256_loadu_si256(reinterpret_cast<__m256i*>(
- &pixels[ptr]));
-
- std::vector<DyeColor>::const_iterator it = mColors.begin();
- while (it != it_end)
- {
- const DyeColor &col = *it;
- ++ it;
- const DyeColor &col2 = *it;
-
- __m256i base2 = _mm256_and_si256(mask, base);
- __m256i newMask = _mm256_set1_epi32(col2.valueSOgl);
- __m256i cmpMask = _mm256_set1_epi32(col.valueSOgl);
- __m256i cmpRes = _mm256_cmpeq_epi32(base2, cmpMask);
- cmpRes = _mm256_and_si256(mask, cmpRes);
- __m256i srcAnd = _mm256_andnot_si256(cmpRes, base);
- __m256i dstAnd = _mm256_and_si256(cmpRes, newMask);
- base = _mm256_or_si256(srcAnd, dstAnd);
- ++ it;
- }
-// _mm256_store_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
- _mm256_storeu_si256(reinterpret_cast<__m256i*>(&pixels[ptr]), base);
- }
diff --git a/src/resources/dye/dyepalette_replacesoglcolor_default.hpp b/src/resources/dye/dyepalette_replacesoglcolor_default.hpp
deleted file mode 100644
index 7669b99bf..000000000
--- a/src/resources/dye/dyepalette_replacesoglcolor_default.hpp
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * The ManaPlus Client
- * Copyright (C) 2011-2017 The ManaPlus Developers
- *
- * This file is part of The ManaPlus Client.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
- std::vector<DyeColor>::const_iterator it_end = mColors.end();
- const size_t sz = mColors.size();
- if (!sz || !pixels)
- return;
- if (sz % 2)
- -- it_end;
-
-#ifdef ENABLE_CILKPLUS
- cilk_for (int ptr = 0; ptr < bufSize; ptr ++)
- {
- uint8_t *const p = reinterpret_cast<uint8_t *>(&pixels[ptr]);
-#if SDL_BYTEORDER == SDL_BIG_ENDIAN
- const unsigned int data = (pixels[ptr]) & 0xffffff00;
-#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- const unsigned int data = (pixels[ptr]) & 0x00ffffff;
-#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- std::vector<DyeColor>::const_iterator it = mColors.begin();
- while (it != it_end)
- {
- const DyeColor &col = *it;
- ++ it;
- const DyeColor &col2 = *it;
-
-#if SDL_BYTEORDER == SDL_BIG_ENDIAN
- const unsigned int coldata = (col.value[0] << 24)
- | (col.value[1] << 16) | (col.value[2] << 8);
-#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- const unsigned int coldata = (col.value[0])
- | (col.value[1] << 8) | (col.value[2] << 16);
-#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- if (data == coldata)
- {
- p[0] = col2.value[0];
- p[1] = col2.value[1];
- p[2] = col2.value[2];
- break;
- }
-
- ++ it;
- }
- }
-
-#else // ENABLE_CILKPLUS
-
- for (const uint32_t *const p_end = pixels + CAST_SIZE(bufSize);
- pixels != p_end;
- ++pixels)
- {
- uint8_t *const p = reinterpret_cast<uint8_t *>(pixels);
-#if SDL_BYTEORDER == SDL_BIG_ENDIAN
- const unsigned int data = (*pixels) & 0xffffff00;
-#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- const unsigned int data = (*pixels) & 0x00ffffff;
-#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- std::vector<DyeColor>::const_iterator it = mColors.begin();
- while (it != it_end)
- {
- const DyeColor &col = *it;
- ++ it;
- const DyeColor &col2 = *it;
-
-#if SDL_BYTEORDER == SDL_BIG_ENDIAN
- const unsigned int coldata = (col.value[0] << 24)
- | (col.value[1] << 16) | (col.value[2] << 8);
-#else // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- const unsigned int coldata = (col.value[0])
- | (col.value[1] << 8) | (col.value[2] << 16);
-#endif // SDL_BYTEORDER == SDL_BIG_ENDIAN
-
- if (data == coldata)
- {
- p[0] = col2.value[0];
- p[1] = col2.value[1];
- p[2] = col2.value[2];
- break;
- }
-
- ++ it;
- }
- }
-#endif // ENABLE_CILKPLUS
diff --git a/src/resources/dye/dyepalette_replacesoglcolor_sse2.hpp b/src/resources/dye/dyepalette_replacesoglcolor_sse2.hpp
deleted file mode 100644
index a59c53979..000000000
--- a/src/resources/dye/dyepalette_replacesoglcolor_sse2.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * The ManaPlus Client
- * Copyright (C) 2011-2017 The ManaPlus Developers
- *
- * This file is part of The ManaPlus Client.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
- std::vector<DyeColor>::const_iterator it_end = mColors.end();
- const size_t sz = mColors.size();
- if (!sz || !pixels)
- return;
- if (sz % 2)
- -- it_end;
-
- for (int ptr = 0; ptr < bufSize; ptr += 4)
- {
- __m128i mask = _mm_set1_epi32(0x00ffffff);
-// __m128i base = _mm_load_si128(reinterpret_cast<__m128i*>(
-// &pixels[ptr]));
- __m128i base = _mm_loadu_si128(reinterpret_cast<__m128i*>(
- &pixels[ptr]));
-
- std::vector<DyeColor>::const_iterator it = mColors.begin();
- while (it != it_end)
- {
- const DyeColor &col = *it;
- ++ it;
- const DyeColor &col2 = *it;
-
- __m128i base2 = _mm_and_si128(mask, base);
- __m128i newMask = _mm_set1_epi32(col2.valueSOgl);
- __m128i cmpMask = _mm_set1_epi32(col.valueSOgl);
- __m128i cmpRes = _mm_cmpeq_epi32(base2, cmpMask);
- cmpRes = _mm_and_si128(mask, cmpRes);
- __m128i srcAnd = _mm_andnot_si128(cmpRes, base);
- __m128i dstAnd = _mm_and_si128(cmpRes, newMask);
- base = _mm_or_si128(srcAnd, dstAnd);
- ++ it;
- }
-// _mm_store_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
- _mm_storeu_si128(reinterpret_cast<__m128i*>(&pixels[ptr]), base);
- }