xbrz_8cpp_source.html

 /*

    Copyright (C) 2014 - 2018 by Chris Beck <render787@gmail.com>

    Part of the Battle for Wesnoth Project https://www.wesnoth.org/

    This program is free software; you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation; either version 2 of the License, or

    (at your option) any later version.

    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY.

    See the COPYING file for more details.


    This is a derivative work of the xBRZ component of the HqMAME project

    by Zenju. The original Licensing statement follows, indented with //

    The primary changes are syntactic, to make it compile with C99+Boost,

    and to make it handle an alpha channel in the image in a manner proper

    for SDL.


    It is not possible to extend the MAME 'special exception' to all of

    the Battle for Wesnoth project, however, the special exception is

    granted for my derivative forms of this work.

 */


 // ****************************************************************************

 // * This file is part of the HqMAME project. It is distributed under         *

 // * GNU General Public License: http://www.gnu.org/licenses/gpl.html         *

 // * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved          *

 // *                                                                          *

 // * Additionally and as a special exception, the author gives permission     *

 // * to link the code of this program with the MAME library (or with modified *

 // * versions of MAME that use the same license as MAME), and distribute      *

 // * linked combinations including the two. You must obey the GNU General     *

 // * Public License in all respects for all of the code used other than MAME. *

 // * If you modify this file, you may extend this exception to your version   *

 // * of the file, but you are not obligated to do so. If you do not wish to   *

 // * do so, delete this exception statement from your version.                *

 // ****************************************************************************


 #include "xBRZ/xbrz.hpp"

 #include "config.hpp"

 #include <cassert>

 #include <cmath>

 #include <algorithm>


 namespace

 {

 //Returns byte number N of "val", where N = 0 selects the least significant byte.
 template <uint32_t N> inline
 unsigned char getByte(uint32_t val)
 {
     const uint32_t shifted = val >> (8 * N);
     return static_cast<unsigned char>(shifted & 0xff);
 }


 //Colour channel accessors: red is byte 2, green is byte 1 and blue is byte 0 of the packed pixel.
 inline unsigned char getRed(uint32_t val)
 {
     return getByte<2>(val);
 }

 inline unsigned char getGreen(uint32_t val)
 {
     return getByte<1>(val);
 }

 inline unsigned char getBlue(uint32_t val)
 {
     return getByte<0>(val);
 }


 //Absolute value for any type that supports comparison with 0 and unary minus.
 //Values that do not compare below zero are returned unchanged.
 template <class T> inline
 T abs(T value)
 {
     //static_assert(std::is_signed<T>::value, "");
     if (value < 0)
         return -value;

     return value;
 }


 //Bit masks selecting one 8 bit channel of a packed pixel: alpha is the top byte, then red, green, blue.
 const uint32_t redMask   = 0xffu << 16;
 const uint32_t greenMask = 0xffu << 8;
 const uint32_t blueMask  = 0xffu;
 const uint32_t alphaMask = 0xffu << 24;


 //Blend color "col" over "dst" with opacity N / M; the result is written back into "dst".
 //
 //Both pixels carry their alpha value in the top byte. xBRZ itself assumes there is no alpha channel
 //(and sets it to zero when it blends); our sprites have alpha, so this version does the following:
 // - col has alpha 0: dst is left untouched
 // - dst has alpha 0: dst becomes an exact copy of col (the opacity N / M is not applied)
 // - otherwise:       red, green, blue and alpha are each set to (col * N + dst * (M - N)) / M
 //
 //N and M must satisfy 0 < N < M < 256; this is checked at compile time because larger values
 //would overflow the 32 bit intermediate results below.
 //
 //TODO: Figure out if there's some way to combine the multiplicative approach with the "averaged alpha", and to feed the
 // alpha back into the colors, without making it all very slow. Current approach looks okay, but I think shadows could be
 // better, also I think some units are getting 'black outlines' now because their black pixels with 0 alpha (background)
 // are getting averaged with their foreground.
 //
 //Two earlier experiments are not used:
 // - scaling the alpha of col by N / M when dst is fully transparent, instead of copying col unchanged
 // - floating point "over" compositing, see
 //   http://en.wikipedia.org/wiki/Alpha_compositing#Analytical_derivation_of_the_over_operator
 template <unsigned int N, unsigned int M> inline
 void alphaBlend(uint32_t& dst, uint32_t col)
 {
     static_assert(N < 256, "possible overflow of (col & redMask) * N");
     static_assert(M < 256, "possible overflow of (col & redMask  ) * N + (dst & redMask  ) * (M - N)");
     static_assert(0 < N && N < M, "opacity N / M must lie strictly between 0 and 1");

     const uint32_t col_alpha = col >> 24; // & with alphaMask is unnecessary

     if (!col_alpha) return; //nothing visible to blend in

     const uint32_t dst_alpha = dst >> 24;

     if (!dst_alpha) {
         dst = col; //destination is fully transparent: take the new color as it is
         return;
     }

     dst = (redMask   & ((col & redMask   ) * N + (dst & redMask   ) * (M - N)) / M) | //this works because 8 upper bits are free
           (greenMask & ((col & greenMask ) * N + (dst & greenMask ) * (M - N)) / M) |
           (blueMask  & ((col & blueMask  ) * N + (dst & blueMask  ) * (M - N)) / M) |
           (alphaMask & (((col_alpha       * N + dst_alpha * (M - N)) / M) << 24)); // need to downshift and upshift because of overflow
 }


 //inline

 //double fastSqrt(double n)

 //{

 //    __asm //speeds up xBRZ by about 9% compared to std::sqrt

 //    {

 //        fld n

 //        fsqrt

 //    }

 //}

 //


 #if 0

 //Mix the red, green and blue channels of two pixels: pix1 is weighted by "alpha", pix2 by (1 - alpha).
 //The top byte of the result is always zero.
 inline
 uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
 {
     const uint32_t red   = static_cast<uint32_t>((pix1 & redMask  ) * alpha + (pix2 & redMask  ) * (1 - alpha));
     const uint32_t green = static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha));
     const uint32_t blue  = static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha));

     return (redMask & red) | (greenMask & green) | (blueMask & blue);
 }

 #endif


 //Move a pixel pointer by "bytes" bytes (not by pixels); the offset may be negative.
 uint32_t* byteAdvance(uint32_t* ptr, int bytes)
 {
     char* const raw = reinterpret_cast<char*>(ptr);
     return reinterpret_cast<uint32_t*>(raw + bytes);
 }

 //Same as above for read-only pixel data.
 const uint32_t* byteAdvance(const uint32_t* ptr, int bytes)
 {
     const char* const raw = reinterpret_cast<const char*>(ptr);
     return reinterpret_cast<const uint32_t*>(raw + bytes);
 }


 //fill block  with the given color

 //Fill a block of blockWidth x blockHeight pixels with the color "col".
 //"trg" points at the first pixel of the first row; "pitch" is the distance in bytes from the start
 //of one row to the start of the next.
 inline
 void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
 {
     uint32_t* row = trg;
     for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
     {
         for (int x = 0; x < blockWidth; ++x)
             row[x] = col;

         row = byteAdvance(row, pitch);
     }
 }

 //Fill a square block of n x n pixels with the color "col".
 inline
 void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n)
 {
     fillBlock(trg, pitch, col, n, n);
 }


 //FORCE_INLINE: the compiler specific "always inline" spelling where one is known, plain "inline" otherwise
 #if defined(_MSC_VER)
 #    define FORCE_INLINE __forceinline
 #elif defined(__GNUC__)
 #    define FORCE_INLINE __attribute__((always_inline)) inline
 #else
 #    define FORCE_INLINE inline
 #endif


 //Clock-wise rotation in steps of 90 degrees. The numeric values are consecutive on purpose:
 //MatrixRotation reaches the previous step by computing "rotDeg - 1".
 enum RotationDegree
 {
     ROT_0   = 0,
     ROT_90  = 1,
     ROT_180 = 2,
     ROT_270 = 3
 };


 //calculate input matrix coordinates after rotation at compile time

 template <RotationDegree rotDeg, size_t I, size_t J, size_t N>

 struct MatrixRotation;


 template <size_t I, size_t J, size_t N>

 struct MatrixRotation<ROT_0, I, J, N>

 {

     static const size_t I_old = I;

     static const size_t J_old = J;

 };


 template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix

 struct MatrixRotation

 {

     static const size_t I_old = N - 1 - MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation!

     static const size_t J_old =         MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::I_old; //

 };


 //View onto an N x N area of the output image, addressed in coordinates rotated by rotDeg.
 template <size_t N, RotationDegree rotDeg>
 class OutputMatrix
 {
 public:
     //access matrix area, top-left at position "out" for image with given width (in pixels)
     OutputMatrix(uint32_t* out, int outWidth) : out_(out), outWidth_(outWidth) {}

     //Reference to the output pixel that row I, column J of the rotated matrix maps to.
     template <size_t I, size_t J>
     uint32_t& ref() const
     {
         typedef MatrixRotation<rotDeg, I, J, N> Unrotated;

         static const size_t rowBefore = Unrotated::I_old;
         static const size_t colBefore = Unrotated::J_old;
         return *(out_ + colBefore + rowBefore * outWidth_);
     }

 private:
     uint32_t* out_;
     const int outWidth_;
 };


 //Returns value multiplied by itself.
 template <class T> inline
 T square(T value)
 {
     return value * value;
 }


 /*

 inline

 void rgbtoLuv(uint32_t c, double& L, double& u, double& v)

 {

     //http://www.easyrgb.com/index.php?X=MATH&H=02#text2

     double r = getRed  (c) / 255.0;

     double g = getGreen(c) / 255.0;

     double b = getBlue (c) / 255.0;


     if ( r > 0.04045 )

         r = std::pow(( ( r + 0.055 ) / 1.055 ) , 2.4);

     else

         r /= 12.92;

     if ( g > 0.04045 )

         g = std::pow(( ( g + 0.055 ) / 1.055 ) , 2.4);

     else

         g /=  12.92;

     if ( b > 0.04045 )

         b  = std::pow(( ( b + 0.055 ) / 1.055 ) , 2.4);

     else

         b /=  12.92;


     r *= 100;

     g *= 100;

     b *= 100;


     double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;

     double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;

     double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;

     //---------------------

     double var_U =  4 * x  / ( x +  15 * y  +  3 * z  );

     double var_V =  9 * y  / ( x +  15 * y  +  3 * z  );

     double var_Y = y / 100;


     if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );

     else                    var_Y =  7.787 * var_Y  +  16.0 / 116;


     const double ref_X =  95.047;        //Observer= 2 (degrees), Illuminant= D65

     const double ref_Y = 100.000;

     const double ref_Z = 108.883;


     const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );

     const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );


     L = ( 116 * var_Y ) - 16;

     u = 13 * L * ( var_U - ref_U );

     v = 13 * L * ( var_V - ref_V );

 }

 */


 #if 0

 //Convert the red, green and blue channels of a packed pixel to L, A, B values
 //(reference white: Observer = 2 degrees, Illuminant = D65).
 //
 // c: packed pixel; only its red, green and blue bytes are read
 // L: receives the lightness, truncated to unsigned char
 // A: receives the first colour component, truncated to signed char
 // B: receives the second colour component, truncated to signed char
 //
 //code: http://www.easyrgb.com/index.php?X=MATH
 //test: http://www.workwithcolor.com/color-converter-01.htm
 inline
 void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
 {
     //------RGB to XYZ------
     double r = getRed  (c) / 255.0;
     double g = getGreen(c) / 255.0;
     double b = getBlue (c) / 255.0;

     //undo the gamma encoding of each channel; every channel must be written back to itself
     //(the previous code stored all three results in "r")
     r = r > 0.04045 ? std::pow(( r + 0.055 ) / 1.055, 2.4) : r / 12.92;
     g = g > 0.04045 ? std::pow(( g + 0.055 ) / 1.055, 2.4) : g / 12.92;
     b = b > 0.04045 ? std::pow(( b + 0.055 ) / 1.055, 2.4) : b / 12.92;

     r *= 100;
     g *= 100;
     b *= 100;

     const double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
     const double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
     const double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;

     //------XYZ to Lab------
     const double refX = 95.047;  //
     const double refY = 100.000; //Observer= 2 (degrees), Illuminant= D65
     const double refZ = 108.883; //
     double var_X = x / refX;
     double var_Y = y / refY;
     double var_Z = z / refZ;

     var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
     var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
     var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;

     L = static_cast<unsigned char>(116 * var_Y  - 16);
     A = static_cast<  signed char>(500 * (var_X - var_Y));
     B = static_cast<  signed char>(200 * (var_Y - var_Z));
 }

 #endif


 #if 0

 //Colour distance "Delta E/CIE76": euclidean distance of the two pixels after conversion with rgbtoLab.
 //http://www.easyrgb.com/index.php?X=DELT
 inline
 double distLAB(uint32_t pix1, uint32_t pix2)
 {
     unsigned char L1 = 0; //[0, 100]
     signed   char a1 = 0; //[-128, 127]
     signed   char b1 = 0; //[-128, 127]
     rgbtoLab(pix1, L1, a1, b1);

     unsigned char L2 = 0;
     signed   char a2 = 0;
     signed   char b2 = 0;
     rgbtoLab(pix2, L2, a2, b2);

     const double deltaL = 1.0 * L1 - L2;
     const double deltaA = 1.0 * a1 - a2;
     const double deltaB = 1.0 * b1 - b2;

     return std::sqrt(deltaL * deltaL +
                      deltaA * deltaA +
                      deltaB * deltaB);
 }

 #endif


 /*

 inline

 void rgbtoHsl(uint32_t c, double& h, double& s, double& l)

 {

     //http://www.easyrgb.com/index.php?X=MATH&H=18#text18

     const int r = getRed  (c);

     const int g = getGreen(c);

     const int b = getBlue (c);


     const int varMin = numeric::min(r, g, b);

     const int varMax = numeric::max(r, g, b);

     const int delMax = varMax - varMin;


     l = (varMax + varMin) / 2.0 / 255.0;


     if (delMax == 0) //gray, no chroma...

     {

         h = 0;

         s = 0;

     }

     else

     {

         s = l < 0.5 ?

             delMax / (1.0 * varMax + varMin) :

             delMax / (2.0 * 255 - varMax - varMin);


         double delR = ((varMax - r) / 6.0 + delMax / 2.0) / delMax;

         double delG = ((varMax - g) / 6.0 + delMax / 2.0) / delMax;

         double delB = ((varMax - b) / 6.0 + delMax / 2.0) / delMax;


         if (r == varMax)

             h = delB - delG;

         else if (g == varMax)

             h = 1 / 3.0 + delR - delB;

         else if (b == varMax)

             h = 2 / 3.0 + delG - delR;


         if (h < 0)

             h += 1;

         if (h > 1)

             h -= 1;

     }

 }


 inline

 double distHSL(uint32_t pix1, uint32_t pix2, double lightningWeight)

 {

     double h1 = 0;

     double s1 = 0;

     double l1 = 0;

     rgbtoHsl(pix1, h1, s1, l1);

     double h2 = 0;

     double s2 = 0;

     double l2 = 0;

     rgbtoHsl(pix2, h2, s2, l2);


     //HSL is in cylindrical coordinates where L represents height, S radius, H angle,

     //however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1

     assert(0 <= h1 && h1 <= 1);

     assert(0 <= h2 && h2 <= 1);


     double r1 = l1 < 0.5 ?

                 l1 * 2 :

                 2 - l1 * 2;


     double x1 = r1 * s1 * std::cos(h1 * 2 * numeric::pi);

     double y1 = r1 * s1 * std::sin(h1 * 2 * numeric::pi);

     double z1 = l1;


     double r2 = l2 < 0.5 ?

                 l2 * 2 :

                 2 - l2 * 2;


     double x2 = r2 * s2 * std::cos(h2 * 2 * numeric::pi);

     double y2 = r2 * s2 * std::sin(h2 * 2 * numeric::pi);

     double z2 = l2;


     return 255 * std::sqrt(square(x1 - x2) + square(y1 - y2) +  square(lightningWeight * (z1 - z2)));

 }

 */


 #if 0

 //Euclidean distance between the red, green and blue channels of two pixels.
 inline
 double distRGB(uint32_t pix1, uint32_t pix2)
 {
     const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
     const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
     const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

     return std::sqrt(deltaR * deltaR + deltaG * deltaG + deltaB * deltaB);
 }

 #endif


 #if 0

 //Weighted RGB distance; the red and blue weights depend on the average red value of the two pixels.
 //non-linear rgb: http://www.compuphase.com/cmetric.htm
 inline
 double distNonLinearRGB(uint32_t pix1, uint32_t pix2)
 {
     const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
     const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
     const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

     const double meanRed = (static_cast<double>(getRed(pix1)) + getRed(pix2)) / 2;

     const double weightR = 2 + meanRed / 255;
     const double weightB = 2 + (255 - meanRed) / 255;

     return std::sqrt(weightR * (deltaR * deltaR) + 4 * (deltaG * deltaG) + weightB * (deltaB * deltaB));
 }

 #endif


 //Colour distance in (analog) YCbCr space; "lumaWeight" scales the contribution of the luma difference.
 //Only the red, green and blue bytes of the two pixels are looked at.
 //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
 inline
 double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
 {
     //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
     //the differences stay int: subtraction for int is noticeably faster than for double,
     //and division by 255 may be delayed to after the matrix multiplication
     const int deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
     const int deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
     const int deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

     //ITU-R BT.709 conversion
     const double k_b = 0.0722;
     const double k_r = 0.2126;
     const double k_g = 1 - k_b - k_r;

     const double scale_b = 0.5 / (1 - k_b);
     const double scale_r = 0.5 / (1 - k_r);

     const double luma = k_r * deltaR + k_g * deltaG + k_b * deltaB; //[!], analog YCbCr!
     const double c_b  = scale_b * (deltaB - luma);
     const double c_r  = scale_r * (deltaR - luma);

     //we skip division by 255 to have similar range like other distance functions
     const double weightedLuma = lumaWeight * luma;
     return std::sqrt(weightedLuma * weightedLuma + c_b * c_b + c_r * c_r);
 }


 #if 0

 //Colour distance in YUV space; "luminanceWeight" scales the contribution of the Y difference.
 //http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
 inline
 double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
 {
     //perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
     //since RGB -> YUV conversion is essentially a matrix multiplication, we can calculate the RGB diff before the conversion (distributive property)
     const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
     const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
     const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

     const double w_b = 0.114;
     const double w_r = 0.299;
     const double w_g = 1 - w_r - w_b;

     const double u_max = 0.436;
     const double v_max = 0.615;

     const double scale_u = u_max / (1 - w_b);
     const double scale_v = v_max / (1 - w_r);

     const double y = w_r * deltaR + w_g * deltaG + w_b * deltaB; //value range: 255 * [-1, 1]
     const double u = scale_u * (deltaB - y);                     //value range: 255 * 2 * u_max * [-1, 1]
     const double v = scale_v * (deltaR - y);                     //value range: 255 * 2 * v_max * [-1, 1]

 #ifndef NDEBUG
     const double eps = 0.5; //tolerance of the range checks below
 #endif
     assert(std::abs(y) <= 255 + eps);
     assert(std::abs(u) <= 255 * 2 * u_max + eps);
     assert(std::abs(v) <= 255 * 2 * v_max + eps);

     const double weightedY = luminanceWeight * y;
     return std::sqrt(weightedY * weightedY + u * u + v * v);
 }

 #endif


 //Distance between two pixel colors: 0 for bit-identical pixels, distYCbCr() otherwise.
 //Alternative metrics that were tried here: distHSL, distRGB, distLAB, distNonLinearRGB, distYUV.
 inline
 double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
 {
     const bool identical = pix1 == pix2; //shortcut gives about 8% perf boost
     return identical ? 0 : distYCbCr(pix1, pix2, luminanceWeight);
 }


 //Result of the corner analysis for one pixel corner.
 //attention: BlendType must fit into the value range of 2 bit, four of them are packed into one byte!
 enum BlendType
 {
     BLEND_NONE     = 0,
     BLEND_NORMAL   = 1, //a normal indication to blend
     BLEND_DOMINANT = 2, //a strong indication to blend
 };


 struct BlendResult

 {

     BlendType

     /**/blend_f, blend_g,

     /**/blend_j, blend_k;


     BlendResult() : blend_f(), blend_g(), blend_j(), blend_k() {}

 };


 //4 x 4 block of input pixels used by the preprocessing step; one line below per kernel row,
 //all pixels start out as 0.
 struct Kernel_4x4
 {
     uint32_t a = 0, b = 0, c = 0, d = 0;
     uint32_t e = 0, f = 0, g = 0, h = 0;
     uint32_t i = 0, j = 0, k = 0, l = 0;
     uint32_t m = 0, n = 0, o = 0, p = 0;

     Kernel_4x4() {}
 };


 /*

 input kernel area naming convention:

 -----------------

 | A | B | C | D |

 ----|---|---|---|

 | E | F | G | H |   //evaluate the four corners between F, G, J, K

 ----|---|---|---|   //input pixel is at position F

 | I | J | K | L |

 ----|---|---|---|

 | M | N | O | P |

 -----------------

 */

 //Detect the blend direction for the corner shared by the kernel pixels F, G, J and K.
 //result: F, G, J, K corners of "GradientType"; a pixel that should not blend keeps BLEND_NONE.
 FORCE_INLINE
 BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg)
 {
     BlendResult result;

     //two identical rows or two identical columns: there is nothing to blend
     const bool sameRows    = ker.f == ker.g && ker.j == ker.k;
     const bool sameColumns = ker.f == ker.j && ker.g == ker.k;
     if (sameRows || sameColumns)
         return result;

     auto dist = [&cfg](uint32_t lhs, uint32_t rhs) { return colorDist(lhs, rhs, cfg.luminanceWeight_); };

     //accumulated color distance along each of the two diagonals; the central pair counts "weight" times
     const int weight = 4;
     const double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g);
     const double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k);

     //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
     if (jg < fk)
     {
         const BlendType strength = cfg.dominantDirectionThreshold * jg < fk ? BLEND_DOMINANT : BLEND_NORMAL;

         if (ker.f != ker.g && ker.f != ker.j)
             result.blend_f = strength;

         if (ker.k != ker.j && ker.k != ker.g)
             result.blend_k = strength;
     }
     else if (fk < jg)
     {
         const BlendType strength = cfg.dominantDirectionThreshold * fk < jg ? BLEND_DOMINANT : BLEND_NORMAL;

         if (ker.j != ker.f && ker.j != ker.k)
             result.blend_j = strength;

         if (ker.g != ker.f && ker.g != ker.k)
             result.blend_g = strength;
     }
     //jg == fk: no direction wins, all four corners stay BLEND_NONE

     return result;
 }


 //3 x 3 block of input pixels; one line below per kernel row, all pixels start out as 0.
 struct Kernel_3x3
 {
     uint32_t a = 0, b = 0, c = 0;
     uint32_t d = 0, e = 0, f = 0;
     uint32_t g = 0, h = 0, i = 0;

     Kernel_3x3() {}
 };


 //Kernel accessors get_a<rotDeg>(ker) ... get_i<rotDeg>(ker).
 //The one-argument DEF_GETTER below generates the general templates: get_x returns the kernel
 //field with the same name, whatever rotDeg is.
 #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }

 //we cannot and NEED NOT write "ker.##x": ## must paste two tokens into a single valid token, which "." followed by a name is not

 DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)

 DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f)

 DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i)

 #undef DEF_GETTER


 //The two-argument DEF_GETTER(x, y) generates an explicit specialization of get_x for one fixed
 //rotation that returns field y instead. The specializations for "a" are commented out, so
 //get_a returns ker.a for every rotation.
 //ROT_90 table (presumably the kernel as seen after a 90 degree clock-wise rotation - verify against callers)
 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }

 /*DEF_GETTER(a, g)*/ DEF_GETTER(b, d) DEF_GETTER(c, a)

 DEF_GETTER(d, h) DEF_GETTER(e, e) DEF_GETTER(f, b)

 DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)

 #undef DEF_GETTER


 //ROT_180 table: every field is swapped with the one diametrically opposite to it, e stays e
 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }

 /*DEF_GETTER(a, i)*/ DEF_GETTER(b, h) DEF_GETTER(c, g)

 DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)

 DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)

 #undef DEF_GETTER


 //ROT_270 table
 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }

 /*DEF_GETTER(a, c)*/ DEF_GETTER(b, f) DEF_GETTER(c, i)

 DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)

 DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g)

 #undef DEF_GETTER


 //compress four blend types into a single byte

 //inline BlendType getTopL   (unsigned char b) { return static_cast<BlendType>(0x3 & b); }

 //Read one of the 2 bit blend types packed into a byte:
 //top-right is stored in bits 2-3, bottom-right in bits 4-5 and bottom-left in bits 6-7.
 inline BlendType getTopR(unsigned char b)
 {
     return static_cast<BlendType>((b >> 2) & 0x3);
 }

 inline BlendType getBottomR(unsigned char b)
 {
     return static_cast<BlendType>((b >> 4) & 0x3);
 }

 inline BlendType getBottomL(unsigned char b)
 {
     return static_cast<BlendType>((b >> 6) & 0x3);
 }


 //Store a blend type in its 2 bit slot: top-left in bits 0-1, top-right in bits 2-3,
 //bottom-right in bits 4-5, bottom-left in bits 6-7.
 //The value is OR-ed in, the slot is not cleared first: buffer is assumed to be initialized before preprocessing!
 inline void setTopL(unsigned char& b, BlendType bt)
 {
     b = static_cast<unsigned char>(b | bt);
 }

 inline void setTopR(unsigned char& b, BlendType bt)
 {
     b = static_cast<unsigned char>(b | (bt << 2));
 }

 inline void setBottomR(unsigned char& b, BlendType bt)
 {
     b = static_cast<unsigned char>(b | (bt << 4));
 }

 inline void setBottomL(unsigned char& b, BlendType bt)
 {
     b = static_cast<unsigned char>(b | (bt << 6));
 }


 //True if at least one of the four packed blend types differs from 0.
 inline bool blendingNeeded(unsigned char b)
 {
     return static_cast<bool>(b);
 }


 //Rotate the packed blend byte to the left by one 2 bit slot per 90 degree step;
 //bits shifted out at the top re-enter at the bottom. The general template (used for ROT_0) changes nothing.
 template <RotationDegree rotDeg> inline
 unsigned char rotateBlendInfo(unsigned char b)
 {
     return b;
 }

 template <> inline
 unsigned char rotateBlendInfo<ROT_90>(unsigned char b)
 {
     return static_cast<unsigned char>(((b << 2) | (b >> 6)) & 0xff);
 }

 template <> inline
 unsigned char rotateBlendInfo<ROT_180>(unsigned char b)
 {
     return static_cast<unsigned char>(((b << 4) | (b >> 4)) & 0xff);
 }

 template <> inline
 unsigned char rotateBlendInfo<ROT_270>(unsigned char b)
 {
     return static_cast<unsigned char>(((b << 6) | (b >> 2)) & 0xff);
 }


 #if !defined(NDEBUG)
 //Debugging aid: scaleImage sets breakIntoDebugger while it processes the source pixel
 //(debugPixelX, debugPixelY).
 int  debugPixelX       = -1;
 int  debugPixelY       = 84;
 bool breakIntoDebugger = false;
 #endif


 /*

 input kernel area naming convention:

 -------------

 | A | B | C |

 ----|---|---|

 | D | E | F | //input pixel is at position E

 ----|---|---|

 | G | H | I |

 -------------

 */

 //Blend the bottom-right corner of the scaled-up pixel "e" as seen in the orientation rotDeg.
 // ker:       3 x 3 input kernel around the pixel; read through get_b<rotDeg> ... get_i<rotDeg>
 // target:    top-left output pixel of the Scaler::scale x Scaler::scale block belonging to "e"
 // trgWidth:  width of the output image in pixels
 // blendInfo: result of preprocessing all four corners of pixel "e" (unrotated)
 // cfg:       thresholds and weights of the scaler
 //Nothing is written unless the bottom-right corner (after rotation) is at least BLEND_NORMAL.
 template <class Scaler, RotationDegree rotDeg>
 FORCE_INLINE //perf: quite worth it!
 void scalePixel(const Kernel_3x3& ker,
                 uint32_t* target, int trgWidth,
                 unsigned char blendInfo,
                 const xbrz::ScalerCfg& cfg)
 {
 #ifndef NDEBUG
     (void) breakIntoDebugger;
     //if (breakIntoDebugger)
     //    __debugbreak(); //__asm int 3;
 #endif

     const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
     const BlendType bottomRight = getBottomR(blend);

     if (bottomRight < BLEND_NORMAL)
         return;

     //kernel pixels in the rotated orientation ("a" is not needed)
     const uint32_t b = get_b<rotDeg>(ker);
     const uint32_t c = get_c<rotDeg>(ker);
     const uint32_t d = get_d<rotDeg>(ker);
     const uint32_t e = get_e<rotDeg>(ker);
     const uint32_t f = get_f<rotDeg>(ker);
     const uint32_t g = get_g<rotDeg>(ker);
     const uint32_t h = get_h<rotDeg>(ker);
     const uint32_t i = get_i<rotDeg>(ker);

     auto dist = [&cfg](uint32_t lhs, uint32_t rhs) { return colorDist(lhs, rhs, cfg.luminanceWeight_); };
     auto eq   = [&cfg](uint32_t lhs, uint32_t rhs) { return colorDist(lhs, rhs, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };

     const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color

     OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);

     //a dominant gradient always gets a line blend; otherwise look for reasons to blend the corner only
     bool doLineBlend = true;
     if (bottomRight < BLEND_DOMINANT)
     {
         doLineBlend =
             //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
             //(but support double-blending for 90 degree corners)
             !((getTopR   (blend) != BLEND_NONE && !eq(e, g)) ||
               (getBottomL(blend) != BLEND_NONE && !eq(e, c)) ||
               //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
               (eq(g, h) && eq(h, i) && eq(i, f) && eq(f, c) && !eq(e, i)));
     }

     if (!doLineBlend)
     {
         Scaler::blendCorner(px, out);
         return;
     }

     //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
     const double fg = dist(f, g);
     const double hc = dist(h, c);

     const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
     const bool haveSteepLine   = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;

     if (haveShallowLine && haveSteepLine)
         Scaler::blendLineSteepAndShallow(px, out);
     else if (haveShallowLine)
         Scaler::blendLineShallow(px, out);
     else if (haveSteepLine)
         Scaler::blendLineSteep(px, out);
     else
         Scaler::blendLineDiagonal(px, out);
 }


 template <class Scaler> //scaler policy: see "Scaler2x" reference implementation

 void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)

 {

     yFirst = std::max(yFirst, 0);

     yLast  = std::min(yLast, srcHeight);

     if (yFirst >= yLast || srcWidth <= 0)

         return;


     const int trgWidth = srcWidth * Scaler::scale;


     //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of

     //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing

     const int bufferSize = srcWidth;

     unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;

     std::fill(preProcBuffer, preProcBuffer + bufferSize, static_cast<unsigned char>(0));

     //static_assert(BLEND_NONE == 0, "");


     //initialize preprocessing buffer for first row: detect upper left and right corner blending

     //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!

     if (yFirst > 0)

     {

         const int y = yFirst - 1;


         const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);

         const uint32_t* s_0  = src + srcWidth * y; //center line

         const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);

         const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);


         for (int x = 0; x < srcWidth; ++x)

         {

             const int x_m1 = std::max(x - 1, 0);

             const int x_p1 = std::min(x + 1, srcWidth - 1);

             const int x_p2 = std::min(x + 2, srcWidth - 1);


             Kernel_4x4 ker; //perf: initialization is negligible

             ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible

             ker.b = s_m1[x];

             ker.c = s_m1[x_p1];

             ker.d = s_m1[x_p2];


             ker.e = s_0[x_m1];

             ker.f = s_0[x];

             ker.g = s_0[x_p1];

             ker.h = s_0[x_p2];


             ker.i = s_p1[x_m1];

             ker.j = s_p1[x];

             ker.k = s_p1[x_p1];

             ker.l = s_p1[x_p2];


             ker.m = s_p2[x_m1];

             ker.n = s_p2[x];

             ker.o = s_p2[x_p1];

             ker.p = s_p2[x_p2];


             const BlendResult res = preProcessCorners(ker, cfg);

             /*

             preprocessing blend result:

             ---------

             | F | G |   //evaluate corner between F, G, J, K

             ----|---|   //input pixel is at position F

             | J | K |

             ---------

             */

             setTopR(preProcBuffer[x], res.blend_j);


             if (x + 1 < srcWidth)

                 setTopL(preProcBuffer[x + 1], res.blend_k);

         }

     }

     //------------------------------------------------------------------------------------


     for (int y = yFirst; y < yLast; ++y)

     {

         uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access


         const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);

         const uint32_t* s_0  = src + srcWidth * y; //center line

         const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);

         const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);


         unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position


         for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)

         {

 #ifndef NDEBUG

             breakIntoDebugger = debugPixelX == x && debugPixelY == y;

 #endif

             //all those bounds checks have only insignificant impact on performance!

             const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!

             const int x_p1 = std::min(x + 1, srcWidth - 1);

             const int x_p2 = std::min(x + 2, srcWidth - 1);


             //evaluate the four corners on bottom-right of current pixel

             unsigned char blend_xy = 0; //for current (x, y) position

             {

                 Kernel_4x4 ker; //perf: initialization is negligible

                 ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible

                 ker.b = s_m1[x];

                 ker.c = s_m1[x_p1];

                 ker.d = s_m1[x_p2];


                 ker.e = s_0[x_m1];

                 ker.f = s_0[x];

                 ker.g = s_0[x_p1];

                 ker.h = s_0[x_p2];


                 ker.i = s_p1[x_m1];

                 ker.j = s_p1[x];

                 ker.k = s_p1[x_p1];

                 ker.l = s_p1[x_p2];


                 ker.m = s_p2[x_m1];

                 ker.n = s_p2[x];

                 ker.o = s_p2[x_p1];

                 ker.p = s_p2[x_p2];


                 const BlendResult res = preProcessCorners(ker, cfg);

                 /*

                 preprocessing blend result:

                 ---------

                 | F | G |   // evaluate corner between F, G, J, K

                 ----|---|   // current input pixel is at position F

                 | J | K |

                 ---------

                 */

                 blend_xy = preProcBuffer[x];

                 setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!


                 setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)

                 preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row


                 blend_xy1 = 0;

                 setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column


                 if (x + 1 < srcWidth) //set 3rd known corner for (x + 1, y)

                     setBottomL(preProcBuffer[x + 1], res.blend_g);

             }


             //fill block of size scale * scale with the given color

             fillBlock(out, trgWidth * sizeof(uint32_t), s_0[x], Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!


             //blend four corners of current pixel

             if (blendingNeeded(blend_xy)) //good 20% perf-improvement

             {

                 Kernel_3x3 ker; //perf: initialization is negligible


                 ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible

                 ker.b = s_m1[x];

                 ker.c = s_m1[x_p1];


                 ker.d = s_0[x_m1];

                 ker.e = s_0[x];

                 ker.f = s_0[x_p1];


                 ker.g = s_p1[x_m1];

                 ker.h = s_p1[x];

                 ker.i = s_p1[x_p1];


                 scalePixel<Scaler, ROT_0  >(ker, out, trgWidth, blend_xy, cfg);

                 scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);

                 scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);

                 scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);

             }

         }

     }

 }


 //Blend patterns for the 2x scaler. Each member patches elements of the scaled output block, which it
 //reaches through out.template ref<I, J>() (both indices run from 0 to scale - 1 here).
 //alphaBlend<M, N> is defined elsewhere in this file; the "exact" note in blendCorner suggests that it
 //mixes col into the referenced element at weight M / N.
 struct Scaler2x
 {
     static const int scale = 2;

     //shallow line: partial blends on <1, 1> and <1, 0>
     template <class Matrix>
     static void blendLineShallow(uint32_t col, Matrix& out)
     {
         alphaBlend<3, 4>(out.template ref<1, 1>(), col);
         alphaBlend<1, 4>(out.template ref<1, 0>(), col);
     }

     //steep line: same weights as the shallow case with both indices swapped
     template <class Matrix>
     static void blendLineSteep(uint32_t col, Matrix& out)
     {
         alphaBlend<3, 4>(out.template ref<1, 1>(), col);
         alphaBlend<1, 4>(out.template ref<0, 1>(), col);
     }

     //steep and shallow line at once
     template <class Matrix>
     static void blendLineSteepAndShallow(uint32_t col, Matrix& out)
     {
         alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
         alphaBlend<1, 4>(out.template ref<0, 1>(), col);
         alphaBlend<1, 4>(out.template ref<1, 0>(), col);
     }

     //diagonal line: only <1, 1> is touched
     template <class Matrix>
     static void blendLineDiagonal(uint32_t col, Matrix& out)
     {
         alphaBlend<1, 2>(out.template ref<1, 1>(), col);
     }

     //round corner model
     template <class Matrix>
     static void blendCorner(uint32_t col, Matrix& out)
     {
         //exact weight would be 1 - pi/4 = 0.2146018366; 21/100 is used
         alphaBlend<21, 100>(out.template ref<1, 1>(), col);
     }
 };


 //Blend patterns for the 3x scaler. Each member patches elements of the scaled output block, which it
 //reaches through out.template ref<I, J>() (both indices run from 0 to scale - 1 here).
 //Statements are ordered from the strongest write (plain overwrite with col) to the weakest blend.
 struct Scaler3x
 {
     static const int scale = 3;

     //shallow line
     template <class Matrix>
     static void blendLineShallow(uint32_t col, Matrix& out)
     {
         out.template ref<2, 2>() = col;

         alphaBlend<3, 4>(out.template ref<2, 1>(), col);

         alphaBlend<1, 4>(out.template ref<2, 0>(), col);
         alphaBlend<1, 4>(out.template ref<1, 2>(), col);
     }

     //steep line: the shallow pattern with both indices swapped
     template <class Matrix>
     static void blendLineSteep(uint32_t col, Matrix& out)
     {
         out.template ref<2, 2>() = col;

         alphaBlend<3, 4>(out.template ref<1, 2>(), col);

         alphaBlend<1, 4>(out.template ref<0, 2>(), col);
         alphaBlend<1, 4>(out.template ref<2, 1>(), col);
     }

     //steep and shallow line at once
     template <class Matrix>
     static void blendLineSteepAndShallow(uint32_t col, Matrix& out)
     {
         out.template ref<2, 2>() = col;

         alphaBlend<3, 4>(out.template ref<2, 1>(), col);
         alphaBlend<3, 4>(out.template ref<1, 2>(), col);

         alphaBlend<1, 4>(out.template ref<2, 0>(), col);
         alphaBlend<1, 4>(out.template ref<0, 2>(), col);
     }

     //diagonal line
     template <class Matrix>
     static void blendLineDiagonal(uint32_t col, Matrix& out)
     {
         alphaBlend<7, 8>(out.template ref<2, 2>(), col);

         alphaBlend<1, 8>(out.template ref<2, 1>(), col);
         alphaBlend<1, 8>(out.template ref<1, 2>(), col);
     }

     //round corner model
     template <class Matrix>
     static void blendCorner(uint32_t col, Matrix& out)
     {
         //exact weight would be 0.4545939598; 45/100 is used
         alphaBlend<45, 100>(out.template ref<2, 2>(), col);

         //<2, 1> and <1, 2> would each receive 0.01413008627 (14/1000) -> negligible, intentionally not blended
     }
 };


 //Blend patterns for the 4x scaler. Each member patches elements of the scaled output block, which it
 //reaches through out.template ref<I, J>() (both indices run from 0 to scale - 1 here).
 //Statements are ordered from the strongest write (plain overwrite with col) to the weakest blend.
 struct Scaler4x
 {
     static const int scale = 4;

     //shallow line
     template <class Matrix>
     static void blendLineShallow(uint32_t col, Matrix& out)
     {
         out.template ref<3, 3>() = col;
         out.template ref<3, 2>() = col;

         alphaBlend<3, 4>(out.template ref<3, 1>(), col);
         alphaBlend<3, 4>(out.template ref<2, 3>(), col);

         alphaBlend<1, 4>(out.template ref<3, 0>(), col);
         alphaBlend<1, 4>(out.template ref<2, 2>(), col);
     }

     //steep line: the shallow pattern with both indices swapped
     template <class Matrix>
     static void blendLineSteep(uint32_t col, Matrix& out)
     {
         out.template ref<3, 3>() = col;
         out.template ref<2, 3>() = col;

         alphaBlend<3, 4>(out.template ref<1, 3>(), col);
         alphaBlend<3, 4>(out.template ref<3, 2>(), col);

         alphaBlend<1, 4>(out.template ref<0, 3>(), col);
         alphaBlend<1, 4>(out.template ref<2, 2>(), col);
     }

     //steep and shallow line at once
     template <class Matrix>
     static void blendLineSteepAndShallow(uint32_t col, Matrix& out)
     {
         //three elements are overwritten outright
         out.template ref<2, 3>() = col;
         out.template ref<3, 2>() = col;
         out.template ref<3, 3>() = col;

         alphaBlend<3, 4>(out.template ref<3, 1>(), col);
         alphaBlend<3, 4>(out.template ref<1, 3>(), col);

         alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR

         alphaBlend<1, 4>(out.template ref<3, 0>(), col);
         alphaBlend<1, 4>(out.template ref<0, 3>(), col);
     }

     //diagonal line
     template <class Matrix>
     static void blendLineDiagonal(uint32_t col, Matrix& out)
     {
         out.template ref<3, 3>() = col;

         alphaBlend<1, 2>(out.template ref<3, 2>(), col);
         alphaBlend<1, 2>(out.template ref<2, 3>(), col);
     }

     //round corner model
     template <class Matrix>
     static void blendCorner(uint32_t col, Matrix& out)
     {
         //exact weights would be 0.6848532563 for <3, 3> and 0.08677704501 for each neighbour
         alphaBlend<68, 100>(out.template ref<3, 3>(), col);
         alphaBlend< 9, 100>(out.template ref<2, 3>(), col);
         alphaBlend< 9, 100>(out.template ref<3, 2>(), col);
     }
 };


 //Blend patterns for the 5x scaler. Each member patches elements of the scaled output block, which it
 //reaches through out.template ref<I, J>() (both indices run from 0 to scale - 1 here).
 //Statements are ordered from the strongest write (plain overwrite with col) to the weakest blend.
 struct Scaler5x
 {
     static const int scale = 5;

     //shallow line
     template <class Matrix>
     static void blendLineShallow(uint32_t col, Matrix& out)
     {
         out.template ref<4, 4>() = col;
         out.template ref<4, 3>() = col;
         out.template ref<4, 2>() = col;
         out.template ref<3, 4>() = col;

         alphaBlend<3, 4>(out.template ref<4, 1>(), col);
         alphaBlend<3, 4>(out.template ref<3, 3>(), col);

         alphaBlend<1, 4>(out.template ref<4, 0>(), col);
         alphaBlend<1, 4>(out.template ref<3, 2>(), col);
         alphaBlend<1, 4>(out.template ref<2, 4>(), col);
     }

     //steep line: the shallow pattern with both indices swapped
     template <class Matrix>
     static void blendLineSteep(uint32_t col, Matrix& out)
     {
         out.template ref<4, 4>() = col;
         out.template ref<3, 4>() = col;
         out.template ref<2, 4>() = col;
         out.template ref<4, 3>() = col;

         alphaBlend<3, 4>(out.template ref<1, 4>(), col);
         alphaBlend<3, 4>(out.template ref<3, 3>(), col);

         alphaBlend<1, 4>(out.template ref<0, 4>(), col);
         alphaBlend<1, 4>(out.template ref<2, 3>(), col);
         alphaBlend<1, 4>(out.template ref<4, 2>(), col);
     }

     //steep and shallow line at once
     template <class Matrix>
     static void blendLineSteepAndShallow(uint32_t col, Matrix& out)
     {
         out.template ref<4, 4>() = col;

         out.template ref<2, 4>() = col;
         out.template ref<3, 4>() = col;

         out.template ref<4, 2>() = col;
         out.template ref<4, 3>() = col;

         alphaBlend<3, 4>(out.template ref<1, 4>(), col);
         alphaBlend<3, 4>(out.template ref<4, 1>(), col);

         alphaBlend<2, 3>(out.template ref<3, 3>(), col);

         alphaBlend<1, 4>(out.template ref<0, 4>(), col);
         alphaBlend<1, 4>(out.template ref<2, 3>(), col);

         alphaBlend<1, 4>(out.template ref<4, 0>(), col);
         alphaBlend<1, 4>(out.template ref<3, 2>(), col);
     }

     //diagonal line
     template <class Matrix>
     static void blendLineDiagonal(uint32_t col, Matrix& out)
     {
         out.template ref<4, 4>() = col;

         alphaBlend<7, 8>(out.template ref<4, 3>(), col);
         alphaBlend<7, 8>(out.template ref<3, 4>(), col);

         alphaBlend<1, 8>(out.template ref<4, 2>(), col);
         alphaBlend<1, 8>(out.template ref<3, 3>(), col);
         alphaBlend<1, 8>(out.template ref<2, 4>(), col);
     }

     //round corner model
     template <class Matrix>
     static void blendCorner(uint32_t col, Matrix& out)
     {
         //exact weights would be 0.8631434088 for <4, 4> and 0.2306749731 for each direct neighbour
         alphaBlend<86, 100>(out.template ref<4, 4>(), col);
         alphaBlend<23, 100>(out.template ref<3, 4>(), col);
         alphaBlend<23, 100>(out.template ref<4, 3>(), col);

         //<4, 2> and <2, 4> would each receive 0.008384061834 (8/1000) -> negligible, intentionally not blended
     }
 };

 }


 //Forwards to the scaleImage instantiation that matches factor (2, 3, 4 or 5), passing all other arguments
 //through unchanged. Any other factor only trips the assert; nothing is scaled in that case.
 void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
 {
     switch (factor)
     {
         case 2:
             scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
             break;

         case 3:
             scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
             break;

         case 4:
             scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
             break;

         case 5:
             scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
             break;

         default:
             assert(false); //no scaler exists for this factor
             break;
     }
 }


 //Reports whether col1 and col2 count as equal: true when their colorDist, evaluated with luminanceWeight,
 //is strictly less than equalColorTolerance.
 bool xbrz::equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
 {
     const bool withinTolerance = colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
     return withinTolerance;
 }


 //Nearest-neighbor scaling between two images of uint32_t pixels; srcPitch and trgPitch are row strides in bytes.
 //  st == NN_SCALE_SLICE_SOURCE: walks source rows [yFirst, yLast) and paints every source pixel as a block of
 //                               target pixels (the source is read only once, which suits upscaling)
 //  st == NN_SCALE_SLICE_TARGET: walks target rows [yFirst, yLast) and looks up the matching source pixel
 //                               (the source is read repeatedly when upscaling; suits similar image sizes)
 //The row range is clamped to the height of the image being walked. If a pitch is smaller than its row of
 //pixels the call asserts and returns without writing anything.
 void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
                                 uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
                                 SliceType st, int yFirst, int yLast)
 {
     const int bytesPerPixel = static_cast<int>(sizeof(uint32_t));
     if (srcPitch < srcWidth * bytesPerPixel || trgPitch < trgWidth * bytesPerPixel)
     {
         assert(false);
         return;
     }

     const int rowBegin = std::max(yFirst, 0);

     if (st == NN_SCALE_SLICE_SOURCE)
     {
         const int rowEnd = std::min(yLast, srcHeight);
         if (rowBegin >= rowEnd || trgWidth <= 0 || trgHeight <= 0)
             return;

         for (int ySrc = rowBegin; ySrc < rowEnd; ++ySrc)
         {
             //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
             // => the target rows of this source row are the integers in [ySrc, ySrc + 1) * trgHeight / srcHeight
             //both bounds are computed inside the loop so that MT input slices remain independent
             const int trgRowBegin = ( ySrc      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(ySrc * trgHeight / srcHeight)
             const int trgRowEnd   = ((ySrc + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil((ySrc + 1) * trgHeight / srcHeight)
             const int blockHeight = trgRowEnd - trgRowBegin;
             if (blockHeight <= 0)
                 continue; //this source row maps to no target row

             const uint32_t* const srcRow = byteAdvance(src, ySrc * srcPitch);
             uint32_t* trgCursor = byteAdvance(trg, trgRowBegin * trgPitch);

             int trgColBegin = 0;
             for (int xSrc = 0; xSrc < srcWidth; ++xSrc)
             {
                 const int trgColEnd  = ((xSrc + 1) * trgWidth + srcWidth - 1) / srcWidth;
                 const int blockWidth = trgColEnd - trgColBegin;
                 if (blockWidth <= 0)
                     continue; //this source pixel maps to no target column

                 fillBlock(trgCursor, trgPitch, srcRow[xSrc], blockWidth, blockHeight);
                 trgCursor  += blockWidth;
                 trgColBegin = trgColEnd;
             }
         }
     }
     else if (st == NN_SCALE_SLICE_TARGET)
     {
         const int rowEnd = std::min(yLast, trgHeight);
         if (rowBegin >= rowEnd || srcHeight <= 0 || srcWidth <= 0)
             return;

         for (int yTrg = rowBegin; yTrg < rowEnd; ++yTrg)
         {
             const int ySrc = srcHeight * yTrg / trgHeight;

             const uint32_t* const srcRow = byteAdvance(src, ySrc * srcPitch);
             uint32_t* const trgRow = byteAdvance(trg, yTrg * trgPitch);

             for (int xTrg = 0; xTrg < trgWidth; ++xTrg)
             {
                 const int xSrc = srcWidth * xTrg / trgWidth;
                 trgRow[xTrg] = srcRow[xSrc];
             }
         }
     }
 }

draw::fill
void fill(const SDL_Rect &rect, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
Fill an area with the given colour.
Definition: draw.cpp:50

xbrz::equalColor
bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
Definition: xbrz.cpp:1206

xbrz::SliceType
SliceType
Definition: xbrz.hpp:83

xbrz::NN_SCALE_SLICE_TARGET
@ NN_SCALE_SLICE_TARGET
Definition: xbrz.hpp:85

xbrz::NN_SCALE_SLICE_SOURCE
@ NN_SCALE_SLICE_SOURCE
Definition: xbrz.hpp:84

xbrz::nearestNeighborScale
void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
Definition: xbrz.hpp:100

xbrz::scale
void scale(size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, const ScalerCfg &cfg=ScalerCfg(), int yFirst=0, int yLast=std::numeric_limits< int >::max())
Definition: xbrz.cpp:1189

xbrz::ScalerCfg
Definition: config.hpp:46

xbrz::ScalerCfg::luminanceWeight_
double luminanceWeight_
Definition: config.hpp:54

xbrz::ScalerCfg::steepDirectionThreshold
double steepDirectionThreshold
Definition: config.hpp:57

xbrz::ScalerCfg::dominantDirectionThreshold
double dominantDirectionThreshold
Definition: config.hpp:56

xbrz::ScalerCfg::equalColorTolerance_
double equalColorTolerance_
Definition: config.hpp:55

p
mock_party p
Definition: test_formula_core.cpp:69

n
static map_location::DIRECTION n
Definition: test_map_location.cpp:25

config.hpp

i
#define i

g
#define g

d
#define d

e
#define e

DEF_GETTER
#define DEF_GETTER(x)
Definition: xbrz.cpp:628

h
#define h

f
#define f

a
#define a

c
#define c

b
#define b

FORCE_INLINE
#define FORCE_INLINE
Definition: xbrz.cpp:184

xbrz.hpp