The Battle for Wesnoth  1.19.0-dev
xbrz.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014 - 2018 by Chris Beck <render787@gmail.com>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2 of the License, or
7  (at your option) any later version.
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY.
10  See the COPYING file for more details.
11 
12  This is a derivative work of the xBRZ component of the HqMAME project
13  by Zenju. The original Licensing statement follows, indented with //
14  The primary changes are, syntactic to make it compile with C99+Boost,
15  and to make it handle an alpha channel in the image in a manner proper
16  for SDL.
17 
18  It is not possible to extend the MAME 'special exception' to all of
19  the Battle for Wesnoth project, however, the special exception is
20  granted for my derivative forms of this work.
21 */
22 
23 // ****************************************************************************
24 // * This file is part of the HqMAME project. It is distributed under *
25 // * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
26 // * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
27 // * *
28 // * Additionally and as a special exception, the author gives permission *
29 // * to link the code of this program with the MAME library (or with modified *
30 // * versions of MAME that use the same license as MAME), and distribute *
31 // * linked combinations including the two. You must obey the GNU General *
32 // * Public License in all respects for all of the code used other than MAME. *
33 // * If you modify this file, you may extend this exception to your version *
34 // * of the file, but you are not obligated to do so. If you do not wish to *
35 // * do so, delete this exception statement from your version. *
36 // ****************************************************************************
37 
38 #include "xBRZ/xbrz.hpp"
39 #include "config.hpp"
40 #include <cassert>
41 #include <cmath>
42 #include <algorithm>
43 
44 
45 namespace
46 {
//extract byte number N of a packed 32-bit value (N = 0 is the least significant byte)
template <uint32_t N> inline
unsigned char getByte(uint32_t val)
{
    const uint32_t shifted = val >> (N * 8);
    return static_cast<unsigned char>(shifted & 0xffu);
}

//colour channel accessors: blue is byte 0, green byte 1, red byte 2
inline unsigned char getBlue (uint32_t val) { return getByte<0>(val); }
inline unsigned char getGreen(uint32_t val) { return getByte<1>(val); }
inline unsigned char getRed  (uint32_t val) { return getByte<2>(val); }
53 
//absolute value; only meaningful for signed types
template <class T> inline
T abs(T value)
{
    //static_assert(std::is_signed<T>::value, "");
    if (value < 0)
        return -value;

    return value;
}
60 
//channel masks of a packed 32-bit pixel: alpha in the top byte, then red, green, blue
const uint32_t redMask   = 0xff0000;
const uint32_t greenMask = 0x00ff00;
const uint32_t blueMask  = 0x0000ff;
const uint32_t alphaMask = 0xff000000;

/*
blend color "col" over destination "dst" with opacity N / M

Upstream xBRZ assumes there is no alpha channel (and sets it to zero when it blends); our sprites have alpha,
so this version handles it as follows:
    - col has alpha 0:  dst is left untouched
    - dst has alpha 0:  dst is replaced by col unchanged (N / M is not applied)
    - otherwise:        every channel, alpha included, becomes (col * N + dst * (M - N)) / M in integer arithmetic
*/
template <unsigned int N, unsigned int M> inline
void alphaBlend(uint32_t& dst, uint32_t col)
{
    //the masked channels are multiplied in place inside a 32-bit word, so both factors have to stay below 256
    static_assert(N < 256, "possible overflow of (col & redMask) * N");
    static_assert(M < 256, "possible overflow of (col & redMask) * N + (dst & redMask) * (M - N)");
    static_assert(0 < N && N < M, "opacity N / M must lie strictly between 0 and 1");

    const uint32_t col_alpha = col >> 24; // & with alphaMask is unnecessary
    if (!col_alpha)
        return;

    const uint32_t dst_alpha = dst >> 24;
    if (!dst_alpha)
    {
        dst = col;
        return;
    }

    //TODO: Figure out if there's some way to combine the multiplicative approach with the "averaged alpha", and to feed
    // the alpha back into the colors, without making it all very slow. Current approach looks okay, but I think shadows
    // could be better, also I think some units are getting 'black outlines' now because their black pixels with 0 alpha
    // (background) are getting averaged with their foreground.

    dst = (redMask   & ((col & redMask  ) * N + (dst & redMask  ) * (M - N)) / M) | //this works because 8 upper bits are free
          (greenMask & ((col & greenMask) * N + (dst & greenMask) * (M - N)) / M) |
          (blueMask  & ((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M) |
          (alphaMask & (((col_alpha * N + dst_alpha * (M - N)) / M) << 24)); //need to downshift and upshift because of overflow
}
136 
137 
138 //inline
139 //double fastSqrt(double n)
140 //{
141 // __asm //speeds up xBRZ by about 9% compared to std::sqrt
142 // {
143 // fld n
144 // fsqrt
145 // }
146 //}
147 //
148 
149 #if 0
150 inline
151 uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
152 {
153  return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
154  (greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
155  (blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
156 }
157 #endif
158 
//move a pixel pointer by a number of bytes (not pixels); used to step from one row to the next via the pitch
uint32_t* byteAdvance(uint32_t* ptr, int bytes)
{
    char* const raw = reinterpret_cast<char*>(ptr);
    return reinterpret_cast<uint32_t*>(raw + bytes);
}

const uint32_t* byteAdvance(const uint32_t* ptr, int bytes)
{
    const char* const raw = reinterpret_cast<const char*>(ptr);
    return reinterpret_cast<const uint32_t*>(raw + bytes);
}


//fill a block of blockWidth x blockHeight pixels with the given color; "pitch" is the row stride in bytes
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
{
    //plain loops are used on purpose; the std::fill variant was disabled by the original author
    for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
    {
        int x = 0;
        while (x < blockWidth)
            trg[x++] = col;

        trg = byteAdvance(trg, pitch);
    }
}

//square block of n x n pixels
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n)
{
    fillBlock(trg, pitch, col, n, n);
}
177 
178 
//FORCE_INLINE: compiler specific "always inline" spelling, falling back to plain "inline"
#if defined(_MSC_VER)
    #define FORCE_INLINE __forceinline
#elif defined(__GNUC__)
    #define FORCE_INLINE __attribute__((always_inline)) inline
#else
    #define FORCE_INLINE inline
#endif
186 
187 
enum RotationDegree //clock-wise
{
    ROT_0   = 0,
    ROT_90  = 1,
    ROT_180 = 2,
    ROT_270 = 3
};

//calculate input matrix coordinates after rotation at compile time:
//I_old / J_old are the (row, col) indices in the unrotated N x N matrix of the cell found at (I, J) after rotating
//general case: apply one more quarter turn to the result of the previous rotation step
template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix
struct MatrixRotation
{
    using Previous = MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>;

    static const size_t I_old = N - 1 - Previous::J_old; //old coordinates before rotation!
    static const size_t J_old = Previous::I_old;
};

//no rotation: coordinates are unchanged; this also terminates the recursion above
template <size_t I, size_t J, size_t N>
struct MatrixRotation<ROT_0, I, J, N>
{
    static const size_t I_old = I;
    static const size_t J_old = J;
};
213 
214 
215 template <size_t N, RotationDegree rotDeg>
216 class OutputMatrix
217 {
218 public:
219  OutputMatrix(uint32_t* out, int outWidth) : //access matrix area, top-left at position "out" for image with given width
220  out_(out),
221  outWidth_(outWidth) {}
222 
223  template <size_t I, size_t J>
224  uint32_t& ref() const
225  {
226  static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
227  static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
228  return *(out_ + J_old + I_old * outWidth_);
229  }
230 
231 private:
232  uint32_t* out_;
233  const int outWidth_;
234 };
235 
236 
//value multiplied by itself
template <class T> inline
T square(T value)
{
    const T squared = value * value;
    return squared;
}
239 
240 
241 /*
242 inline
243 void rgbtoLuv(uint32_t c, double& L, double& u, double& v)
244 {
245  //http://www.easyrgb.com/index.php?X=MATH&H=02#text2
246  double r = getRed (c) / 255.0;
247  double g = getGreen(c) / 255.0;
248  double b = getBlue (c) / 255.0;
249 
250  if ( r > 0.04045 )
251  r = std::pow(( ( r + 0.055 ) / 1.055 ) , 2.4);
252  else
253  r /= 12.92;
254  if ( g > 0.04045 )
255  g = std::pow(( ( g + 0.055 ) / 1.055 ) , 2.4);
256  else
257  g /= 12.92;
258  if ( b > 0.04045 )
259  b = std::pow(( ( b + 0.055 ) / 1.055 ) , 2.4);
260  else
261  b /= 12.92;
262 
263  r *= 100;
264  g *= 100;
265  b *= 100;
266 
267  double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
268  double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
269  double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
270  //---------------------
271  double var_U = 4 * x / ( x + 15 * y + 3 * z );
272  double var_V = 9 * y / ( x + 15 * y + 3 * z );
273  double var_Y = y / 100;
274 
275  if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );
276  else var_Y = 7.787 * var_Y + 16.0 / 116;
277 
278  const double ref_X = 95.047; //Observer= 2 (degrees), Illuminant= D65
279  const double ref_Y = 100.000;
280  const double ref_Z = 108.883;
281 
282  const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
283  const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
284 
285  L = ( 116 * var_Y ) - 16;
286  u = 13 * L * ( var_U - ref_U );
287  v = 13 * L * ( var_V - ref_V );
288 }
289 */
290 
#if 0
//convert the RGB part of a packed pixel to CIE L*a*b* (reference white: D65, 2 degree observer)
//the three results are narrowed to 8-bit integers on output
inline
void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
{
    //code: http://www.easyrgb.com/index.php?X=MATH
    //test: http://www.workwithcolor.com/color-converter-01.htm
    //------RGB to XYZ------
    double r = getRed  (c) / 255.0;
    double g = getGreen(c) / 255.0;
    double b = getBlue (c) / 255.0;

    //undo the sRGB gamma curve; each channel must be written back to itself
    r = r > 0.04045 ? std::pow((r + 0.055) / 1.055, 2.4) : r / 12.92;
    g = g > 0.04045 ? std::pow((g + 0.055) / 1.055, 2.4) : g / 12.92;
    b = b > 0.04045 ? std::pow((b + 0.055) / 1.055, 2.4) : b / 12.92;

    r *= 100;
    g *= 100;
    b *= 100;

    double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
    double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
    double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
    //------XYZ to Lab------
    const double refX =  95.047; //
    const double refY = 100.000; //Observer= 2 (degrees), Illuminant= D65
    const double refZ = 108.883; //
    double var_X = x / refX;
    double var_Y = y / refY;
    double var_Z = z / refZ;

    var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
    var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
    var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;

    L = static_cast<unsigned char>(116 * var_Y - 16);
    A = static_cast<  signed char>(500 * (var_X - var_Y));
    B = static_cast<  signed char>(200 * (var_Y - var_Z));
}
#endif
330 
331 #if 0
332 inline
333 double distLAB(uint32_t pix1, uint32_t pix2)
334 {
335  unsigned char L1 = 0; //[0, 100]
336  signed char a1 = 0; //[-128, 127]
337  signed char b1 = 0; //[-128, 127]
338  rgbtoLab(pix1, L1, a1, b1);
339 
340  unsigned char L2 = 0;
341  signed char a2 = 0;
342  signed char b2 = 0;
343  rgbtoLab(pix2, L2, a2, b2);
344 
345  //-----------------------------
346  //http://www.easyrgb.com/index.php?X=DELT
347 
348  //Delta E/CIE76
349  return std::sqrt(square(1.0 * L1 - L2) +
350  square(1.0 * a1 - a2) +
351  square(1.0 * b1 - b2));
352 }
353 #endif
354 
355 /*
356 inline
357 void rgbtoHsl(uint32_t c, double& h, double& s, double& l)
358 {
359  //http://www.easyrgb.com/index.php?X=MATH&H=18#text18
360  const int r = getRed (c);
361  const int g = getGreen(c);
362  const int b = getBlue (c);
363 
364  const int varMin = numeric::min(r, g, b);
365  const int varMax = numeric::max(r, g, b);
366  const int delMax = varMax - varMin;
367 
368  l = (varMax + varMin) / 2.0 / 255.0;
369 
370  if (delMax == 0) //gray, no chroma...
371  {
372  h = 0;
373  s = 0;
374  }
375  else
376  {
377  s = l < 0.5 ?
378  delMax / (1.0 * varMax + varMin) :
379  delMax / (2.0 * 255 - varMax - varMin);
380 
381  double delR = ((varMax - r) / 6.0 + delMax / 2.0) / delMax;
382  double delG = ((varMax - g) / 6.0 + delMax / 2.0) / delMax;
383  double delB = ((varMax - b) / 6.0 + delMax / 2.0) / delMax;
384 
385  if (r == varMax)
386  h = delB - delG;
387  else if (g == varMax)
388  h = 1 / 3.0 + delR - delB;
389  else if (b == varMax)
390  h = 2 / 3.0 + delG - delR;
391 
392  if (h < 0)
393  h += 1;
394  if (h > 1)
395  h -= 1;
396  }
397 }
398 
399 inline
400 double distHSL(uint32_t pix1, uint32_t pix2, double lightningWeight)
401 {
402  double h1 = 0;
403  double s1 = 0;
404  double l1 = 0;
405  rgbtoHsl(pix1, h1, s1, l1);
406  double h2 = 0;
407  double s2 = 0;
408  double l2 = 0;
409  rgbtoHsl(pix2, h2, s2, l2);
410 
411  //HSL is in cylindrical coordinates where L represents height, S radius, H angle,
412  //however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1
413  assert(0 <= h1 && h1 <= 1);
414  assert(0 <= h2 && h2 <= 1);
415 
416  double r1 = l1 < 0.5 ?
417  l1 * 2 :
418  2 - l1 * 2;
419 
420  double x1 = r1 * s1 * std::cos(h1 * 2 * numeric::pi);
421  double y1 = r1 * s1 * std::sin(h1 * 2 * numeric::pi);
422  double z1 = l1;
423 
424  double r2 = l2 < 0.5 ?
425  l2 * 2 :
426  2 - l2 * 2;
427 
428  double x2 = r2 * s2 * std::cos(h2 * 2 * numeric::pi);
429  double y2 = r2 * s2 * std::sin(h2 * 2 * numeric::pi);
430  double z2 = l2;
431 
432  return 255 * std::sqrt(square(x1 - x2) + square(y1 - y2) + square(lightningWeight * (z1 - z2)));
433 }
434 */
435 
436 #if 0
437 inline
438 double distRGB(uint32_t pix1, uint32_t pix2)
439 {
440  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
441  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
442  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
443 
444  //Euclidean RGB distance
445  return std::sqrt(square(r_diff) + square(g_diff) + square(b_diff));
446 }
447 #endif
448 
449 #if 0
450 inline
451 double distNonLinearRGB(uint32_t pix1, uint32_t pix2)
452 {
453  //non-linear rgb: http://www.compuphase.com/cmetric.htm
454  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
455  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
456  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
457 
458  const double r_avg = (static_cast<double>(getRed(pix1)) + getRed(pix2)) / 2;
459  return std::sqrt((2 + r_avg / 255) * square(r_diff) + 4 * square(g_diff) + (2 + (255 - r_avg) / 255) * square(b_diff));
460 }
461 #endif
462 
463 inline
464 double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
465 {
466  //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
467  //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
468  const int r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2); //we may delay division by 255 to after matrix multiplication
469  const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
470  const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //subtraction for int is noticeable faster than for double!
471 
472  const double k_b = 0.0722; //ITU-R BT.709 conversion
473  const double k_r = 0.2126; //
474  const double k_g = 1 - k_b - k_r;
475 
476  const double scale_b = 0.5 / (1 - k_b);
477  const double scale_r = 0.5 / (1 - k_r);
478 
479  const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
480  const double c_b = scale_b * (b_diff - y);
481  const double c_r = scale_r * (r_diff - y);
482 
483  //we skip division by 255 to have similar range like other distance functions
484  return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
485 }
486 
487 #if 0
488 inline
489 double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
490 {
491  //perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
492  //since RGB -> YUV conversion is essentially a matrix multiplication, we can calculate the RGB diff before the conversion (distributive property)
493  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
494  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
495  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
496 
497  //http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
498  const double w_b = 0.114;
499  const double w_r = 0.299;
500  const double w_g = 1 - w_r - w_b;
501 
502  const double u_max = 0.436;
503  const double v_max = 0.615;
504 
505  const double scale_u = u_max / (1 - w_b);
506  const double scale_v = v_max / (1 - w_r);
507 
508  double y = w_r * r_diff + w_g * g_diff + w_b * b_diff;//value range: 255 * [-1, 1]
509  double u = scale_u * (b_diff - y); //value range: 255 * 2 * u_max * [-1, 1]
510  double v = scale_v * (r_diff - y); //value range: 255 * 2 * v_max * [-1, 1]
511 
512 #ifndef NDEBUG
513  const double eps = 0.5;
514 #endif
515  assert(std::abs(y) <= 255 + eps);
516  assert(std::abs(u) <= 255 * 2 * u_max + eps);
517  assert(std::abs(v) <= 255 * 2 * v_max + eps);
518 
519  return std::sqrt(square(luminanceWeight * y) + square(u) + square(v));
520 }
521 #endif
522 
523 inline
524 double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
525 {
526  if (pix1 == pix2) //about 8% perf boost
527  return 0;
528 
529  //return distHSL(pix1, pix2, luminanceWeight);
530  //return distRGB(pix1, pix2);
531  //return distLAB(pix1, pix2);
532  //return distNonLinearRGB(pix1, pix2);
533  //return distYUV(pix1, pix2, luminanceWeight);
534 
535  return distYCbCr(pix1, pix2, luminanceWeight);
536 }
537 
538 
enum BlendType
{
    BLEND_NONE     = 0,
    BLEND_NORMAL   = 1, //a normal indication to blend
    BLEND_DOMINANT = 2, //a strong indication to blend
    //attention: BlendType must fit into the value range of 2 bit!!!
};

//blend decision for each of the four pixels F, G, J, K around one evaluated corner; all start out as BLEND_NONE
struct BlendResult
{
    BlendType blend_f = BLEND_NONE;
    BlendType blend_g = BLEND_NONE;
    BlendType blend_j = BLEND_NONE;
    BlendType blend_k = BLEND_NONE;

    BlendResult() {}
};
555 
556 
//kernel for preprocessing step: 4 x 4 pixels named row by row, everything zero after construction
struct Kernel_4x4
{
    uint32_t a = 0, b = 0, c = 0, d = 0;
    uint32_t e = 0, f = 0, g = 0, h = 0;
    uint32_t i = 0, j = 0, k = 0, l = 0;
    uint32_t m = 0, n = 0, o = 0, p = 0;

    Kernel_4x4() {}
};
567 
568 /*
569 input kernel area naming convention:
570 -----------------
571 | A | B | C | D |
572 ----|---|---|---|
573 | E | F | G | H | //evaluate the four corners between F, G, J, K
574 ----|---|---|---| //input pixel is at position F
575 | I | J | K | L |
576 ----|---|---|---|
577 | M | N | O | P |
578 -----------------
579 */
580 FORCE_INLINE //detect blend direction
581 BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
582 {
583  BlendResult result;
584 
585  if ((ker.f == ker.g &&
586  ker.j == ker.k) ||
587  (ker.f == ker.j &&
588  ker.g == ker.k))
589  return result;
590 
591  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
592 
593  const int weight = 4;
594  double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g);
595  double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k);
596 
597  if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
598  {
599  const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
600  if (ker.f != ker.g && ker.f != ker.j)
601  result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
602 
603  if (ker.k != ker.j && ker.k != ker.g)
604  result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
605  }
606  else if (fk < jg)
607  {
608  const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
609  if (ker.j != ker.f && ker.j != ker.k)
610  result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
611 
612  if (ker.g != ker.f && ker.g != ker.k)
613  result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
614  }
615  return result;
616 }
617 
//3 x 3 pixel neighbourhood named row by row; everything zero after construction
struct Kernel_3x3
{
    uint32_t a = 0, b = 0, c = 0;
    uint32_t d = 0, e = 0, f = 0;
    uint32_t g = 0, h = 0, i = 0;

    Kernel_3x3() {}
};
627 
628 #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
629 //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
633 #undef DEF_GETTER
634 
635 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
636 /*DEF_GETTER(a, g)*/ DEF_GETTER(b, d) DEF_GETTER(c, a)
639 #undef DEF_GETTER
640 
641 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
642 /*DEF_GETTER(a, i)*/ DEF_GETTER(b, h) DEF_GETTER(c, g)
645 #undef DEF_GETTER
646 
647 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
648 /*DEF_GETTER(a, c)*/ DEF_GETTER(b, f) DEF_GETTER(c, i)
651 #undef DEF_GETTER
652 
653 //compress four blend types into a single byte
654 //inline BlendType getTopL (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
655 inline BlendType getTopR (unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 2)); }
656 inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
657 inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }
658 
659 inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
660 inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
661 inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
662 inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
663 
664 inline bool blendingNeeded(unsigned char b) { return b != 0; }
665 
666 template <RotationDegree rotDeg> inline
667 unsigned char rotateBlendInfo(unsigned char b) { return b; }
668 template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
669 template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
670 template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
671 
672 
#if !defined(NDEBUG)
//debugging aid: scaleImage() sets breakIntoDebugger while it processes source pixel (debugPixelX, debugPixelY)
int  debugPixelX       = -1;
int  debugPixelY       = 84;
bool breakIntoDebugger = false;
#endif
678 
679 /*
680 input kernel area naming convention:
681 -------------
682 | A | B | C |
683 ----|---|---|
684 | D | E | F | //input pixel is at position E
685 ----|---|---|
686 | G | H | I |
687 -------------
688 */
689 template <class Scaler, RotationDegree rotDeg>
690 FORCE_INLINE //perf: quite worth it!
691 void scalePixel(const Kernel_3x3& ker,
692  uint32_t* target, int trgWidth,
693  unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
694  const xbrz::ScalerCfg& cfg)
695 {
696 #define a get_a<rotDeg>(ker)
697 #define b get_b<rotDeg>(ker)
698 #define c get_c<rotDeg>(ker)
699 #define d get_d<rotDeg>(ker)
700 #define e get_e<rotDeg>(ker)
701 #define f get_f<rotDeg>(ker)
702 #define g get_g<rotDeg>(ker)
703 #define h get_h<rotDeg>(ker)
704 #define i get_i<rotDeg>(ker)
705 
706 #ifndef NDEBUG
707  (void) breakIntoDebugger;
708  //if (breakIntoDebugger)
709  // __debugbreak(); //__asm int 3;
710 #endif
711 
712  const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
713 
714  if (getBottomR(blend) >= BLEND_NORMAL)
715  {
716  auto eq = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
717 
718  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
719 
720  const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
721 
722  OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
723 
724  bool doLineBlend = true;
725  {
726  if (getBottomR(blend) >= BLEND_DOMINANT)
727  doLineBlend = true;
728 
729  //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
730  else if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90 (degrees) corners
731  doLineBlend = false;
732  else if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
733  doLineBlend = false;
734 
735  //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
736  else if (eq(g, h) && eq(h , i) && eq(i, f) && eq(f, c) && !eq(e, i))
737  doLineBlend = false;
738 
739  else doLineBlend = true;
740  }
741 
742  if (doLineBlend)
743  {
744  const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
745  const double hc = dist(h, c); //
746 
747  const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
748  const bool haveSteepLine = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
749 
750  if (haveShallowLine)
751  {
752  if (haveSteepLine)
753  Scaler::blendLineSteepAndShallow(px, out);
754  else
755  Scaler::blendLineShallow(px, out);
756  }
757  else
758  {
759  if (haveSteepLine)
760  Scaler::blendLineSteep(px, out);
761  else
762  Scaler::blendLineDiagonal(px,out);
763  }
764  }
765  else
766  Scaler::blendCorner(px, out);
767  }
768 
769 #undef a
770 #undef b
771 #undef c
772 #undef d
773 #undef e
774 #undef f
775 #undef g
776 #undef h
777 #undef i
778 }
779 
780 
781 template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
782 void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
783 {
784  yFirst = std::max(yFirst, 0);
785  yLast = std::min(yLast, srcHeight);
786  if (yFirst >= yLast || srcWidth <= 0)
787  return;
788 
789  const int trgWidth = srcWidth * Scaler::scale;
790 
791  //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
792  //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
793  const int bufferSize = srcWidth;
794  unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
795  std::fill(preProcBuffer, preProcBuffer + bufferSize, static_cast<unsigned char>(0));
796  //static_assert(BLEND_NONE == 0, "");
797 
798  //initialize preprocessing buffer for first row: detect upper left and right corner blending
799  //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
800  if (yFirst > 0)
801  {
802  const int y = yFirst - 1;
803 
804  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
805  const uint32_t* s_0 = src + srcWidth * y; //center line
806  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
807  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
808 
809  for (int x = 0; x < srcWidth; ++x)
810  {
811  const int x_m1 = std::max(x - 1, 0);
812  const int x_p1 = std::min(x + 1, srcWidth - 1);
813  const int x_p2 = std::min(x + 2, srcWidth - 1);
814 
815  Kernel_4x4 ker; //perf: initialization is negligible
816  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
817  ker.b = s_m1[x];
818  ker.c = s_m1[x_p1];
819  ker.d = s_m1[x_p2];
820 
821  ker.e = s_0[x_m1];
822  ker.f = s_0[x];
823  ker.g = s_0[x_p1];
824  ker.h = s_0[x_p2];
825 
826  ker.i = s_p1[x_m1];
827  ker.j = s_p1[x];
828  ker.k = s_p1[x_p1];
829  ker.l = s_p1[x_p2];
830 
831  ker.m = s_p2[x_m1];
832  ker.n = s_p2[x];
833  ker.o = s_p2[x_p1];
834  ker.p = s_p2[x_p2];
835 
836  const BlendResult res = preProcessCorners(ker, cfg);
837  /*
838  preprocessing blend result:
839  ---------
840  | F | G | //evaluate corner between F, G, J, K
841  ----|---| //input pixel is at position F
842  | J | K |
843  ---------
844  */
845  setTopR(preProcBuffer[x], res.blend_j);
846 
847  if (x + 1 < srcWidth)
848  setTopL(preProcBuffer[x + 1], res.blend_k);
849  }
850  }
851  //------------------------------------------------------------------------------------
852 
853  for (int y = yFirst; y < yLast; ++y)
854  {
855  uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
856 
857  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
858  const uint32_t* s_0 = src + srcWidth * y; //center line
859  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
860  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
861 
862  unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
863 
864  for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
865  {
866 #ifndef NDEBUG
867  breakIntoDebugger = debugPixelX == x && debugPixelY == y;
868 #endif
869  //all those bounds checks have only insignificant impact on performance!
870  const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
871  const int x_p1 = std::min(x + 1, srcWidth - 1);
872  const int x_p2 = std::min(x + 2, srcWidth - 1);
873 
874  //evaluate the four corners on bottom-right of current pixel
875  unsigned char blend_xy = 0; //for current (x, y) position
876  {
877  Kernel_4x4 ker; //perf: initialization is negligible
878  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
879  ker.b = s_m1[x];
880  ker.c = s_m1[x_p1];
881  ker.d = s_m1[x_p2];
882 
883  ker.e = s_0[x_m1];
884  ker.f = s_0[x];
885  ker.g = s_0[x_p1];
886  ker.h = s_0[x_p2];
887 
888  ker.i = s_p1[x_m1];
889  ker.j = s_p1[x];
890  ker.k = s_p1[x_p1];
891  ker.l = s_p1[x_p2];
892 
893  ker.m = s_p2[x_m1];
894  ker.n = s_p2[x];
895  ker.o = s_p2[x_p1];
896  ker.p = s_p2[x_p2];
897 
898  const BlendResult res = preProcessCorners(ker, cfg);
899  /*
900  preprocessing blend result:
901  ---------
902  | F | G | // evaluate corner between F, G, J, K
903  ----|---| // current input pixel is at position F
904  | J | K |
905  ---------
906  */
907  blend_xy = preProcBuffer[x];
908  setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
909 
910  setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
911  preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
912 
913  blend_xy1 = 0;
914  setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
915 
916  if (x + 1 < srcWidth) //set 3rd known corner for (x + 1, y)
917  setBottomL(preProcBuffer[x + 1], res.blend_g);
918  }
919 
920  //fill block of size scale * scale with the given color
921  fillBlock(out, trgWidth * sizeof(uint32_t), s_0[x], Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!
922 
923  //blend four corners of current pixel
924  if (blendingNeeded(blend_xy)) //good 20% perf-improvement
925  {
926  Kernel_3x3 ker; //perf: initialization is negligible
927 
928  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
929  ker.b = s_m1[x];
930  ker.c = s_m1[x_p1];
931 
932  ker.d = s_0[x_m1];
933  ker.e = s_0[x];
934  ker.f = s_0[x_p1];
935 
936  ker.g = s_p1[x_m1];
937  ker.h = s_p1[x];
938  ker.i = s_p1[x_p1];
939 
940  scalePixel<Scaler, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
941  scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
942  scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
943  scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
944  }
945  }
946  }
947 }
948 
949 
950 struct Scaler2x
951 {
952  static const int scale = 2;
953 
954  template <class OutputMatrix>
955  static void blendLineShallow(uint32_t col, OutputMatrix& out)
956  {
957  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
958  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
959  }
960 
961  template <class OutputMatrix>
962  static void blendLineSteep(uint32_t col, OutputMatrix& out)
963  {
964  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
965  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
966  }
967 
968  template <class OutputMatrix>
969  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
970  {
971  alphaBlend<1, 4>(out.template ref<1, 0>(), col);
972  alphaBlend<1, 4>(out.template ref<0, 1>(), col);
973  alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
974  }
975 
976  template <class OutputMatrix>
977  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
978  {
979  alphaBlend<1, 2>(out.template ref<1, 1>(), col);
980  }
981 
982  template <class OutputMatrix>
983  static void blendCorner(uint32_t col, OutputMatrix& out)
984  {
985  //model a round corner
986  alphaBlend<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
987  }
988 };
989 
990 
991 struct Scaler3x
992 {
993  static const int scale = 3;
994 
995  template <class OutputMatrix>
996  static void blendLineShallow(uint32_t col, OutputMatrix& out)
997  {
998  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
999  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1000 
1001  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1002  out.template ref<scale - 1, 2>() = col;
1003  }
1004 
1005  template <class OutputMatrix>
1006  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1007  {
1008  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1009  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1010 
1011  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1012  out.template ref<2, scale - 1>() = col;
1013  }
1014 
1015  template <class OutputMatrix>
1016  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1017  {
1018  alphaBlend<1, 4>(out.template ref<2, 0>(), col);
1019  alphaBlend<1, 4>(out.template ref<0, 2>(), col);
1020  alphaBlend<3, 4>(out.template ref<2, 1>(), col);
1021  alphaBlend<3, 4>(out.template ref<1, 2>(), col);
1022  out.template ref<2, 2>() = col;
1023  }
1024 
1025  template <class OutputMatrix>
1026  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1027  {
1028  alphaBlend<1, 8>(out.template ref<1, 2>(), col);
1029  alphaBlend<1, 8>(out.template ref<2, 1>(), col);
1030  alphaBlend<7, 8>(out.template ref<2, 2>(), col);
1031  }
1032 
1033  template <class OutputMatrix>
1034  static void blendCorner(uint32_t col, OutputMatrix& out)
1035  {
1036  //model a round corner
1037  alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
1038  //alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligible
1039  //alphaBlend<14, 1000>(out.template ref<1, 2>(), col); //0.01413008627
1040  }
1041 };
1042 
1043 
1044 struct Scaler4x
1045 {
1046  static const int scale = 4;
1047 
1048  template <class OutputMatrix>
1049  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1050  {
1051  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1052  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1053 
1054  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1055  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1056 
1057  out.template ref<scale - 1, 2>() = col;
1058  out.template ref<scale - 1, 3>() = col;
1059  }
1060 
1061  template <class OutputMatrix>
1062  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1063  {
1064  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1065  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1066 
1067  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1068  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1069 
1070  out.template ref<2, scale - 1>() = col;
1071  out.template ref<3, scale - 1>() = col;
1072  }
1073 
1074  template <class OutputMatrix>
1075  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1076  {
1077  alphaBlend<3, 4>(out.template ref<3, 1>(), col);
1078  alphaBlend<3, 4>(out.template ref<1, 3>(), col);
1079  alphaBlend<1, 4>(out.template ref<3, 0>(), col);
1080  alphaBlend<1, 4>(out.template ref<0, 3>(), col);
1081  alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
1082  out.template ref<3, 3>() = out.template ref<3, 2>() = out.template ref<2, 3>() = col;
1083  }
1084 
1085  template <class OutputMatrix>
1086  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1087  {
1088  alphaBlend<1, 2>(out.template ref<scale - 1, scale / 2 >(), col);
1089  alphaBlend<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1090  out.template ref<scale - 1, scale - 1>() = col;
1091  }
1092 
1093  template <class OutputMatrix>
1094  static void blendCorner(uint32_t col, OutputMatrix& out)
1095  {
1096  //model a round corner
1097  alphaBlend<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
1098  alphaBlend< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
1099  alphaBlend< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
1100  }
1101 };
1102 
1103 
1104 struct Scaler5x
1105 {
1106  static const int scale = 5;
1107 
1108  template <class OutputMatrix>
1109  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1110  {
1111  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1112  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1113  alphaBlend<1, 4>(out.template ref<scale - 3, 4>(), col);
1114 
1115  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1116  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1117 
1118  out.template ref<scale - 1, 2>() = col;
1119  out.template ref<scale - 1, 3>() = col;
1120  out.template ref<scale - 1, 4>() = col;
1121  out.template ref<scale - 2, 4>() = col;
1122  }
1123 
1124  template <class OutputMatrix>
1125  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1126  {
1127  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1128  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1129  alphaBlend<1, 4>(out.template ref<4, scale - 3>(), col);
1130 
1131  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1132  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1133 
1134  out.template ref<2, scale - 1>() = col;
1135  out.template ref<3, scale - 1>() = col;
1136  out.template ref<4, scale - 1>() = col;
1137  out.template ref<4, scale - 2>() = col;
1138  }
1139 
1140  template <class OutputMatrix>
1141  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1142  {
1143  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1144  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1145  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1146 
1147  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1148  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1149  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1150 
1151  out.template ref<2, scale - 1>() = col;
1152  out.template ref<3, scale - 1>() = col;
1153 
1154  out.template ref<scale - 1, 2>() = col;
1155  out.template ref<scale - 1, 3>() = col;
1156 
1157  out.template ref<4, scale - 1>() = col;
1158 
1159  alphaBlend<2, 3>(out.template ref<3, 3>(), col);
1160  }
1161 
1162  template <class OutputMatrix>
1163  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1164  {
1165  alphaBlend<1, 8>(out.template ref<scale - 1, scale / 2 >(), col);
1166  alphaBlend<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1167  alphaBlend<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col);
1168 
1169  alphaBlend<7, 8>(out.template ref<4, 3>(), col);
1170  alphaBlend<7, 8>(out.template ref<3, 4>(), col);
1171 
1172  out.template ref<4, 4>() = col;
1173  }
1174 
1175  template <class OutputMatrix>
1176  static void blendCorner(uint32_t col, OutputMatrix& out)
1177  {
1178  //model a round corner
1179  alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
1180  alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
1181  alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
1182  //alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligible
1183  //alphaBlend<8, 1000>(out.template ref<2, 4>(), col); //0.008384061834
1184  }
1185 };
1186 }
1187 
1188 
1189 void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
1190 {
1191  switch (factor)
1192  {
1193  case 2:
1194  return scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1195  case 3:
1196  return scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1197  case 4:
1198  return scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1199  case 5:
1200  return scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1201  }
1202  assert(false);
1203 }
1204 
1205 
1206 bool xbrz::equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
1207 {
1208  return colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
1209 }
1210 
1211 
1212 void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
1213  uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
1214  SliceType st, int yFirst, int yLast)
1215 {
1216  if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
1217  trgPitch < trgWidth * static_cast<int>(sizeof(uint32_t)))
1218  {
1219  assert(false);
1220  return;
1221  }
1222 
1223  switch (st)
1224  {
1225  case NN_SCALE_SLICE_SOURCE:
1226  //nearest-neighbor (going over source image - fast for upscaling, since source is read only once
1227  yFirst = std::max(yFirst, 0);
1228  yLast = std::min(yLast, srcHeight);
1229  if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return;
1230 
1231  for (int y = yFirst; y < yLast; ++y)
1232  {
1233  //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
1234  // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
1235 
1236  //keep within for loop to support MT input slices!
1237  const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
1238  const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
1239  const int blockHeight = yTrg_last - yTrg_first;
1240 
1241  if (blockHeight > 0)
1242  {
1243  const uint32_t* srcLine = byteAdvance(src, y * srcPitch);
1244  uint32_t* trgLine = byteAdvance(trg, yTrg_first * trgPitch);
1245  int xTrg_first = 0;
1246 
1247  for (int x = 0; x < srcWidth; ++x)
1248  {
1249  int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
1250  const int blockWidth = xTrg_last - xTrg_first;
1251  if (blockWidth > 0)
1252  {
1253  xTrg_first = xTrg_last;
1254  fillBlock(trgLine, trgPitch, srcLine[x], blockWidth, blockHeight);
1255  trgLine += blockWidth;
1256  }
1257  }
1258  }
1259  }
1260  break;
1261 
1262  case NN_SCALE_SLICE_TARGET:
1263  //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
1264  yFirst = std::max(yFirst, 0);
1265  yLast = std::min(yLast, trgHeight);
1266  if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
1267 
1268  for (int y = yFirst; y < yLast; ++y)
1269  {
1270  uint32_t* trgLine = byteAdvance(trg, y * trgPitch);
1271  const int ySrc = srcHeight * y / trgHeight;
1272  const uint32_t* srcLine = byteAdvance(src, ySrc * srcPitch);
1273  for (int x = 0; x < trgWidth; ++x)
1274  {
1275  const int xSrc = srcWidth * x / trgWidth;
1276  trgLine[x] = srcLine[xSrc];
1277  }
1278  }
1279  break;
1280  }
1281 }
void fill(const SDL_Rect &rect, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
Fill an area with the given colour.
Definition: draw.cpp:50
bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
Definition: xbrz.cpp:1206
SliceType
Definition: xbrz.hpp:83
@ NN_SCALE_SLICE_TARGET
Definition: xbrz.hpp:85
@ NN_SCALE_SLICE_SOURCE
Definition: xbrz.hpp:84
void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
Definition: xbrz.hpp:100
void scale(size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, const ScalerCfg &cfg=ScalerCfg(), int yFirst=0, int yLast=std::numeric_limits< int >::max())
Definition: xbrz.cpp:1189
double luminanceWeight_
Definition: config.hpp:54
double steepDirectionThreshold
Definition: config.hpp:57
double dominantDirectionThreshold
Definition: config.hpp:56
double equalColorTolerance_
Definition: config.hpp:55
mock_party p
static map_location::DIRECTION n
#define i
#define g
#define d
#define e
#define DEF_GETTER(x)
Definition: xbrz.cpp:628
#define h
#define f
#define a
#define c
#define b
#define FORCE_INLINE
Definition: xbrz.cpp:184