source/libvpx/third_party/libyuv/source/row_common.cc - Issue 341293003: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/third_party/libyuv/source/row_common.cc

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 #include "third_party/libyuv/include/libyuv/row.h"

	12

	13 #include <string.h> // For memcpy and memset.

	14

	15 #include "third_party/libyuv/include/libyuv/basic_types.h"

	16

	17 #ifdef __cplusplus

	18 namespace libyuv {

	19 extern "C" {

	20 #endif

	21

	22 // llvm x86 is poor at ternary operator, so use branchless min/max.

	23

	#define USE_BRANCHLESS 1
	#if USE_BRANCHLESS
	// Branch-free variants. Each builds a mask with ">> 31" on an int32, which
	// is expected to yield all ones for a negative operand and zero otherwise.

	// Returns v when v is positive, otherwise 0.
	static __inline int32 clamp0(int32 v) {
	  const int32 keep_mask = -v >> 31;  // All ones only when v > 0.
	  return keep_mask & v;
	}

	// Returns 255 when v is above 255, otherwise the low 8 bits of v.
	static __inline int32 clamp255(int32 v) {
	  const int32 over_mask = (255 - v) >> 31;  // All ones only when v > 255.
	  return (over_mask | v) & 255;
	}

	// Limits val to [0, 255] by chaining clamp0 and clamp255.
	static __inline uint32 Clamp(int32 val) {
	  return (uint32)(clamp255(clamp0(val)));
	}

	// Absolute value of v.
	static __inline uint32 Abs(int32 v) {
	  const int sign_mask = v >> 31;  // -1 for negative v, 0 otherwise.
	  return (v + sign_mask) ^ sign_mask;
	}
	#else  // USE_BRANCHLESS
	// Returns v when v is non-negative, otherwise 0.
	static __inline int32 clamp0(int32 v) {
	  if (v < 0) {
	    return 0;
	  }
	  return v;
	}

	// Returns v, limited to at most 255.
	static __inline int32 clamp255(int32 v) {
	  if (v > 255) {
	    return 255;
	  }
	  return v;
	}

	// Limits val to [0, 255] by chaining clamp0 and clamp255.
	static __inline uint32 Clamp(int32 val) {
	  return (uint32)(clamp255(clamp0(val)));
	}

	// Absolute value of v.
	static __inline uint32 Abs(int32 v) {
	  if (v < 0) {
	    return -v;
	  }
	  return v;
	}
	#endif  // USE_BRANCHLESS

	61

	// WRITEWORD(p, v): stores the 32-bit value v at p, least significant byte
	// first. p does not need to be 4-byte aligned.
	#ifdef LIBYUV_LITTLE_ENDIAN
	// Host order already is low-byte-first, so copy the word as-is. memcpy
	// replaces the former cast-and-store through uint32*, which was an
	// unaligned, type-punned write.
	static __inline void WRITEWORD(uint8* p, uint32 v) {
	  memcpy(p, &v, sizeof(v));
	}
	#else
	// Portable path: emit the four bytes explicitly, low byte first.
	static __inline void WRITEWORD(uint8* p, uint32 v) {
	  p[0] = (uint8)(v & 255);
	  p[1] = (uint8)((v >> 8) & 255);
	  p[2] = (uint8)((v >> 16) & 255);
	  p[3] = (uint8)((v >> 24) & 255);
	}
	#endif

	72

	// Expands width 3-byte pixels (B, G, R order) into 4-byte pixels
	// (B, G, R, A) with the fourth byte set to 255.
	void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const uint8 blue = src_rgb24[0];
	    const uint8 green = src_rgb24[1];
	    const uint8 red = src_rgb24[2];
	    src_rgb24 += 3;
	    dst_argb[0] = blue;
	    dst_argb[1] = green;
	    dst_argb[2] = red;
	    dst_argb[3] = 255u;
	    dst_argb += 4;
	    --remaining;
	  }
	}

	87

	// Expands width 3-byte pixels stored R, G, B into 4-byte pixels stored
	// B, G, R, A with the fourth byte set to 255.
	void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const uint8 red = src_raw[0];
	    const uint8 green = src_raw[1];
	    const uint8 blue = src_raw[2];
	    src_raw += 3;
	    dst_argb[0] = blue;
	    dst_argb[1] = green;
	    dst_argb[2] = red;
	    dst_argb[3] = 255u;
	    dst_argb += 4;
	    --remaining;
	  }
	}

	102

	// Unpacks width 2-byte 5:6:5 pixels (low byte first) into B, G, R, A bytes.
	// Each field is widened to 8 bits by repeating its top bits in the low
	// bits; the fourth byte is set to 255.
	void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const int pixel = src_rgb565[0] | (src_rgb565[1] << 8);
	    const int b5 = pixel & 0x1f;
	    const int g6 = (pixel >> 5) & 0x3f;
	    const int r5 = pixel >> 11;
	    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
	    dst_argb[1] = (uint8)((g6 << 2) | (g6 >> 4));
	    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
	    dst_argb[3] = 255u;
	    src_rgb565 += 2;
	    dst_argb += 4;
	  }
	}

	117

	// Unpacks width 2-byte 1:5:5:5 pixels (low byte first) into B, G, R, A
	// bytes. The 5-bit fields are widened by repeating their top bits; the
	// single top bit becomes an output byte of 0 or 255.
	void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
	                         int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const int pixel = src_argb1555[0] | (src_argb1555[1] << 8);
	    const int b5 = pixel & 0x1f;
	    const int g5 = (pixel >> 5) & 0x1f;
	    const int r5 = (pixel >> 10) & 0x1f;
	    const int a1 = pixel >> 15;
	    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
	    dst_argb[1] = (uint8)((g5 << 3) | (g5 >> 2));
	    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
	    dst_argb[3] = a1 ? 255u : 0u;
	    src_argb1555 += 2;
	    dst_argb += 4;
	  }
	}

	134

	// Unpacks width 2-byte 4:4:4:4 pixels into B, G, R, A bytes. Each nibble n
	// becomes the byte (n << 4) | n.
	void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
	                         int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const uint8 lo = src_argb4444[0];
	    const uint8 hi = src_argb4444[1];
	    // Multiplying a nibble by 0x11 duplicates it into both halves of a byte.
	    dst_argb[0] = (uint8)((lo & 0x0f) * 0x11);
	    dst_argb[1] = (uint8)((lo >> 4) * 0x11);
	    dst_argb[2] = (uint8)((hi & 0x0f) * 0x11);
	    dst_argb[3] = (uint8)((hi >> 4) * 0x11);
	    src_argb4444 += 2;
	    dst_argb += 4;
	  }
	}

	151

	// Copies the first three bytes (B, G, R) of each 4-byte source pixel to a
	// 3-byte destination pixel; the fourth source byte is dropped.
	void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const uint8 blue = src_argb[0];
	    const uint8 green = src_argb[1];
	    const uint8 red = src_argb[2];
	    src_argb += 4;
	    dst_rgb[0] = blue;
	    dst_rgb[1] = green;
	    dst_rgb[2] = red;
	    dst_rgb += 3;
	    --remaining;
	  }
	}

	165

	// Writes each 4-byte B, G, R, A source pixel as a 3-byte R, G, B pixel;
	// the fourth source byte is dropped.
	void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const uint8 blue = src_argb[0];
	    const uint8 green = src_argb[1];
	    const uint8 red = src_argb[2];
	    src_argb += 4;
	    dst_rgb[0] = red;
	    dst_rgb[1] = green;
	    dst_rgb[2] = blue;
	    dst_rgb += 3;
	    --remaining;
	  }
	}

	179

	// Packs width 4-byte B, G, R, A pixels into 2-byte 5:6:5 pixels by keeping
	// the top 5/6/5 bits of B/G/R. The fourth source byte is ignored.
	//
	// Each 16-bit result is stored low byte first, one byte at a time. That
	// matches what the previous word-sized stores produced on little-endian
	// hosts, while avoiding unaligned type-punned writes, the signed overflow
	// of (r1 << 27), and a host-order store for the odd trailing pixel.
	void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const uint32 b = src_argb[0] >> 3;
	    const uint32 g = src_argb[1] >> 2;
	    const uint32 r = src_argb[2] >> 3;
	    const uint32 pixel = b | (g << 5) | (r << 11);
	    dst_rgb[0] = (uint8)(pixel & 0xff);
	    dst_rgb[1] = (uint8)(pixel >> 8);
	    dst_rgb += 2;
	    src_argb += 4;
	  }
	}

	201

	// Packs width 4-byte B, G, R, A pixels into 2-byte 1:5:5:5 pixels by
	// keeping the top 5 bits of B, G and R and the top bit of A.
	//
	// Each 16-bit result is stored low byte first, one byte at a time. That
	// matches what the previous word-sized stores produced on little-endian
	// hosts, while avoiding unaligned type-punned writes and the signed
	// overflow of (a1 << 31).
	void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const uint32 b = src_argb[0] >> 3;
	    const uint32 g = src_argb[1] >> 3;
	    const uint32 r = src_argb[2] >> 3;
	    const uint32 a = src_argb[3] >> 7;
	    const uint32 pixel = b | (g << 5) | (r << 10) | (a << 15);
	    dst_rgb[0] = (uint8)(pixel & 0xff);
	    dst_rgb[1] = (uint8)(pixel >> 8);
	    dst_rgb += 2;
	    src_argb += 4;
	  }
	}

	228

	// Packs width 4-byte B, G, R, A pixels into 2-byte 4:4:4:4 pixels by
	// keeping the top nibble of every channel.
	//
	// The first output byte holds B in its low nibble and G in its high
	// nibble; the second holds R (low) and A (high). Writing bytes directly
	// matches what the previous word-sized stores produced on little-endian
	// hosts, while avoiding unaligned type-punned writes and the signed
	// overflow of (a1 << 28).
	void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
	  int x;
	  for (x = 0; x < width; ++x) {
	    const uint32 b = src_argb[0] >> 4;
	    const uint32 g = src_argb[1] >> 4;
	    const uint32 r = src_argb[2] >> 4;
	    const uint32 a = src_argb[3] >> 4;
	    dst_rgb[0] = (uint8)(b | (g << 4));
	    dst_rgb[1] = (uint8)(r | (a << 4));
	    dst_rgb += 2;
	    src_argb += 4;
	  }
	}

	255

	// Fixed-point luma: weights 66/129/25 for r/g/b, plus a 0x1080 offset,
	// scaled down by 256.
	static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 66 * r + 129 * g + 25 * b;
	  return (weighted + 0x1080) >> 8;
	}

	// Fixed-point U: weights -38/-74/112 for r/g/b, plus a 0x8080 offset,
	// scaled down by 256.
	static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 112 * b - 74 * g - 38 * r;
	  return (weighted + 0x8080) >> 8;
	}

	// Fixed-point V: weights 112/-94/-18 for r/g/b, plus a 0x8080 offset,
	// scaled down by 256.
	static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 112 * r - 94 * g - 18 * b;
	  return (weighted + 0x8080) >> 8;
	}

	266

	267 #define MAKEROWY(NAME, R, G, B, BPP) \

	268 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \

	269 int x; \

	270 for (x = 0; x < width; ++x) { \

	271 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \

	272 src_argb0 += BPP; \

	273 dst_y += 1; \

	274 } \

	275 } \

	276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \

	277 uint8* dst_u, uint8* dst_v, int width) { \

	278 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \

	279 int x; \

	280 for (x = 0; x < width - 1; x += 2) { \

	281 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \

	282 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \

	283 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \

	284 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \

	285 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \

	286 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \

	287 dst_u[0] = RGBToU(ar, ag, ab); \

	288 dst_v[0] = RGBToV(ar, ag, ab); \

	289 src_rgb0 += BPP * 2; \

	290 src_rgb1 += BPP * 2; \

	291 dst_u += 1; \

	292 dst_v += 1; \

	293 } \

	294 if (width & 1) { \

	295 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \

	296 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \

	297 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \

	298 dst_u[0] = RGBToU(ar, ag, ab); \

	299 dst_v[0] = RGBToV(ar, ag, ab); \

	300 } \

	301 }

	302

	303 MAKEROWY(ARGB, 2, 1, 0, 4)

	304 MAKEROWY(BGRA, 1, 2, 3, 4)

	305 MAKEROWY(ABGR, 0, 1, 2, 4)

	306 MAKEROWY(RGBA, 3, 2, 1, 4)

	307 MAKEROWY(RGB24, 2, 1, 0, 3)

	308 MAKEROWY(RAW, 0, 1, 2, 3)

	309 #undef MAKEROWY

	310

	311 // JPeg uses a variation on BT.601-1 full range

	312 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b

	313 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center

	314 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center

	315 // BT.601 Mpeg range uses:

	316 // b 0.1016 * 255 = 25.908 = 25

	317 // g 0.5078 * 255 = 129.489 = 129

	318 // r 0.2578 * 255 = 65.739 = 66

	319 // JPeg 8 bit Y (not used):

	320 // b 0.11400 * 256 = 29.184 = 29

	321 // g 0.58700 * 256 = 150.272 = 150

	322 // r 0.29900 * 256 = 76.544 = 77

	323 // JPeg 7 bit Y:

	324 // b 0.11400 * 128 = 14.592 = 15

	325 // g 0.58700 * 128 = 75.136 = 75

	326 // r 0.29900 * 128 = 38.272 = 38

	327 // JPeg 8 bit U:

	328 // b 0.50000 * 255 = 127.5 = 127

	329 // g -0.33126 * 255 = -84.4713 = -84

	330 // r -0.16874 * 255 = -43.0287 = -43

	331 // JPeg 8 bit V:

	332 // b -0.08131 * 255 = -20.73405 = -20

	333 // g -0.41869 * 255 = -106.76595 = -107

	334 // r 0.50000 * 255 = 127.5 = 127

	335

	// 7-bit fixed-point luma: weights 38/75/15 for r/g/b, rounded (+64) and
	// scaled down by 128.
	static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 38 * r + 75 * g + 15 * b;
	  return (weighted + 64) >> 7;
	}

	// 8-bit fixed-point U: weights -43/-84/127 for r/g/b, plus a 0x8080
	// offset, scaled down by 256.
	static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 127 * b - 84 * g - 43 * r;
	  return (weighted + 0x8080) >> 8;
	}

	// 8-bit fixed-point V: weights 127/-107/-20 for r/g/b, plus a 0x8080
	// offset, scaled down by 256.
	static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
	  const int weighted = 127 * r - 107 * g - 20 * b;
	  return (weighted + 0x8080) >> 8;
	}

	346

	347 #define AVGB(a, b) (((a) + (b) + 1) >> 1)

	348

	349 #define MAKEROWYJ(NAME, R, G, B, BPP) \

	350 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \

	351 int x; \

	352 for (x = 0; x < width; ++x) { \

	353 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \

	354 src_argb0 += BPP; \

	355 dst_y += 1; \

	356 } \

	357 } \

	358 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \

	359 uint8* dst_u, uint8* dst_v, int width) { \

	360 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \

	361 int x; \

	362 for (x = 0; x < width - 1; x += 2) { \

	363 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \

	364 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \

	365 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \

	366 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \

	367 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \

	368 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \

	369 dst_u[0] = RGBToUJ(ar, ag, ab); \

	370 dst_v[0] = RGBToVJ(ar, ag, ab); \

	371 src_rgb0 += BPP * 2; \

	372 src_rgb1 += BPP * 2; \

	373 dst_u += 1; \

	374 dst_v += 1; \

	375 } \

	376 if (width & 1) { \

	377 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \

	378 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \

	379 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \

	380 dst_u[0] = RGBToUJ(ar, ag, ab); \

	381 dst_v[0] = RGBToVJ(ar, ag, ab); \

	382 } \

	383 }

	384

	385 MAKEROWYJ(ARGB, 2, 1, 0, 4)

	386 #undef MAKEROWYJ

	387

	388 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {

	389 int x;

	390 for (x = 0; x < width; ++x) {

	391 uint8 b = src_rgb565[0] & 0x1f;

	392 uint8 g = (src_rgb565[0] >> 5) \| ((src_rgb565[1] & 0x07) << 3);

	393 uint8 r = src_rgb565[1] >> 3;

	394 b = (b << 3) \| (b >> 2);

	395 g = (g << 2) \| (g >> 4);

	396 r = (r << 3) \| (r >> 2);

	397 dst_y[0] = RGBToY(r, g, b);

	398 src_rgb565 += 2;

	399 dst_y += 1;

	400 }

	401 }

	402

	403 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {

	404 int x;

	405 for (x = 0; x < width; ++x) {

	406 uint8 b = src_argb1555[0] & 0x1f;

	407 uint8 g = (src_argb1555[0] >> 5) \| ((src_argb1555[1] & 0x03) << 3);

	408 uint8 r = (src_argb1555[1] & 0x7c) >> 2;

	409 b = (b << 3) \| (b >> 2);

	410 g = (g << 3) \| (g >> 2);

	411 r = (r << 3) \| (r >> 2);

	412 dst_y[0] = RGBToY(r, g, b);

	413 src_argb1555 += 2;

	414 dst_y += 1;

	415 }

	416 }

	417

	418 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {

	419 int x;

	420 for (x = 0; x < width; ++x) {

	421 uint8 b = src_argb4444[0] & 0x0f;

	422 uint8 g = src_argb4444[0] >> 4;

	423 uint8 r = src_argb4444[1] & 0x0f;

	424 b = (b << 4) \| b;

	425 g = (g << 4) \| g;

	426 r = (r << 4) \| r;

	427 dst_y[0] = RGBToY(r, g, b);

	428 src_argb4444 += 2;

	429 dst_y += 1;

	430 }

	431 }

	432

	433 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,

	434 uint8* dst_u, uint8* dst_v, int width) {

	435 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;

	436 int x;

	437 for (x = 0; x < width - 1; x += 2) {

	438 uint8 b0 = src_rgb565[0] & 0x1f;

	439 uint8 g0 = (src_rgb565[0] >> 5) \| ((src_rgb565[1] & 0x07) << 3);

	440 uint8 r0 = src_rgb565[1] >> 3;

	441 uint8 b1 = src_rgb565[2] & 0x1f;

	442 uint8 g1 = (src_rgb565[2] >> 5) \| ((src_rgb565[3] & 0x07) << 3);

	443 uint8 r1 = src_rgb565[3] >> 3;

	444 uint8 b2 = next_rgb565[0] & 0x1f;

	445 uint8 g2 = (next_rgb565[0] >> 5) \| ((next_rgb565[1] & 0x07) << 3);

	446 uint8 r2 = next_rgb565[1] >> 3;

	447 uint8 b3 = next_rgb565[2] & 0x1f;

	448 uint8 g3 = (next_rgb565[2] >> 5) \| ((next_rgb565[3] & 0x07) << 3);

	449 uint8 r3 = next_rgb565[3] >> 3;

	450 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.

	451 uint8 g = (g0 + g1 + g2 + g3);

	452 uint8 r = (r0 + r1 + r2 + r3);

	453 b = (b << 1) \| (b >> 6); // 787 -> 888.

	454 r = (r << 1) \| (r >> 6);

	455 dst_u[0] = RGBToU(r, g, b);

	456 dst_v[0] = RGBToV(r, g, b);

	457 src_rgb565 += 4;

	458 next_rgb565 += 4;

	459 dst_u += 1;

	460 dst_v += 1;

	461 }

	462 if (width & 1) {

	463 uint8 b0 = src_rgb565[0] & 0x1f;

	464 uint8 g0 = (src_rgb565[0] >> 5) \| ((src_rgb565[1] & 0x07) << 3);

	465 uint8 r0 = src_rgb565[1] >> 3;

	466 uint8 b2 = next_rgb565[0] & 0x1f;

	467 uint8 g2 = (next_rgb565[0] >> 5) \| ((next_rgb565[1] & 0x07) << 3);

	468 uint8 r2 = next_rgb565[1] >> 3;

	469 uint8 b = (b0 + b2); // 565 * 2 = 676.

	470 uint8 g = (g0 + g2);

	471 uint8 r = (r0 + r2);

	472 b = (b << 2) \| (b >> 4); // 676 -> 888

	473 g = (g << 1) \| (g >> 6);

	474 r = (r << 2) \| (r >> 4);

	475 dst_u[0] = RGBToU(r, g, b);

	476 dst_v[0] = RGBToV(r, g, b);

	477 }

	478 }

	479

	480 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,

	481 uint8* dst_u, uint8* dst_v, int width) {

	482 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;

	483 int x;

	484 for (x = 0; x < width - 1; x += 2) {

	485 uint8 b0 = src_argb1555[0] & 0x1f;

	486 uint8 g0 = (src_argb1555[0] >> 5) \| ((src_argb1555[1] & 0x03) << 3);

	487 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;

	488 uint8 b1 = src_argb1555[2] & 0x1f;

	489 uint8 g1 = (src_argb1555[2] >> 5) \| ((src_argb1555[3] & 0x03) << 3);

	490 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;

	491 uint8 b2 = next_argb1555[0] & 0x1f;

	492 uint8 g2 = (next_argb1555[0] >> 5) \| ((next_argb1555[1] & 0x03) << 3);

	493 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;

	494 uint8 b3 = next_argb1555[2] & 0x1f;

	495 uint8 g3 = (next_argb1555[2] >> 5) \| ((next_argb1555[3] & 0x03) << 3);

	496 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;

	497 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.

	498 uint8 g = (g0 + g1 + g2 + g3);

	499 uint8 r = (r0 + r1 + r2 + r3);

	500 b = (b << 1) \| (b >> 6); // 777 -> 888.

	501 g = (g << 1) \| (g >> 6);

	502 r = (r << 1) \| (r >> 6);

	503 dst_u[0] = RGBToU(r, g, b);

	504 dst_v[0] = RGBToV(r, g, b);

	505 src_argb1555 += 4;

	506 next_argb1555 += 4;

	507 dst_u += 1;

	508 dst_v += 1;

	509 }

	510 if (width & 1) {

	511 uint8 b0 = src_argb1555[0] & 0x1f;

	512 uint8 g0 = (src_argb1555[0] >> 5) \| ((src_argb1555[1] & 0x03) << 3);

	513 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;

	514 uint8 b2 = next_argb1555[0] & 0x1f;

	515 uint8 g2 = (next_argb1555[0] >> 5) \| ((next_argb1555[1] & 0x03) << 3);

	516 uint8 r2 = next_argb1555[1] >> 3;

	517 uint8 b = (b0 + b2); // 555 * 2 = 666.

	518 uint8 g = (g0 + g2);

	519 uint8 r = (r0 + r2);

	520 b = (b << 2) \| (b >> 4); // 666 -> 888.

	521 g = (g << 2) \| (g >> 4);

	522 r = (r << 2) \| (r >> 4);

	523 dst_u[0] = RGBToU(r, g, b);

	524 dst_v[0] = RGBToV(r, g, b);

	525 }

	526 }

	527

	528 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,

	529 uint8* dst_u, uint8* dst_v, int width) {

	530 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;

	531 int x;

	532 for (x = 0; x < width - 1; x += 2) {

	533 uint8 b0 = src_argb4444[0] & 0x0f;

	534 uint8 g0 = src_argb4444[0] >> 4;

	535 uint8 r0 = src_argb4444[1] & 0x0f;

	536 uint8 b1 = src_argb4444[2] & 0x0f;

	537 uint8 g1 = src_argb4444[2] >> 4;

	538 uint8 r1 = src_argb4444[3] & 0x0f;

	539 uint8 b2 = next_argb4444[0] & 0x0f;

	540 uint8 g2 = next_argb4444[0] >> 4;

	541 uint8 r2 = next_argb4444[1] & 0x0f;

	542 uint8 b3 = next_argb4444[2] & 0x0f;

	543 uint8 g3 = next_argb4444[2] >> 4;

	544 uint8 r3 = next_argb4444[3] & 0x0f;

	545 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.

	546 uint8 g = (g0 + g1 + g2 + g3);

	547 uint8 r = (r0 + r1 + r2 + r3);

	548 b = (b << 2) \| (b >> 4); // 666 -> 888.

	549 g = (g << 2) \| (g >> 4);

	550 r = (r << 2) \| (r >> 4);

	551 dst_u[0] = RGBToU(r, g, b);

	552 dst_v[0] = RGBToV(r, g, b);

	553 src_argb4444 += 4;

	554 next_argb4444 += 4;

	555 dst_u += 1;

	556 dst_v += 1;

	557 }

	558 if (width & 1) {

	559 uint8 b0 = src_argb4444[0] & 0x0f;

	560 uint8 g0 = src_argb4444[0] >> 4;

	561 uint8 r0 = src_argb4444[1] & 0x0f;

	562 uint8 b2 = next_argb4444[0] & 0x0f;

	563 uint8 g2 = next_argb4444[0] >> 4;

	564 uint8 r2 = next_argb4444[1] & 0x0f;

	565 uint8 b = (b0 + b2); // 444 * 2 = 555.

	566 uint8 g = (g0 + g2);

	567 uint8 r = (r0 + r2);

	568 b = (b << 3) \| (b >> 2); // 555 -> 888.

	569 g = (g << 3) \| (g >> 2);

	570 r = (r << 3) \| (r >> 2);

	571 dst_u[0] = RGBToU(r, g, b);

	572 dst_v[0] = RGBToV(r, g, b);

	573 }

	574 }

	575

	576 void ARGBToUV444Row_C(const uint8* src_argb,

	577 uint8* dst_u, uint8* dst_v, int width) {

	578 int x;

	579 for (x = 0; x < width; ++x) {

	580 uint8 ab = src_argb[0];

	581 uint8 ag = src_argb[1];

	582 uint8 ar = src_argb[2];

	583 dst_u[0] = RGBToU(ar, ag, ab);

	584 dst_v[0] = RGBToV(ar, ag, ab);

	585 src_argb += 4;

	586 dst_u += 1;

	587 dst_v += 1;

	588 }

	589 }

	590

	591 void ARGBToUV422Row_C(const uint8* src_argb,

	592 uint8* dst_u, uint8* dst_v, int width) {

	593 int x;

	594 for (x = 0; x < width - 1; x += 2) {

	595 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;

	596 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;

	597 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;

	598 dst_u[0] = RGBToU(ar, ag, ab);

	599 dst_v[0] = RGBToV(ar, ag, ab);

	600 src_argb += 8;

	601 dst_u += 1;

	602 dst_v += 1;

	603 }

	604 if (width & 1) {

	605 uint8 ab = src_argb[0];

	606 uint8 ag = src_argb[1];

	607 uint8 ar = src_argb[2];

	608 dst_u[0] = RGBToU(ar, ag, ab);

	609 dst_v[0] = RGBToV(ar, ag, ab);

	610 }

	611 }

	612

	613 void ARGBToUV411Row_C(const uint8* src_argb,

	614 uint8* dst_u, uint8* dst_v, int width) {

	615 int x;

	616 for (x = 0; x < width - 3; x += 4) {

	617 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;

	618 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;

	619 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;

	620 dst_u[0] = RGBToU(ar, ag, ab);

	621 dst_v[0] = RGBToV(ar, ag, ab);

	622 src_argb += 16;

	623 dst_u += 1;

	624 dst_v += 1;

	625 }

	626 if ((width & 3) == 3) {

	627 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;

	628 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;

	629 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;

	630 dst_u[0] = RGBToU(ar, ag, ab);

	631 dst_v[0] = RGBToV(ar, ag, ab);

	632 } else if ((width & 3) == 2) {

	633 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;

	634 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;

	635 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;

	636 dst_u[0] = RGBToU(ar, ag, ab);

	637 dst_v[0] = RGBToV(ar, ag, ab);

	638 } else if ((width & 3) == 1) {

	639 uint8 ab = src_argb[0];

	640 uint8 ag = src_argb[1];

	641 uint8 ar = src_argb[2];

	642 dst_u[0] = RGBToU(ar, ag, ab);

	643 dst_v[0] = RGBToV(ar, ag, ab);

	644 }

	645 }

	646

	647 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {

	648 int x;

	649 for (x = 0; x < width; ++x) {

	650 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);

	651 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;

	652 dst_argb[3] = src_argb[3];

	653 dst_argb += 4;

	654 src_argb += 4;

	655 }

	656 }

	657

	658 // Convert a row of image to Sepia tone.

	659 void ARGBSepiaRow_C(uint8* dst_argb, int width) {

	660 int x;

	661 for (x = 0; x < width; ++x) {

	662 int b = dst_argb[0];

	663 int g = dst_argb[1];

	664 int r = dst_argb[2];

	665 int sb = (b * 17 + g * 68 + r * 35) >> 7;

	666 int sg = (b * 22 + g * 88 + r * 45) >> 7;

	667 int sr = (b * 24 + g * 98 + r * 50) >> 7;

	668 // b does not over flow. a is preserved from original.

	669 dst_argb[0] = sb;

	670 dst_argb[1] = clamp255(sg);

	671 dst_argb[2] = clamp255(sr);

	672 dst_argb += 4;

	673 }

	674 }

	675

	676 // Apply color matrix to a row of image. Matrix is signed.

	677 // TODO(fbarchard): Consider adding rounding (+32).

	678 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,

	679 const int8* matrix_argb, int width) {

	680 int x;

	681 for (x = 0; x < width; ++x) {

	682 int b = src_argb[0];

	683 int g = src_argb[1];

	684 int r = src_argb[2];

	685 int a = src_argb[3];

	686 int sb = (b * matrix_argb[0] + g * matrix_argb[1] +

	687 r * matrix_argb[2] + a * matrix_argb[3]) >> 6;

	688 int sg = (b * matrix_argb[4] + g * matrix_argb[5] +

	689 r * matrix_argb[6] + a * matrix_argb[7]) >> 6;

	690 int sr = (b * matrix_argb[8] + g * matrix_argb[9] +

	691 r * matrix_argb[10] + a * matrix_argb[11]) >> 6;

	692 int sa = (b * matrix_argb[12] + g * matrix_argb[13] +

	693 r * matrix_argb[14] + a * matrix_argb[15]) >> 6;

	694 dst_argb[0] = Clamp(sb);

	695 dst_argb[1] = Clamp(sg);

	696 dst_argb[2] = Clamp(sr);

	697 dst_argb[3] = Clamp(sa);

	698 src_argb += 4;

	699 dst_argb += 4;

	700 }

	701 }

	702

	// Remaps all four channels of width pixels in place through table_argb.
	// The table is interleaved: channel c of value v is looked up at index
	// v * 4 + c.
	void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const int idx_b = dst_argb[0] * 4;
	    const int idx_g = dst_argb[1] * 4 + 1;
	    const int idx_r = dst_argb[2] * 4 + 2;
	    const int idx_a = dst_argb[3] * 4 + 3;
	    dst_argb[0] = table_argb[idx_b];
	    dst_argb[1] = table_argb[idx_g];
	    dst_argb[2] = table_argb[idx_r];
	    dst_argb[3] = table_argb[idx_a];
	    dst_argb += 4;
	    --remaining;
	  }
	}

	718

	// Remaps B, G and R of width pixels in place through table_argb, leaving
	// the fourth byte of each pixel untouched. The table is interleaved:
	// channel c of value v is looked up at index v * 4 + c.
	void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const int idx_b = dst_argb[0] * 4;
	    const int idx_g = dst_argb[1] * 4 + 1;
	    const int idx_r = dst_argb[2] * 4 + 2;
	    dst_argb[0] = table_argb[idx_b];
	    dst_argb[1] = table_argb[idx_g];
	    dst_argb[2] = table_argb[idx_r];
	    dst_argb += 4;
	    --remaining;
	  }
	}

	732

	// Quantizes B, G and R of width pixels in place; the fourth byte is left
	// untouched. Each value v becomes
	//   ((v * scale) >> 16) * interval_size + interval_offset.
	void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
	                       int interval_offset, int width) {
	  int x;
	  for (x = 0; x < width; ++x, dst_argb += 4) {
	    int c;
	    for (c = 0; c < 3; ++c) {
	      const int value = dst_argb[c];
	      dst_argb[c] = (value * scale >> 16) * interval_size + interval_offset;
	    }
	  }
	}

	746

	// Scales each channel of width pixels by the matching byte of value
	// (bits 0-7 scale byte 0, bits 8-15 byte 1, bits 16-23 byte 2, bits 24-31
	// byte 3). Both the channel and the scale byte are widened to 16 bits by
	// repeating the byte, multiplied, and the top 8 bits of the 32-bit
	// product are kept.
	void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
	                    uint32 value) {
	  uint32 scale[4];
	  int c;
	  int i;
	  for (c = 0; c < 4; ++c) {
	    const uint32 byte = (value >> (8 * c)) & 0xff;
	    scale[c] = byte | (byte << 8);
	  }
	  for (i = 0; i < width; ++i) {
	    uint32 wide[4];
	    // Read the whole pixel before storing any result.
	    for (c = 0; c < 4; ++c) {
	      wide[c] = src_argb[c] | (src_argb[c] << 8);
	    }
	    for (c = 0; c < 4; ++c) {
	      dst_argb[c] = scale[c] * wide[c] >> 24;
	    }
	    src_argb += 4;
	    dst_argb += 4;
	  }
	}

	773

	// Multiplies two rows channel by channel. Each byte of src_argb0 is
	// widened to 16 bits by repeating the byte, multiplied by the matching
	// byte of src_argb1, and the product is scaled down by 65536.
	void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
	                       uint8* dst_argb, int width) {
	  int i;
	  for (i = 0; i < width; ++i) {
	    uint32 wide[4];
	    uint32 scale[4];
	    int c;
	    // Read both source pixels completely before storing any result.
	    for (c = 0; c < 4; ++c) {
	      wide[c] = src_argb0[c] | (src_argb0[c] << 8);
	    }
	    for (c = 0; c < 4; ++c) {
	      scale[c] = src_argb1[c];
	    }
	    for (c = 0; c < 4; ++c) {
	      dst_argb[c] = scale[c] * wide[c] >> 16;
	    }
	    src_argb0 += 4;
	    src_argb1 += 4;
	    dst_argb += 4;
	  }
	}

	800

	801 #define SHADE(f, v) clamp255(v + f)

	802

	803 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,

	804 uint8* dst_argb, int width) {

	805 int i;

	806 for (i = 0; i < width; ++i) {

	807 const int b = src_argb0[0];

	808 const int g = src_argb0[1];

	809 const int r = src_argb0[2];

	810 const int a = src_argb0[3];

	811 const int b_add = src_argb1[0];

	812 const int g_add = src_argb1[1];

	813 const int r_add = src_argb1[2];

	814 const int a_add = src_argb1[3];

	815 dst_argb[0] = SHADE(b, b_add);

	816 dst_argb[1] = SHADE(g, g_add);

	817 dst_argb[2] = SHADE(r, r_add);

	818 dst_argb[3] = SHADE(a, a_add);

	819 src_argb0 += 4;

	820 src_argb1 += 4;

	821 dst_argb += 4;

	822 }

	823 }

	824 #undef SHADE

	825

	826 #define SHADE(f, v) clamp0(f - v)

	827

	828 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,

	829 uint8* dst_argb, int width) {

	830 int i;

	831 for (i = 0; i < width; ++i) {

	832 const int b = src_argb0[0];

	833 const int g = src_argb0[1];

	834 const int r = src_argb0[2];

	835 const int a = src_argb0[3];

	836 const int b_sub = src_argb1[0];

	837 const int g_sub = src_argb1[1];

	838 const int r_sub = src_argb1[2];

	839 const int a_sub = src_argb1[3];

	840 dst_argb[0] = SHADE(b, b_sub);

	841 dst_argb[1] = SHADE(g, g_sub);

	842 dst_argb[2] = SHADE(r, r_sub);

	843 dst_argb[3] = SHADE(a, a_sub);

	844 src_argb0 += 4;

	845 src_argb1 += 4;

	846 dst_argb += 4;

	847 }

	848 }

	849 #undef SHADE

	850

	851 // Sobel functions which mimics SSSE3.

	852 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,

	853 uint8* dst_sobelx, int width) {

	854 int i;

	855 for (i = 0; i < width; ++i) {

	856 int a = src_y0[i];

	857 int b = src_y1[i];

	858 int c = src_y2[i];

	859 int a_sub = src_y0[i + 2];

	860 int b_sub = src_y1[i + 2];

	861 int c_sub = src_y2[i + 2];

	862 int a_diff = a - a_sub;

	863 int b_diff = b - b_sub;

	864 int c_diff = c - c_sub;

	865 int sobel = Abs(a_diff + b_diff * 2 + c_diff);

	866 dst_sobelx[i] = (uint8)(clamp255(sobel));

	867 }

	868 }

	869

	870 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,

	871 uint8* dst_sobely, int width) {

	872 int i;

	873 for (i = 0; i < width; ++i) {

	874 int a = src_y0[i + 0];

	875 int b = src_y0[i + 1];

	876 int c = src_y0[i + 2];

	877 int a_sub = src_y1[i + 0];

	878 int b_sub = src_y1[i + 1];

	879 int c_sub = src_y1[i + 2];

	880 int a_diff = a - a_sub;

	881 int b_diff = b - b_sub;

	882 int c_diff = c - c_sub;

	883 int sobel = Abs(a_diff + b_diff * 2 + c_diff);

	884 dst_sobely[i] = (uint8)(clamp255(sobel));

	885 }

	886 }

	887

	888 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,

	889 uint8* dst_argb, int width) {

	890 int i;

	891 for (i = 0; i < width; ++i) {

	892 int r = src_sobelx[i];

	893 int b = src_sobely[i];

	894 int s = clamp255(r + b);

	895 dst_argb[0] = (uint8)(s);

	896 dst_argb[1] = (uint8)(s);

	897 dst_argb[2] = (uint8)(s);

	898 dst_argb[3] = (uint8)(255u);

	899 dst_argb += 4;

	900 }

	901 }

	902

	903 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,

	904 uint8* dst_y, int width) {

	905 int i;

	906 for (i = 0; i < width; ++i) {

	907 int r = src_sobelx[i];

	908 int b = src_sobely[i];

	909 int s = clamp255(r + b);

	910 dst_y[i] = (uint8)(s);

	911 }

	912 }

	913

	914 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,

	915 uint8* dst_argb, int width) {

	916 int i;

	917 for (i = 0; i < width; ++i) {

	918 int r = src_sobelx[i];

	919 int b = src_sobely[i];

	920 int g = clamp255(r + b);

	921 dst_argb[0] = (uint8)(b);

	922 dst_argb[1] = (uint8)(g);

	923 dst_argb[2] = (uint8)(r);

	924 dst_argb[3] = (uint8)(255u);

	925 dst_argb += 4;

	926 }

	927 }

	928

	// Expands each source byte into a 4-byte pixel: the value is copied to
	// bytes 0-2 and byte 3 is set to 255.
	void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
	  int remaining = width;
	  while (remaining > 0) {
	    const uint8 luma = *src_y++;
	    dst_argb[0] = luma;
	    dst_argb[1] = luma;
	    dst_argb[2] = luma;
	    dst_argb[3] = 255u;
	    dst_argb += 4;
	    --remaining;
	  }
	}

	940

	941 // C reference code that mimics the YUV assembly.

	942

	943 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */

	944

	945 #define UB 127 /* min(63,(int8)(2.018 * 64)) */

	946 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */

	947 #define UR 0

	948

	949 #define VB 0

	950 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */

	951 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */

	952

	953 // Bias

	954 #define BB UB * 128 + VB * 128

	955 #define BG UG * 128 + VG * 128

	956 #define BR UR * 128 + VR * 128

	957

	958 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,

	959 uint8* b, uint8* g, uint8* r) {

	960 int32 y1 = ((int32)(y) - 16) * YG;

	961 b = Clamp((int32)((u UB + v * VB) - (BB) + y1) >> 6);

	962 g = Clamp((int32)((u UG + v * VG) - (BG) + y1) >> 6);

	963 r = Clamp((int32)((u UR + v * VR) - (BR) + y1) >> 6);

	964 }

	965

	966 #if !defined(LIBYUV_DISABLE_NEON) && \

	967 (defined(__ARM_NEON__) \|\| defined(LIBYUV_NEON))

	968 // C mimic assembly.

	969 // TODO(fbarchard): Remove subsampling from Neon.

	970 void I444ToARGBRow_C(const uint8* src_y,

	971 const uint8* src_u,

	972 const uint8* src_v,

	973 uint8* rgb_buf,

	974 int width) {

	975 int x;

	976 for (x = 0; x < width - 1; x += 2) {

	977 uint8 u = (src_u[0] + src_u[1] + 1) >> 1;

	978 uint8 v = (src_v[0] + src_v[1] + 1) >> 1;

	979 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	980 rgb_buf[3] = 255;

	981 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	982 rgb_buf[7] = 255;

	983 src_y += 2;

	984 src_u += 2;

	985 src_v += 2;

	986 rgb_buf += 8; // Advance 2 pixels.

	987 }

	988 if (width & 1) {

	989 YuvPixel(src_y[0], src_u[0], src_v[0],

	990 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	991 }

	992 }

	993 #else

	994 void I444ToARGBRow_C(const uint8* src_y,

	995 const uint8* src_u,

	996 const uint8* src_v,

	997 uint8* rgb_buf,

	998 int width) {

	999 int x;

	1000 for (x = 0; x < width; ++x) {

	1001 YuvPixel(src_y[0], src_u[0], src_v[0],

	1002 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1003 rgb_buf[3] = 255;

	1004 src_y += 1;

	1005 src_u += 1;

	1006 src_v += 1;

	1007 rgb_buf += 4; // Advance 1 pixel.

	1008 }

	1009 }

	1010 #endif

	1011 // Also used for 420

	1012 void I422ToARGBRow_C(const uint8* src_y,

	1013 const uint8* src_u,

	1014 const uint8* src_v,

	1015 uint8* rgb_buf,

	1016 int width) {

	1017 int x;

	1018 for (x = 0; x < width - 1; x += 2) {

	1019 YuvPixel(src_y[0], src_u[0], src_v[0],

	1020 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1021 rgb_buf[3] = 255;

	1022 YuvPixel(src_y[1], src_u[0], src_v[0],

	1023 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1024 rgb_buf[7] = 255;

	1025 src_y += 2;

	1026 src_u += 1;

	1027 src_v += 1;

	1028 rgb_buf += 8; // Advance 2 pixels.

	1029 }

	1030 if (width & 1) {

	1031 YuvPixel(src_y[0], src_u[0], src_v[0],

	1032 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1033 rgb_buf[3] = 255;

	1034 }

	1035 }

	1036

	1037 void I422ToRGB24Row_C(const uint8* src_y,

	1038 const uint8* src_u,

	1039 const uint8* src_v,

	1040 uint8* rgb_buf,

	1041 int width) {

	1042 int x;

	1043 for (x = 0; x < width - 1; x += 2) {

	1044 YuvPixel(src_y[0], src_u[0], src_v[0],

	1045 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1046 YuvPixel(src_y[1], src_u[0], src_v[0],

	1047 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);

	1048 src_y += 2;

	1049 src_u += 1;

	1050 src_v += 1;

	1051 rgb_buf += 6; // Advance 2 pixels.

	1052 }

	1053 if (width & 1) {

	1054 YuvPixel(src_y[0], src_u[0], src_v[0],

	1055 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1056 }

	1057 }

	1058

	1059 void I422ToRAWRow_C(const uint8* src_y,

	1060 const uint8* src_u,

	1061 const uint8* src_v,

	1062 uint8* rgb_buf,

	1063 int width) {

	1064 int x;

	1065 for (x = 0; x < width - 1; x += 2) {

	1066 YuvPixel(src_y[0], src_u[0], src_v[0],

	1067 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);

	1068 YuvPixel(src_y[1], src_u[0], src_v[0],

	1069 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);

	1070 src_y += 2;

	1071 src_u += 1;

	1072 src_v += 1;

	1073 rgb_buf += 6; // Advance 2 pixels.

	1074 }

	1075 if (width & 1) {

	1076 YuvPixel(src_y[0], src_u[0], src_v[0],

	1077 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);

	1078 }

	1079 }

	1080

	1081 void I422ToARGB4444Row_C(const uint8* src_y,

	1082 const uint8* src_u,

	1083 const uint8* src_v,

	1084 uint8* dst_argb4444,

	1085 int width) {

	1086 uint8 b0;

	1087 uint8 g0;

	1088 uint8 r0;

	1089 uint8 b1;

	1090 uint8 g1;

	1091 uint8 r1;

	1092 int x;

	1093 for (x = 0; x < width - 1; x += 2) {

	1094 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1095 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);

	1096 b0 = b0 >> 4;

	1097 g0 = g0 >> 4;

	1098 r0 = r0 >> 4;

	1099 b1 = b1 >> 4;

	1100 g1 = g1 >> 4;

	1101 r1 = r1 >> 4;

	1102 (uint32)(dst_argb4444) = b0 \| (g0 << 4) \| (r0 << 8) \|

	1103 (b1 << 16) \| (g1 << 20) \| (r1 << 24) \| 0xf000f000;

	1104 src_y += 2;

	1105 src_u += 1;

	1106 src_v += 1;

	1107 dst_argb4444 += 4; // Advance 2 pixels.

	1108 }

	1109 if (width & 1) {

	1110 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1111 b0 = b0 >> 4;

	1112 g0 = g0 >> 4;

	1113 r0 = r0 >> 4;

	1114 (uint16)(dst_argb4444) = b0 \| (g0 << 4) \| (r0 << 8) \|

	1115 0xf000;

	1116 }

	1117 }

	1118

	1119 void I422ToARGB1555Row_C(const uint8* src_y,

	1120 const uint8* src_u,

	1121 const uint8* src_v,

	1122 uint8* dst_argb1555,

	1123 int width) {

	1124 uint8 b0;

	1125 uint8 g0;

	1126 uint8 r0;

	1127 uint8 b1;

	1128 uint8 g1;

	1129 uint8 r1;

	1130 int x;

	1131 for (x = 0; x < width - 1; x += 2) {

	1132 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1133 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);

	1134 b0 = b0 >> 3;

	1135 g0 = g0 >> 3;

	1136 r0 = r0 >> 3;

	1137 b1 = b1 >> 3;

	1138 g1 = g1 >> 3;

	1139 r1 = r1 >> 3;

	1140 (uint32)(dst_argb1555) = b0 \| (g0 << 5) \| (r0 << 10) \|

	1141 (b1 << 16) \| (g1 << 21) \| (r1 << 26) \| 0x80008000;

	1142 src_y += 2;

	1143 src_u += 1;

	1144 src_v += 1;

	1145 dst_argb1555 += 4; // Advance 2 pixels.

	1146 }

	1147 if (width & 1) {

	1148 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1149 b0 = b0 >> 3;

	1150 g0 = g0 >> 3;

	1151 r0 = r0 >> 3;

	1152 (uint16)(dst_argb1555) = b0 \| (g0 << 5) \| (r0 << 10) \|

	1153 0x8000;

	1154 }

	1155 }

	1156

	1157 void I422ToRGB565Row_C(const uint8* src_y,

	1158 const uint8* src_u,

	1159 const uint8* src_v,

	1160 uint8* dst_rgb565,

	1161 int width) {

	1162 uint8 b0;

	1163 uint8 g0;

	1164 uint8 r0;

	1165 uint8 b1;

	1166 uint8 g1;

	1167 uint8 r1;

	1168 int x;

	1169 for (x = 0; x < width - 1; x += 2) {

	1170 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1171 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);

	1172 b0 = b0 >> 3;

	1173 g0 = g0 >> 2;

	1174 r0 = r0 >> 3;

	1175 b1 = b1 >> 3;

	1176 g1 = g1 >> 2;

	1177 r1 = r1 >> 3;

	1178 (uint32)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11) \|

	1179 (b1 << 16) \| (g1 << 21) \| (r1 << 27);

	1180 src_y += 2;

	1181 src_u += 1;

	1182 src_v += 1;

	1183 dst_rgb565 += 4; // Advance 2 pixels.

	1184 }

	1185 if (width & 1) {

	1186 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);

	1187 b0 = b0 >> 3;

	1188 g0 = g0 >> 2;

	1189 r0 = r0 >> 3;

	1190 (uint16)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11);

	1191 }

	1192 }

	1193

	1194 void I411ToARGBRow_C(const uint8* src_y,

	1195 const uint8* src_u,

	1196 const uint8* src_v,

	1197 uint8* rgb_buf,

	1198 int width) {

	1199 int x;

	1200 for (x = 0; x < width - 3; x += 4) {

	1201 YuvPixel(src_y[0], src_u[0], src_v[0],

	1202 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1203 rgb_buf[3] = 255;

	1204 YuvPixel(src_y[1], src_u[0], src_v[0],

	1205 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1206 rgb_buf[7] = 255;

	1207 YuvPixel(src_y[2], src_u[0], src_v[0],

	1208 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);

	1209 rgb_buf[11] = 255;

	1210 YuvPixel(src_y[3], src_u[0], src_v[0],

	1211 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);

	1212 rgb_buf[15] = 255;

	1213 src_y += 4;

	1214 src_u += 1;

	1215 src_v += 1;

	1216 rgb_buf += 16; // Advance 4 pixels.

	1217 }

	1218 if (width & 2) {

	1219 YuvPixel(src_y[0], src_u[0], src_v[0],

	1220 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1221 rgb_buf[3] = 255;

	1222 YuvPixel(src_y[1], src_u[0], src_v[0],

	1223 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1224 rgb_buf[7] = 255;

	1225 src_y += 2;

	1226 rgb_buf += 8; // Advance 2 pixels.

	1227 }

	1228 if (width & 1) {

	1229 YuvPixel(src_y[0], src_u[0], src_v[0],

	1230 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1231 rgb_buf[3] = 255;

	1232 }

	1233 }

	1234

	1235 void NV12ToARGBRow_C(const uint8* src_y,

	1236 const uint8* usrc_v,

	1237 uint8* rgb_buf,

	1238 int width) {

	1239 int x;

	1240 for (x = 0; x < width - 1; x += 2) {

	1241 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],

	1242 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1243 rgb_buf[3] = 255;

	1244 YuvPixel(src_y[1], usrc_v[0], usrc_v[1],

	1245 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1246 rgb_buf[7] = 255;

	1247 src_y += 2;

	1248 usrc_v += 2;

	1249 rgb_buf += 8; // Advance 2 pixels.

	1250 }

	1251 if (width & 1) {

	1252 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],

	1253 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1254 rgb_buf[3] = 255;

	1255 }

	1256 }

	1257

	1258 void NV21ToARGBRow_C(const uint8* src_y,

	1259 const uint8* src_vu,

	1260 uint8* rgb_buf,

	1261 int width) {

	1262 int x;

	1263 for (x = 0; x < width - 1; x += 2) {

	1264 YuvPixel(src_y[0], src_vu[1], src_vu[0],

	1265 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1266 rgb_buf[3] = 255;

	1267

	1268 YuvPixel(src_y[1], src_vu[1], src_vu[0],

	1269 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1270 rgb_buf[7] = 255;

	1271

	1272 src_y += 2;

	1273 src_vu += 2;

	1274 rgb_buf += 8; // Advance 2 pixels.

	1275 }

	1276 if (width & 1) {

	1277 YuvPixel(src_y[0], src_vu[1], src_vu[0],

	1278 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1279 rgb_buf[3] = 255;

	1280 }

	1281 }

	1282

	1283 void NV12ToRGB565Row_C(const uint8* src_y,

	1284 const uint8* usrc_v,

	1285 uint8* dst_rgb565,

	1286 int width) {

	1287 uint8 b0;

	1288 uint8 g0;

	1289 uint8 r0;

	1290 uint8 b1;

	1291 uint8 g1;

	1292 uint8 r1;

	1293 int x;

	1294 for (x = 0; x < width - 1; x += 2) {

	1295 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);

	1296 YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);

	1297 b0 = b0 >> 3;

	1298 g0 = g0 >> 2;

	1299 r0 = r0 >> 3;

	1300 b1 = b1 >> 3;

	1301 g1 = g1 >> 2;

	1302 r1 = r1 >> 3;

	1303 (uint32)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11) \|

	1304 (b1 << 16) \| (g1 << 21) \| (r1 << 27);

	1305 src_y += 2;

	1306 usrc_v += 2;

	1307 dst_rgb565 += 4; // Advance 2 pixels.

	1308 }

	1309 if (width & 1) {

	1310 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);

	1311 b0 = b0 >> 3;

	1312 g0 = g0 >> 2;

	1313 r0 = r0 >> 3;

	1314 (uint16)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11);

	1315 }

	1316 }

	1317

	1318 void NV21ToRGB565Row_C(const uint8* src_y,

	1319 const uint8* vsrc_u,

	1320 uint8* dst_rgb565,

	1321 int width) {

	1322 uint8 b0;

	1323 uint8 g0;

	1324 uint8 r0;

	1325 uint8 b1;

	1326 uint8 g1;

	1327 uint8 r1;

	1328 int x;

	1329 for (x = 0; x < width - 1; x += 2) {

	1330 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);

	1331 YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);

	1332 b0 = b0 >> 3;

	1333 g0 = g0 >> 2;

	1334 r0 = r0 >> 3;

	1335 b1 = b1 >> 3;

	1336 g1 = g1 >> 2;

	1337 r1 = r1 >> 3;

	1338 (uint32)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11) \|

	1339 (b1 << 16) \| (g1 << 21) \| (r1 << 27);

	1340 src_y += 2;

	1341 vsrc_u += 2;

	1342 dst_rgb565 += 4; // Advance 2 pixels.

	1343 }

	1344 if (width & 1) {

	1345 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);

	1346 b0 = b0 >> 3;

	1347 g0 = g0 >> 2;

	1348 r0 = r0 >> 3;

	1349 (uint16)(dst_rgb565) = b0 \| (g0 << 5) \| (r0 << 11);

	1350 }

	1351 }

	1352

	1353 void YUY2ToARGBRow_C(const uint8* src_yuy2,

	1354 uint8* rgb_buf,

	1355 int width) {

	1356 int x;

	1357 for (x = 0; x < width - 1; x += 2) {

	1358 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],

	1359 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1360 rgb_buf[3] = 255;

	1361 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],

	1362 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1363 rgb_buf[7] = 255;

	1364 src_yuy2 += 4;

	1365 rgb_buf += 8; // Advance 2 pixels.

	1366 }

	1367 if (width & 1) {

	1368 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],

	1369 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1370 rgb_buf[3] = 255;

	1371 }

	1372 }

	1373

	1374 void UYVYToARGBRow_C(const uint8* src_uyvy,

	1375 uint8* rgb_buf,

	1376 int width) {

	1377 int x;

	1378 for (x = 0; x < width - 1; x += 2) {

	1379 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],

	1380 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1381 rgb_buf[3] = 255;

	1382 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],

	1383 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1384 rgb_buf[7] = 255;

	1385 src_uyvy += 4;

	1386 rgb_buf += 8; // Advance 2 pixels.

	1387 }

	1388 if (width & 1) {

	1389 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],

	1390 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1391 rgb_buf[3] = 255;

	1392 }

	1393 }

	1394

	1395 void I422ToBGRARow_C(const uint8* src_y,

	1396 const uint8* src_u,

	1397 const uint8* src_v,

	1398 uint8* rgb_buf,

	1399 int width) {

	1400 int x;

	1401 for (x = 0; x < width - 1; x += 2) {

	1402 YuvPixel(src_y[0], src_u[0], src_v[0],

	1403 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);

	1404 rgb_buf[0] = 255;

	1405 YuvPixel(src_y[1], src_u[0], src_v[0],

	1406 rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);

	1407 rgb_buf[4] = 255;

	1408 src_y += 2;

	1409 src_u += 1;

	1410 src_v += 1;

	1411 rgb_buf += 8; // Advance 2 pixels.

	1412 }

	1413 if (width & 1) {

	1414 YuvPixel(src_y[0], src_u[0], src_v[0],

	1415 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);

	1416 rgb_buf[0] = 255;

	1417 }

	1418 }

	1419

	1420 void I422ToABGRRow_C(const uint8* src_y,

	1421 const uint8* src_u,

	1422 const uint8* src_v,

	1423 uint8* rgb_buf,

	1424 int width) {

	1425 int x;

	1426 for (x = 0; x < width - 1; x += 2) {

	1427 YuvPixel(src_y[0], src_u[0], src_v[0],

	1428 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);

	1429 rgb_buf[3] = 255;

	1430 YuvPixel(src_y[1], src_u[0], src_v[0],

	1431 rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);

	1432 rgb_buf[7] = 255;

	1433 src_y += 2;

	1434 src_u += 1;

	1435 src_v += 1;

	1436 rgb_buf += 8; // Advance 2 pixels.

	1437 }

	1438 if (width & 1) {

	1439 YuvPixel(src_y[0], src_u[0], src_v[0],

	1440 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);

	1441 rgb_buf[3] = 255;

	1442 }

	1443 }

	1444

	1445 void I422ToRGBARow_C(const uint8* src_y,

	1446 const uint8* src_u,

	1447 const uint8* src_v,

	1448 uint8* rgb_buf,

	1449 int width) {

	1450 int x;

	1451 for (x = 0; x < width - 1; x += 2) {

	1452 YuvPixel(src_y[0], src_u[0], src_v[0],

	1453 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);

	1454 rgb_buf[0] = 255;

	1455 YuvPixel(src_y[1], src_u[0], src_v[0],

	1456 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);

	1457 rgb_buf[4] = 255;

	1458 src_y += 2;

	1459 src_u += 1;

	1460 src_v += 1;

	1461 rgb_buf += 8; // Advance 2 pixels.

	1462 }

	1463 if (width & 1) {

	1464 YuvPixel(src_y[0], src_u[0], src_v[0],

	1465 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);

	1466 rgb_buf[0] = 255;

	1467 }

	1468 }

	1469

	1470 void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {

	1471 int x;

	1472 for (x = 0; x < width - 1; x += 2) {

	1473 YuvPixel(src_y[0], 128, 128,

	1474 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1475 rgb_buf[3] = 255;

	1476 YuvPixel(src_y[1], 128, 128,

	1477 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);

	1478 rgb_buf[7] = 255;

	1479 src_y += 2;

	1480 rgb_buf += 8; // Advance 2 pixels.

	1481 }

	1482 if (width & 1) {

	1483 YuvPixel(src_y[0], 128, 128,

	1484 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);

	1485 rgb_buf[3] = 255;

	1486 }

	1487 }

	1488

	1489 void MirrorRow_C(const uint8* src, uint8* dst, int width) {

	1490 int x;

	1491 src += width - 1;

	1492 for (x = 0; x < width - 1; x += 2) {

	1493 dst[x] = src[0];

	1494 dst[x + 1] = src[-1];

	1495 src -= 2;

	1496 }

	1497 if (width & 1) {

	1498 dst[width - 1] = src[0];

	1499 }

	1500 }

	1501

	1502 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {

	1503 int x;

	1504 src_uv += (width - 1) << 1;

	1505 for (x = 0; x < width - 1; x += 2) {

	1506 dst_u[x] = src_uv[0];

	1507 dst_u[x + 1] = src_uv[-2];

	1508 dst_v[x] = src_uv[1];

	1509 dst_v[x + 1] = src_uv[-2 + 1];

	1510 src_uv -= 4;

	1511 }

	1512 if (width & 1) {

	1513 dst_u[width - 1] = src_uv[0];

	1514 dst_v[width - 1] = src_uv[1];

	1515 }

	1516 }

	1517

	1518 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {

	1519 int x;

	1520 const uint32* src32 = (const uint32*)(src);

	1521 uint32* dst32 = (uint32*)(dst);

	1522 src32 += width - 1;

	1523 for (x = 0; x < width - 1; x += 2) {

	1524 dst32[x] = src32[0];

	1525 dst32[x + 1] = src32[-1];

	1526 src32 -= 2;

	1527 }

	1528 if (width & 1) {

	1529 dst32[width - 1] = src32[0];

	1530 }

	1531 }

	1532

	1533 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {

	1534 int x;

	1535 for (x = 0; x < width - 1; x += 2) {

	1536 dst_u[x] = src_uv[0];

	1537 dst_u[x + 1] = src_uv[2];

	1538 dst_v[x] = src_uv[1];

	1539 dst_v[x + 1] = src_uv[3];

	1540 src_uv += 4;

	1541 }

	1542 if (width & 1) {

	1543 dst_u[width - 1] = src_uv[0];

	1544 dst_v[width - 1] = src_uv[1];

	1545 }

	1546 }

	1547

	1548 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,

	1549 int width) {

	1550 int x;

	1551 for (x = 0; x < width - 1; x += 2) {

	1552 dst_uv[0] = src_u[x];

	1553 dst_uv[1] = src_v[x];

	1554 dst_uv[2] = src_u[x + 1];

	1555 dst_uv[3] = src_v[x + 1];

	1556 dst_uv += 4;

	1557 }

	1558 if (width & 1) {

	1559 dst_uv[0] = src_u[width - 1];

	1560 dst_uv[1] = src_v[width - 1];

	1561 }

	1562 }

	1563

	1564 void CopyRow_C(const uint8* src, uint8* dst, int count) {

	1565 memcpy(dst, src, count);

	1566 }

	1567

	1568 void CopyRow_16_C(const uint16* src, uint16* dst, int count) {

	1569 memcpy(dst, src, count * 2);

	1570 }

	1571

	1572 void SetRow_C(uint8* dst, uint32 v8, int count) {

	1573 #ifdef _MSC_VER

	1574 // VC will generate rep stosb.

	1575 int x;

	1576 for (x = 0; x < count; ++x) {

	1577 dst[x] = v8;

	1578 }

	1579 #else

	1580 memset(dst, v8, count);

	1581 #endif

	1582 }

	1583

	1584 void ARGBSetRows_C(uint8* dst, uint32 v32, int width,

	1585 int dst_stride, int height) {

	1586 int y;

	1587 for (y = 0; y < height; ++y) {

	1588 uint32* d = (uint32*)(dst);

	1589 int x;

	1590 for (x = 0; x < width; ++x) {

	1591 d[x] = v32;

	1592 }

	1593 dst += dst_stride;

	1594 }

	1595 }

	1596

	1597 // Filter 2 rows of YUY2 UV's (422) into U and V (420).

	1598 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,

	1599 uint8* dst_u, uint8* dst_v, int width) {

	1600 // Output a row of UV values, filtering 2 rows of YUY2.

	1601 int x;

	1602 for (x = 0; x < width; x += 2) {

	1603 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;

	1604 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;

	1605 src_yuy2 += 4;

	1606 dst_u += 1;

	1607 dst_v += 1;

	1608 }

	1609 }

	1610

	1611 // Copy row of YUY2 UV's (422) into U and V (422).

	1612 void YUY2ToUV422Row_C(const uint8* src_yuy2,

	1613 uint8* dst_u, uint8* dst_v, int width) {

	1614 // Output a row of UV values.

	1615 int x;

	1616 for (x = 0; x < width; x += 2) {

	1617 dst_u[0] = src_yuy2[1];

	1618 dst_v[0] = src_yuy2[3];

	1619 src_yuy2 += 4;

	1620 dst_u += 1;

	1621 dst_v += 1;

	1622 }

	1623 }

	1624

	1625 // Copy row of YUY2 Y's (422) into Y (420/422).

	1626 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {

	1627 // Output a row of Y values.

	1628 int x;

	1629 for (x = 0; x < width - 1; x += 2) {

	1630 dst_y[x] = src_yuy2[0];

	1631 dst_y[x + 1] = src_yuy2[2];

	1632 src_yuy2 += 4;

	1633 }

	1634 if (width & 1) {

	1635 dst_y[width - 1] = src_yuy2[0];

	1636 }

	1637 }

	1638

	1639 // Filter 2 rows of UYVY UV's (422) into U and V (420).

	1640 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,

	1641 uint8* dst_u, uint8* dst_v, int width) {

	1642 // Output a row of UV values.

	1643 int x;

	1644 for (x = 0; x < width; x += 2) {

	1645 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;

	1646 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;

	1647 src_uyvy += 4;

	1648 dst_u += 1;

	1649 dst_v += 1;

	1650 }

	1651 }

	1652

	1653 // Copy row of UYVY UV's (422) into U and V (422).

	1654 void UYVYToUV422Row_C(const uint8* src_uyvy,

	1655 uint8* dst_u, uint8* dst_v, int width) {

	1656 // Output a row of UV values.

	1657 int x;

	1658 for (x = 0; x < width; x += 2) {

	1659 dst_u[0] = src_uyvy[0];

	1660 dst_v[0] = src_uyvy[2];

	1661 src_uyvy += 4;

	1662 dst_u += 1;

	1663 dst_v += 1;

	1664 }

	1665 }

	1666

	1667 // Copy row of UYVY Y's (422) into Y (420/422).

	1668 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {

	1669 // Output a row of Y values.

	1670 int x;

	1671 for (x = 0; x < width - 1; x += 2) {

	1672 dst_y[x] = src_uyvy[1];

	1673 dst_y[x + 1] = src_uyvy[3];

	1674 src_uyvy += 4;

	1675 }

	1676 if (width & 1) {

	1677 dst_y[width - 1] = src_uyvy[1];

	1678 }

	1679 }

	1680

	1681 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f

	1682

	1683 // Blend src_argb0 over src_argb1 and store to dst_argb.

	1684 // dst_argb may be src_argb0 or src_argb1.

	1685 // This code mimics the SSSE3 version for better testability.

	1686 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,

	1687 uint8* dst_argb, int width) {

	1688 int x;

	1689 for (x = 0; x < width - 1; x += 2) {

	1690 uint32 fb = src_argb0[0];

	1691 uint32 fg = src_argb0[1];

	1692 uint32 fr = src_argb0[2];

	1693 uint32 a = src_argb0[3];

	1694 uint32 bb = src_argb1[0];

	1695 uint32 bg = src_argb1[1];

	1696 uint32 br = src_argb1[2];

	1697 dst_argb[0] = BLEND(fb, bb, a);

	1698 dst_argb[1] = BLEND(fg, bg, a);

	1699 dst_argb[2] = BLEND(fr, br, a);

	1700 dst_argb[3] = 255u;

	1701

	1702 fb = src_argb0[4 + 0];

	1703 fg = src_argb0[4 + 1];

	1704 fr = src_argb0[4 + 2];

	1705 a = src_argb0[4 + 3];

	1706 bb = src_argb1[4 + 0];

	1707 bg = src_argb1[4 + 1];

	1708 br = src_argb1[4 + 2];

	1709 dst_argb[4 + 0] = BLEND(fb, bb, a);

	1710 dst_argb[4 + 1] = BLEND(fg, bg, a);

	1711 dst_argb[4 + 2] = BLEND(fr, br, a);

	1712 dst_argb[4 + 3] = 255u;

	1713 src_argb0 += 8;

	1714 src_argb1 += 8;

	1715 dst_argb += 8;

	1716 }

	1717

	1718 if (width & 1) {

	1719 uint32 fb = src_argb0[0];

	1720 uint32 fg = src_argb0[1];

	1721 uint32 fr = src_argb0[2];

	1722 uint32 a = src_argb0[3];

	1723 uint32 bb = src_argb1[0];

	1724 uint32 bg = src_argb1[1];

	1725 uint32 br = src_argb1[2];

	1726 dst_argb[0] = BLEND(fb, bb, a);

	1727 dst_argb[1] = BLEND(fg, bg, a);

	1728 dst_argb[2] = BLEND(fr, br, a);

	1729 dst_argb[3] = 255u;

	1730 }

	1731 }

	1732 #undef BLEND

	1733 #define ATTENUATE(f, a) (a \| (a << 8)) * (f \| (f << 8)) >> 24

	1734

	1735 // Multiply source RGB by alpha and store to destination.

	1736 // This code mimics the SSSE3 version for better testability.

	1737 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {

	1738 int i;

	1739 for (i = 0; i < width - 1; i += 2) {

	1740 uint32 b = src_argb[0];

	1741 uint32 g = src_argb[1];

	1742 uint32 r = src_argb[2];

	1743 uint32 a = src_argb[3];

	1744 dst_argb[0] = ATTENUATE(b, a);

	1745 dst_argb[1] = ATTENUATE(g, a);

	1746 dst_argb[2] = ATTENUATE(r, a);

	1747 dst_argb[3] = a;

	1748 b = src_argb[4];

	1749 g = src_argb[5];

	1750 r = src_argb[6];

	1751 a = src_argb[7];

	1752 dst_argb[4] = ATTENUATE(b, a);

	1753 dst_argb[5] = ATTENUATE(g, a);

	1754 dst_argb[6] = ATTENUATE(r, a);

	1755 dst_argb[7] = a;

	1756 src_argb += 8;

	1757 dst_argb += 8;

	1758 }

	1759

	1760 if (width & 1) {

	1761 const uint32 b = src_argb[0];

	1762 const uint32 g = src_argb[1];

	1763 const uint32 r = src_argb[2];

	1764 const uint32 a = src_argb[3];

	1765 dst_argb[0] = ATTENUATE(b, a);

	1766 dst_argb[1] = ATTENUATE(g, a);

	1767 dst_argb[2] = ATTENUATE(r, a);

	1768 dst_argb[3] = a;

	1769 }

	1770 }

	1771 #undef ATTENUATE

	1772

	1773 // Divide source RGB by alpha and store to destination.

	1774 // b = (b * 255 + (a / 2)) / a;

	1775 // g = (g * 255 + (a / 2)) / a;

	1776 // r = (r * 255 + (a / 2)) / a;

	1777 // Reciprocal method is off by 1 on some values. ie 125

	1778 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.

	1779 #define T(a) 0x01000000 + (0x10000 / a)

	1780 const uint32 fixed_invtbl8[256] = {

	1781 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),

	1782 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),

	1783 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),

	1784 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),

	1785 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),

	1786 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),

	1787 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),

	1788 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),

	1789 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),

	1790 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),

	1791 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),

	1792 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),

	1793 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),

	1794 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),

	1795 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),

	1796 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),

	1797 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),

	1798 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),

	1799 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),

	1800 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),

	1801 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),

	1802 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),

	1803 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),

	1804 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),

	1805 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),

	1806 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),

	1807 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),

	1808 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),

	1809 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),

	1810 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),

	1811 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),

	1812 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };

	1813 #undef T

	1814

	1815 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {

	1816 int i;

	1817 for (i = 0; i < width; ++i) {

	1818 uint32 b = src_argb[0];

	1819 uint32 g = src_argb[1];

	1820 uint32 r = src_argb[2];

	1821 const uint32 a = src_argb[3];

	1822 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point

	1823 b = (b * ia) >> 8;

	1824 g = (g * ia) >> 8;

	1825 r = (r * ia) >> 8;

	1826 // Clamping should not be necessary but is free in assembly.

	1827 dst_argb[0] = clamp255(b);

	1828 dst_argb[1] = clamp255(g);

	1829 dst_argb[2] = clamp255(r);

	1830 dst_argb[3] = a;

	1831 src_argb += 4;

	1832 dst_argb += 4;

	1833 }

	1834 }

	1835

	1836 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,

	1837 const int32* previous_cumsum, int width) {

	1838 int32 row_sum[4] = {0, 0, 0, 0};

	1839 int x;

	1840 for (x = 0; x < width; ++x) {

	1841 row_sum[0] += row[x * 4 + 0];

	1842 row_sum[1] += row[x * 4 + 1];

	1843 row_sum[2] += row[x * 4 + 2];

	1844 row_sum[3] += row[x * 4 + 3];

	1845 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];

	1846 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];

	1847 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];

	1848 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];

	1849 }

	1850 }

	1851

	1852 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,

	1853 int w, int area, uint8* dst, int count) {

	1854 float ooa = 1.0f / area;

	1855 int i;

	1856 for (i = 0; i < count; ++i) {

	1857 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);

	1858 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);

	1859 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);

	1860 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);

	1861 dst += 4;

	1862 tl += 4;

	1863 bl += 4;

	1864 }

	1865 }

	1866

	1867 // Copy pixels from rotated source to destination row with a slope.

	1868 LIBYUV_API

	1869 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,

	1870 uint8* dst_argb, const float* uv_dudv, int width) {

	1871 int i;

	1872 // Render a row of pixels from source into a buffer.

	1873 float uv[2];

	1874 uv[0] = uv_dudv[0];

	1875 uv[1] = uv_dudv[1];

	1876 for (i = 0; i < width; ++i) {

	1877 int x = (int)(uv[0]);

	1878 int y = (int)(uv[1]);

	1879 (uint32)(dst_argb) =

	1880 (const uint32)(src_argb + y * src_argb_stride +

	1881 x * 4);

	1882 dst_argb += 4;

	1883 uv[0] += uv_dudv[2];

	1884 uv[1] += uv_dudv[3];

	1885 }

	1886 }

	1887

	1888 // Blend 2 rows into 1 for conversions such as I422ToI420.

	1889 void HalfRow_C(const uint8* src_uv, int src_uv_stride,

	1890 uint8* dst_uv, int pix) {

	1891 int x;

	1892 for (x = 0; x < pix; ++x) {

	1893 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;

	1894 }

	1895 }

	1896

	1897 void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,

	1898 uint16* dst_uv, int pix) {

	1899 int x;

	1900 for (x = 0; x < pix; ++x) {

	1901 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;

	1902 }

	1903 }

	1904

	1905 // C version 2x2 -> 2x1.

	1906 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,

	1907 ptrdiff_t src_stride,

	1908 int width, int source_y_fraction) {

	1909 int y1_fraction = source_y_fraction;

	1910 int y0_fraction = 256 - y1_fraction;

	1911 const uint8* src_ptr1 = src_ptr + src_stride;

	1912 int x;

	1913 if (source_y_fraction == 0) {

	1914 memcpy(dst_ptr, src_ptr, width);

	1915 return;

	1916 }

	1917 if (source_y_fraction == 128) {

	1918 HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);

	1919 return;

	1920 }

	1921 for (x = 0; x < width - 1; x += 2) {

	1922 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;

	1923 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;

	1924 src_ptr += 2;

	1925 src_ptr1 += 2;

	1926 dst_ptr += 2;

	1927 }

	1928 if (width & 1) {

	1929 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;

	1930 }

	1931 }

	1932

	1933 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,

	1934 ptrdiff_t src_stride,

	1935 int width, int source_y_fraction) {

	1936 int y1_fraction = source_y_fraction;

	1937 int y0_fraction = 256 - y1_fraction;

	1938 const uint16* src_ptr1 = src_ptr + src_stride;

	1939 int x;

	1940 if (source_y_fraction == 0) {

	1941 memcpy(dst_ptr, src_ptr, width * 2);

	1942 return;

	1943 }

	1944 if (source_y_fraction == 128) {

	1945 HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);

	1946 return;

	1947 }

	1948 for (x = 0; x < width - 1; x += 2) {

	1949 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;

	1950 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;

	1951 src_ptr += 2;

	1952 src_ptr1 += 2;

	1953 dst_ptr += 2;

	1954 }

	1955 if (width & 1) {

	1956 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;

	1957 }

	1958 }

	1959

	// Builds a Bayer row by taking one byte per ARGB pixel, alternating between
	// two channels. e.g. BGBGBGBG
	// selector packs two byte offsets, both measured from the start of a pair of
	// ARGB pixels (8 bytes): bits 0-7 pick the byte for the first pixel of the
	// pair, bits 8-15 the byte for the second.
	void ARGBToBayerRow_C(const uint8* src_argb,
	                      uint8* dst_bayer, uint32 selector, int pix) {
	  const int first_offset = selector & 0xff;
	  const int second_offset = (selector >> 8) & 0xff;
	  int remaining = pix;
	  // Two source pixels (8 bytes) produce two output bytes per pass.
	  while (remaining > 1) {
	    dst_bayer[0] = src_argb[first_offset];
	    dst_bayer[1] = src_argb[second_offset];
	    src_argb += 8;
	    dst_bayer += 2;
	    remaining -= 2;
	  }
	  // Odd pixel count: the last pixel uses the first offset.
	  if (pix & 1) {
	    dst_bayer[0] = src_argb[first_offset];
	  }
	}

	1977

	// Extracts byte 1 of every ARGB pixel (the G channel) into a packed row.
	// e.g. GGGGGGGG
	// selector is accepted but never read by this implementation.
	void ARGBToBayerGGRow_C(const uint8* src_argb,
	                        uint8* dst_bayer, uint32 selector, int pix) {
	  int remaining = pix;
	  (void)selector;
	  // Two pixels per pass: G sits at byte 1 of the first pixel and byte 5
	  // (4 + 1) of the second.
	  while (remaining > 1) {
	    dst_bayer[0] = src_argb[1];
	    dst_bayer[1] = src_argb[5];
	    src_argb += 8;
	    dst_bayer += 2;
	    remaining -= 2;
	  }
	  // Odd pixel count: one pixel is left over.
	  if (pix & 1) {
	    dst_bayer[0] = src_argb[1];
	  }
	}

	1993

	// Reorders the four bytes of every ARGB pixel. shuffler[0..3] give, for each
	// output byte, the index of the source byte within the pixel to copy.
	void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
	                      const uint8* shuffler, int pix) {
	  const int from0 = shuffler[0];
	  const int from1 = shuffler[1];
	  const int from2 = shuffler[2];
	  const int from3 = shuffler[3];
	  int remaining;
	  for (remaining = pix; remaining > 0; --remaining) {
	    // Gather the whole pixel before storing anything so the routine also
	    // works when src_argb and dst_argb are the same buffer.
	    uint8 shuffled[4];
	    shuffled[0] = src_argb[from0];
	    shuffled[1] = src_argb[from1];
	    shuffled[2] = src_argb[from2];
	    shuffled[3] = src_argb[from3];
	    dst_argb[0] = shuffled[0];
	    dst_argb[1] = shuffled[1];
	    dst_argb[2] = shuffled[2];
	    dst_argb[3] = shuffled[3];
	    src_argb += 4;
	    dst_argb += 4;
	  }
	}

	2017

	// Packs planar Y, U and V rows into Y0 U Y1 V byte order. Every pair of
	// Y samples shares one U and one V sample and produces 4 output bytes.
	void I422ToYUY2Row_C(const uint8* src_y,
	                     const uint8* src_u,
	                     const uint8* src_v,
	                     uint8* dst_frame, int width) {
	  int remaining = width;
	  while (remaining > 1) {
	    dst_frame[0] = src_y[0];
	    dst_frame[1] = *src_u++;
	    dst_frame[2] = src_y[1];
	    dst_frame[3] = *src_v++;
	    dst_frame += 4;
	    src_y += 2;
	    remaining -= 2;
	  }
	  // Odd width: still writes a full 4-byte group, repeating the last Y.
	  if (width & 1) {
	    const uint8 last_y = src_y[0];
	    dst_frame[0] = last_y;
	    dst_frame[1] = src_u[0];
	    dst_frame[2] = last_y;
	    dst_frame[3] = src_v[0];
	  }
	}

	2040

	// Packs planar Y, U and V rows into U Y0 V Y1 byte order. Every pair of
	// Y samples shares one U and one V sample and produces 4 output bytes.
	void I422ToUYVYRow_C(const uint8* src_y,
	                     const uint8* src_u,
	                     const uint8* src_v,
	                     uint8* dst_frame, int width) {
	  int remaining = width;
	  while (remaining > 1) {
	    dst_frame[0] = *src_u++;
	    dst_frame[1] = src_y[0];
	    dst_frame[2] = *src_v++;
	    dst_frame[3] = src_y[1];
	    dst_frame += 4;
	    src_y += 2;
	    remaining -= 2;
	  }
	  // Odd width: still writes a full 4-byte group, repeating the last Y.
	  if (width & 1) {
	    dst_frame[0] = src_u[0];
	    dst_frame[1] = src_y[0];
	    dst_frame[2] = src_v[0];
	    dst_frame[3] = src_y[0];
	  }
	}

	2063

	2064 #if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)

	2065 // row_win.cc has asm version, but GCC uses 2 step wrapper.

	2066 #if !defined(_MSC_VER) && (defined(__x86_64__) \|\| defined(__i386__))

	2067 void I422ToRGB565Row_SSSE3(const uint8* src_y,

	2068 const uint8* src_u,

	2069 const uint8* src_v,

	2070 uint8* rgb_buf,

	2071 int width) {

	2072 // Allocate a row of ARGB.

	2073 align_buffer_64(row, width * 4);

	2074 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);

	2075 ARGBToRGB565Row_SSE2(row, rgb_buf, width);

	2076 free_aligned_buffer_64(row);

	2077 }

	2078 #endif // !defined(_MSC_VER) && (defined(__x86_64__) \|\| defined(__i386__))

	2079

	2080 #if defined(_M_IX86) \|\| defined(__x86_64__) \|\| defined(__i386__)

	2081 void I422ToARGB1555Row_SSSE3(const uint8* src_y,

	2082 const uint8* src_u,

	2083 const uint8* src_v,

	2084 uint8* rgb_buf,

	2085 int width) {

	2086 // Allocate a row of ARGB.

	2087 align_buffer_64(row, width * 4);

	2088 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);

	2089 ARGBToARGB1555Row_SSE2(row, rgb_buf, width);

	2090 free_aligned_buffer_64(row);

	2091 }

	2092

	2093 void I422ToARGB4444Row_SSSE3(const uint8* src_y,

	2094 const uint8* src_u,

	2095 const uint8* src_v,

	2096 uint8* rgb_buf,

	2097 int width) {

	2098 // Allocate a row of ARGB.

	2099 align_buffer_64(row, width * 4);

	2100 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);

	2101 ARGBToARGB4444Row_SSE2(row, rgb_buf, width);

	2102 free_aligned_buffer_64(row);

	2103 }

	2104

	2105 void NV12ToRGB565Row_SSSE3(const uint8* src_y,

	2106 const uint8* src_uv,

	2107 uint8* dst_rgb565,

	2108 int width) {

	2109 // Allocate a row of ARGB.

	2110 align_buffer_64(row, width * 4);

	2111 NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);

	2112 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);

	2113 free_aligned_buffer_64(row);

	2114 }

	2115

	2116 void NV21ToRGB565Row_SSSE3(const uint8* src_y,

	2117 const uint8* src_vu,

	2118 uint8* dst_rgb565,

	2119 int width) {

	2120 // Allocate a row of ARGB.

	2121 align_buffer_64(row, width * 4);

	2122 NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);

	2123 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);

	2124 free_aligned_buffer_64(row);

	2125 }

	2126

	2127 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,

	2128 uint8* dst_argb,

	2129 int width) {

	2130 // Allocate a rows of yuv.

	2131 align_buffer_64(row_y, ((width + 63) & ~63) * 2);

	2132 uint8* row_u = row_y + ((width + 63) & ~63);

	2133 uint8* row_v = row_u + ((width + 63) & ~63) / 2;

	2134 YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);

	2135 YUY2ToYRow_SSE2(src_yuy2, row_y, width);

	2136 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);

	2137 free_aligned_buffer_64(row_y);

	2138 }

	2139

	2140 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,

	2141 uint8* dst_argb,

	2142 int width) {

	2143 // Allocate a rows of yuv.

	2144 align_buffer_64(row_y, ((width + 63) & ~63) * 2);

	2145 uint8* row_u = row_y + ((width + 63) & ~63);

	2146 uint8* row_v = row_u + ((width + 63) & ~63) / 2;

	2147 YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);

	2148 YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);

	2149 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);

	2150 free_aligned_buffer_64(row_y);

	2151 }

	2152

	2153 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,

	2154 uint8* dst_argb,

	2155 int width) {

	2156 // Allocate a rows of yuv.

	2157 align_buffer_64(row_y, ((width + 63) & ~63) * 2);

	2158 uint8* row_u = row_y + ((width + 63) & ~63);

	2159 uint8* row_v = row_u + ((width + 63) & ~63) / 2;

	2160 UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);

	2161 UYVYToYRow_SSE2(src_uyvy, row_y, width);

	2162 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);

	2163 free_aligned_buffer_64(row_y);

	2164 }

	2165

	2166 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,

	2167 uint8* dst_argb,

	2168 int width) {

	2169 // Allocate a rows of yuv.

	2170 align_buffer_64(row_y, ((width + 63) & ~63) * 2);

	2171 uint8* row_u = row_y + ((width + 63) & ~63);

	2172 uint8* row_v = row_u + ((width + 63) & ~63) / 2;

	2173 UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);

	2174 UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);

	2175 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);

	2176 free_aligned_buffer_64(row_y);

	2177 }

	2178

	2179 #endif // defined(_M_IX86) \|\| defined(__x86_64__) \|\| defined(__i386__)

	2180 #endif // !defined(LIBYUV_DISABLE_X86)

	2181

	2182 void ARGBPolynomialRow_C(const uint8* src_argb,

	2183 uint8* dst_argb, const float* poly,

	2184 int width) {

	2185 int i;

	2186 for (i = 0; i < width; ++i) {

	2187 float b = (float)(src_argb[0]);

	2188 float g = (float)(src_argb[1]);

	2189 float r = (float)(src_argb[2]);

	2190 float a = (float)(src_argb[3]);

	2191 float b2 = b * b;

	2192 float g2 = g * g;

	2193 float r2 = r * r;

	2194 float a2 = a * a;

	2195 float db = poly[0] + poly[4] * b;

	2196 float dg = poly[1] + poly[5] * g;

	2197 float dr = poly[2] + poly[6] * r;

	2198 float da = poly[3] + poly[7] * a;

	2199 float b3 = b2 * b;

	2200 float g3 = g2 * g;

	2201 float r3 = r2 * r;

	2202 float a3 = a2 * a;

	2203 db += poly[8] * b2;

	2204 dg += poly[9] * g2;

	2205 dr += poly[10] * r2;

	2206 da += poly[11] * a2;

	2207 db += poly[12] * b3;

	2208 dg += poly[13] * g3;

	2209 dr += poly[14] * r3;

	2210 da += poly[15] * a3;

	2211

	2212 dst_argb[0] = Clamp((int32)(db));

	2213 dst_argb[1] = Clamp((int32)(dg));

	2214 dst_argb[2] = Clamp((int32)(dr));

	2215 dst_argb[3] = Clamp((int32)(da));

	2216 src_argb += 4;

	2217 dst_argb += 4;

	2218 }

	2219 }

	2220

	// Remaps bytes 0..2 of one ARGB pixel through a lookup table and copies
	// byte 3 unchanged. The table starts at luma plus the pixel's weighted sum
	// (bc, gc, rc applied to bytes 0, 1, 2) masked with 0x7F00, so candidate
	// tables lie 256 bytes apart.
	static __inline void LumaColorTablePixel(const uint8* src_px, uint8* dst_px,
	                                         const uint8* luma, uint32 bc,
	                                         uint32 gc, uint32 rc) {
	  const uint8* table = ((src_px[0] * bc + src_px[1] * gc +
	                         src_px[2] * rc) & 0x7F00u) + luma;
	  dst_px[0] = table[src_px[0]];
	  dst_px[1] = table[src_px[1]];
	  dst_px[2] = table[src_px[2]];
	  dst_px[3] = src_px[3];
	}

	// Applies a luminance-selected color table to a row of ARGB pixels.
	// lumacoeff packs the three channel weights: bits 0-7 for byte 0,
	// bits 8-15 for byte 1 and bits 16-23 for byte 2 of each pixel.
	// Luminance selects the table row; the color value selects the column.
	void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
	                             const uint8* luma, uint32 lumacoeff) {
	  const uint32 bc = lumacoeff & 0xff;
	  const uint32 gc = (lumacoeff >> 8) & 0xff;
	  const uint32 rc = (lumacoeff >> 16) & 0xff;
	  int remaining = width;

	  // Two pixels per pass.
	  while (remaining > 1) {
	    LumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
	    LumaColorTablePixel(src_argb + 4, dst_argb + 4, luma, bc, gc, rc);
	    src_argb += 8;
	    dst_argb += 8;
	    remaining -= 2;
	  }
	  // Odd width: one pixel is left over.
	  if (width & 1) {
	    LumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
	  }
	}

	2256

	// Copies byte 3 of every 4-byte pixel from src to dst and leaves the other
	// three bytes of each dst pixel untouched.
	void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
	  int remaining = width;
	  // Two pixels per pass: byte 3 of the first, byte 7 (4 + 3) of the second.
	  while (remaining > 1) {
	    dst[3] = src[3];
	    dst[7] = src[7];
	    src += 8;
	    dst += 8;
	    remaining -= 2;
	  }
	  // Odd width: one pixel is left over.
	  if (width & 1) {
	    dst[3] = src[3];
	  }
	}

	2269

	// Writes one src byte per pixel into byte 3 of each 4-byte dst pixel and
	// leaves the other three bytes of each dst pixel untouched.
	void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
	  int remaining = width;
	  // Two pixels per pass: 2 source bytes feed bytes 3 and 7 of dst.
	  while (remaining > 1) {
	    dst[3] = src[0];
	    dst[7] = src[1];
	    src += 2;
	    dst += 8;
	    remaining -= 2;
	  }
	  // Odd width: one pixel is left over.
	  if (width & 1) {
	    dst[3] = src[0];
	  }
	}

	2282

	2283 #ifdef __cplusplus

	2284 } // extern "C"

	2285 } // namespace libyuv

	2286 #endif

OLD	NEW

« no previous file with comments | « source/libvpx/third_party/libyuv/source/row_any.cc ('k') | source/libvpx/third_party/libyuv/source/row_mips.cc » ('j') | no next file with comments »