| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 #ifdef _DEBUG | 7 #ifdef _DEBUG |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #else | 9 #else |
| 10 #define DCHECK(a) | 10 #define DCHECK(a) |
| 11 #endif | 11 #endif |
| 12 | 12 |
| 13 extern "C" { | 13 extern "C" { |
| 14 #if USE_MMX | |
| 15 | 14 |
| 16 #define RGBY(i) { \ | 15 #if USE_SSE2 && defined(ARCH_CPU_X86_64) |
| 17 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
| 18 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
| 19 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ | |
| 20 0 \ | |
| 21 } | |
| 22 | |
| 23 #define RGBU(i) { \ | |
| 24 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ | |
| 25 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ | |
| 26 0, \ | |
| 27 static_cast<int16>(256 * 64 - 1) \ | |
| 28 } | |
| 29 | |
| 30 #define RGBV(i) { \ | |
| 31 0, \ | |
| 32 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ | |
| 33 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ | |
| 34 0 \ | |
| 35 } | |
| 36 | |
| 37 #define MMX_ALIGNED(var) var __attribute__((aligned(16))) | |
| 38 | |
| 39 | |
| 40 MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = { | |
| 41 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), | |
| 42 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), | |
| 43 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), | |
| 44 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), | |
| 45 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), | |
| 46 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), | |
| 47 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), | |
| 48 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), | |
| 49 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), | |
| 50 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), | |
| 51 RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), | |
| 52 RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), | |
| 53 RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), | |
| 54 RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), | |
| 55 RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), | |
| 56 RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), | |
| 57 RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), | |
| 58 RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), | |
| 59 RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), | |
| 60 RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), | |
| 61 RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), | |
| 62 RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), | |
| 63 RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), | |
| 64 RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F), | |
| 65 RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), | |
| 66 RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), | |
| 67 RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), | |
| 68 RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), | |
| 69 RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), | |
| 70 RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), | |
| 71 RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), | |
| 72 RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), | |
| 73 RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), | |
| 74 RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), | |
| 75 RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), | |
| 76 RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), | |
| 77 RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), | |
| 78 RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), | |
| 79 RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), | |
| 80 RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), | |
| 81 RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), | |
| 82 RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), | |
| 83 RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), | |
| 84 RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), | |
| 85 RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), | |
| 86 RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), | |
| 87 RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), | |
| 88 RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), | |
| 89 RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), | |
| 90 RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), | |
| 91 RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), | |
| 92 RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), | |
| 93 RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), | |
| 94 RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), | |
| 95 RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), | |
| 96 RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), | |
| 97 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), | |
| 98 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), | |
| 99 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), | |
| 100 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), | |
| 101 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), | |
| 102 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), | |
| 103 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), | |
| 104 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), | |
| 105 | |
| 106 // Chroma U table. | |
| 107 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), | |
| 108 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), | |
| 109 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), | |
| 110 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), | |
| 111 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), | |
| 112 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), | |
| 113 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), | |
| 114 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), | |
| 115 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), | |
| 116 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), | |
| 117 RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), | |
| 118 RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), | |
| 119 RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), | |
| 120 RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), | |
| 121 RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), | |
| 122 RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), | |
| 123 RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), | |
| 124 RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), | |
| 125 RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), | |
| 126 RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), | |
| 127 RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), | |
| 128 RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), | |
| 129 RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), | |
| 130 RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), | |
| 131 RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), | |
| 132 RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), | |
| 133 RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), | |
| 134 RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), | |
| 135 RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), | |
| 136 RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), | |
| 137 RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), | |
| 138 RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), | |
| 139 RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), | |
| 140 RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), | |
| 141 RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), | |
| 142 RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), | |
| 143 RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), | |
| 144 RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), | |
| 145 RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), | |
| 146 RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), | |
| 147 RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), | |
| 148 RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), | |
| 149 RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), | |
| 150 RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), | |
| 151 RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), | |
| 152 RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), | |
| 153 RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), | |
| 154 RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), | |
| 155 RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), | |
| 156 RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), | |
| 157 RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), | |
| 158 RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), | |
| 159 RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), | |
| 160 RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), | |
| 161 RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), | |
| 162 RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), | |
| 163 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), | |
| 164 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), | |
| 165 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), | |
| 166 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), | |
| 167 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), | |
| 168 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), | |
| 169 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), | |
| 170 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), | |
| 171 | |
| 172 // Chroma V table. | |
| 173 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), | |
| 174 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), | |
| 175 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), | |
| 176 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), | |
| 177 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), | |
| 178 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), | |
| 179 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), | |
| 180 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), | |
| 181 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), | |
| 182 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), | |
| 183 RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), | |
| 184 RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), | |
| 185 RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), | |
| 186 RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), | |
| 187 RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), | |
| 188 RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), | |
| 189 RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), | |
| 190 RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), | |
| 191 RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), | |
| 192 RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), | |
| 193 RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), | |
| 194 RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), | |
| 195 RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), | |
| 196 RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), | |
| 197 RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), | |
| 198 RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), | |
| 199 RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), | |
| 200 RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), | |
| 201 RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), | |
| 202 RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), | |
| 203 RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), | |
| 204 RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), | |
| 205 RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), | |
| 206 RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), | |
| 207 RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), | |
| 208 RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), | |
| 209 RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), | |
| 210 RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), | |
| 211 RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), | |
| 212 RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), | |
| 213 RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), | |
| 214 RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), | |
| 215 RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), | |
| 216 RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), | |
| 217 RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), | |
| 218 RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), | |
| 219 RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), | |
| 220 RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), | |
| 221 RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), | |
| 222 RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), | |
| 223 RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), | |
| 224 RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), | |
| 225 RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), | |
| 226 RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), | |
| 227 RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), | |
| 228 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), | |
| 229 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), | |
| 230 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), | |
| 231 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), | |
| 232 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), | |
| 233 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), | |
| 234 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), | |
| 235 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), | |
| 236 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), | |
| 237 }; | |
| 238 | |
| 239 #undef RGBY | |
| 240 #undef RGBU | |
| 241 #undef RGBV | |
| 242 #undef MMX_ALIGNED | |
| 243 | |
| 244 #if defined(ARCH_CPU_X86_64) | |
| 245 | 16 |
| 246 // AMD64 ABI uses register parameters. | 17 // AMD64 ABI uses register parameters. |
| 247 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi | 18 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi |
| 248 const uint8* u_buf, // rsi | 19 const uint8* u_buf, // rsi |
| 249 const uint8* v_buf, // rdx | 20 const uint8* v_buf, // rdx |
| 250 uint8* rgb_buf, // rcx | 21 uint8* rgb_buf, // rcx |
| 251 int width) { // r8 | 22 int source_width) { // r8 |
| 252 asm( | 23 asm( |
| 253 "jmp convertend\n" | 24 "jmp convertend\n" |
| 254 "convertloop:" | 25 "convertloop:" |
| 255 "movzb (%1),%%r10\n" | 26 "movzb (%1),%%r10\n" |
| 256 "add $0x1,%1\n" | 27 "add $0x1,%1\n" |
| 257 "movzb (%2),%%r11\n" | 28 "movzb (%2),%%r11\n" |
| 258 "add $0x1,%2\n" | 29 "add $0x1,%2\n" |
| 259 "movq 2048(%5,%%r10,8),%%xmm0\n" | 30 "movq 2048(%5,%%r10,8),%%xmm0\n" |
| 260 "movzb (%0),%%r10\n" | 31 "movzb (%0),%%r10\n" |
| 261 "movq 4096(%5,%%r11,8),%%xmm1\n" | 32 "movq 4096(%5,%%r11,8),%%xmm1\n" |
| (...skipping 27 matching lines...) Expand all Loading... |
| 289 "paddsw %%xmm0,%%xmm1\n" | 60 "paddsw %%xmm0,%%xmm1\n" |
| 290 "psraw $0x6,%%xmm1\n" | 61 "psraw $0x6,%%xmm1\n" |
| 291 "packuswb %%xmm1,%%xmm1\n" | 62 "packuswb %%xmm1,%%xmm1\n" |
| 292 "movd %%xmm1,0x0(%3)\n" | 63 "movd %%xmm1,0x0(%3)\n" |
| 293 "convertdone:" | 64 "convertdone:" |
| 294 : | 65 : |
| 295 : "r"(y_buf), // %0 | 66 : "r"(y_buf), // %0 |
| 296 "r"(u_buf), // %1 | 67 "r"(u_buf), // %1 |
| 297 "r"(v_buf), // %2 | 68 "r"(v_buf), // %2 |
| 298 "r"(rgb_buf), // %3 | 69 "r"(rgb_buf), // %3 |
| 299 "r"(width), // %4 | 70 "r"(source_width), // %4 |
| 300 "r" (kCoefficientsRgbY) // %5 | 71 "r" (kCoefficientsRgbY) // %5 |
| 301 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" | 72 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" |
| 302 ); | 73 ); |
| 303 } | 74 } |
| 304 | 75 |
| 305 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi | 76 void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi |
| 306 const uint8* u_buf, // rsi | 77 const uint8* u_buf, // rsi |
| 307 const uint8* v_buf, // rdx | 78 const uint8* v_buf, // rdx |
| 308 uint8* rgb_buf, // rcx | 79 uint8* rgb_buf, // rcx |
| 309 int width, // r8 | 80 int source_width, // r8 |
| 310 int scaled_dx) { // r9 | 81 int source_dx) { // r9 |
| 311 asm( | 82 asm( |
| 312 "xor %%r11,%%r11\n" | 83 "xor %%r11,%%r11\n" |
| 313 "sub $0x2,%4\n" | 84 "sub $0x2,%4\n" |
| 314 "js scalenext\n" | 85 "js scalenext\n" |
| 315 | 86 |
| 316 "scaleloop:" | 87 "scaleloop:" |
| 317 "mov %%r11,%%r10\n" | 88 "mov %%r11,%%r10\n" |
| 318 "sar $0x11,%%r10\n" | 89 "sar $0x11,%%r10\n" |
| 319 "movzb (%1,%%r10,1),%%rax\n" | 90 "movzb (%1,%%r10,1),%%rax\n" |
| 320 "movq 2048(%5,%%rax,8),%%xmm0\n" | 91 "movq 2048(%5,%%rax,8),%%xmm0\n" |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 357 "psraw $0x6,%%xmm1\n" | 128 "psraw $0x6,%%xmm1\n" |
| 358 "packuswb %%xmm1,%%xmm1\n" | 129 "packuswb %%xmm1,%%xmm1\n" |
| 359 "movd %%xmm1,0x0(%3)\n" | 130 "movd %%xmm1,0x0(%3)\n" |
| 360 | 131 |
| 361 "scaledone:" | 132 "scaledone:" |
| 362 : | 133 : |
| 363 : "r"(y_buf), // %0 | 134 : "r"(y_buf), // %0 |
| 364 "r"(u_buf), // %1 | 135 "r"(u_buf), // %1 |
| 365 "r"(v_buf), // %2 | 136 "r"(v_buf), // %2 |
| 366 "r"(rgb_buf), // %3 | 137 "r"(rgb_buf), // %3 |
| 367 "r"(width), // %4 | 138 "r"(source_width), // %4 |
| 368 "r" (kCoefficientsRgbY), // %5 | 139 "r" (kCoefficientsRgbY), // %5 |
| 369 "r"(static_cast<long>(scaled_dx)) // %6 | 140 "r"(static_cast<long>(source_dx)) // %6 |
| 370 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" | 141 : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" |
| 371 ); | 142 ); |
| 372 } | 143 } |
| 373 | 144 |
| 374 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 145 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
| 375 const uint8* u_buf, | 146 const uint8* u_buf, |
| 376 const uint8* v_buf, | 147 const uint8* v_buf, |
| 377 uint8* rgb_buf, | 148 uint8* rgb_buf, |
| 378 int width, | 149 int source_width, |
| 379 int scaled_dx) { | 150 int source_dx) { |
| 380 asm( | 151 asm( |
| 381 "xor %%r11,%%r11\n" | 152 "xor %%r11,%%r11\n" |
| 382 "sub $0x2,%4\n" | 153 "sub $0x2,%4\n" |
| 383 "js .lscalenext\n" | 154 "js .lscalenext\n" |
| 384 | 155 |
| 385 ".lscaleloop:" | 156 ".lscaleloop:" |
| 386 "mov %%r11,%%r10\n" | 157 "mov %%r11,%%r10\n" |
| 387 "sar $0x11,%%r10\n" | 158 "sar $0x11,%%r10\n" |
| 388 | 159 |
| 389 "movzb (%1, %%r10, 1), %%r13 \n" | 160 "movzb (%1, %%r10, 1), %%r13 \n" |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 470 "psraw $0x6,%%xmm1\n" | 241 "psraw $0x6,%%xmm1\n" |
| 471 "packuswb %%xmm1,%%xmm1\n" | 242 "packuswb %%xmm1,%%xmm1\n" |
| 472 "movd %%xmm1,0x0(%3)\n" | 243 "movd %%xmm1,0x0(%3)\n" |
| 473 | 244 |
| 474 ".lscaledone:" | 245 ".lscaledone:" |
| 475 : | 246 : |
| 476 : "r"(y_buf), // %0 | 247 : "r"(y_buf), // %0 |
| 477 "r"(u_buf), // %1 | 248 "r"(u_buf), // %1 |
| 478 "r"(v_buf), // %2 | 249 "r"(v_buf), // %2 |
| 479 "r"(rgb_buf), // %3 | 250 "r"(rgb_buf), // %3 |
| 480 "r"(width), // %4 | 251 "r"(source_width), // %4 |
| 481 "r" (kCoefficientsRgbY), // %5 | 252 "r" (kCoefficientsRgbY), // %5 |
| 482 "r"(static_cast<long>(scaled_dx)) // %6 | 253 "r"(static_cast<long>(source_dx)) // %6 |
| 483 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" | 254 : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" |
| 484 ); | 255 ); |
| 485 } | 256 } |
| 486 | 257 |
| 487 #else // !AMD64 | 258 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__) |
| 488 | 259 |
| 489 // PIC version is slower because fewer registers are available, so | 260 // PIC version is slower because fewer registers are available, so |
| 490 // non-PIC is used on platforms where it is possible. | 261 // non-PIC is used on platforms where it is possible. |
| 491 | 262 |
| 492 #if !defined(__PIC__) | |
| 493 | |
| 494 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 263 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 495 const uint8* u_buf, | 264 const uint8* u_buf, |
| 496 const uint8* v_buf, | 265 const uint8* v_buf, |
| 497 uint8* rgb_buf, | 266 uint8* rgb_buf, |
| 498 int width); | 267 int source_width); |
| 499 | 268 |
| 500 asm( | 269 asm( |
| 501 ".global FastConvertYUVToRGB32Row\n" | 270 ".global FastConvertYUVToRGB32Row\n" |
| 502 "FastConvertYUVToRGB32Row:\n" | 271 "FastConvertYUVToRGB32Row:\n" |
| 503 "pusha\n" | 272 "pusha\n" |
| 504 "mov 0x24(%esp),%edx\n" | 273 "mov 0x24(%esp),%edx\n" |
| 505 "mov 0x28(%esp),%edi\n" | 274 "mov 0x28(%esp),%edi\n" |
| 506 "mov 0x2c(%esp),%esi\n" | 275 "mov 0x2c(%esp),%esi\n" |
| 507 "mov 0x30(%esp),%ebp\n" | 276 "mov 0x30(%esp),%ebp\n" |
| 508 "mov 0x34(%esp),%ecx\n" | 277 "mov 0x34(%esp),%ecx\n" |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 547 "convertdone:" | 316 "convertdone:" |
| 548 "popa\n" | 317 "popa\n" |
| 549 "ret\n" | 318 "ret\n" |
| 550 ); | 319 ); |
| 551 | 320 |
| 552 | 321 |
| 553 void ScaleYUVToRGB32Row(const uint8* y_buf, | 322 void ScaleYUVToRGB32Row(const uint8* y_buf, |
| 554 const uint8* u_buf, | 323 const uint8* u_buf, |
| 555 const uint8* v_buf, | 324 const uint8* v_buf, |
| 556 uint8* rgb_buf, | 325 uint8* rgb_buf, |
| 557 int width, | 326 int source_width, |
| 558 int scaled_dx); | 327 int source_dx); |
| 559 | 328 |
| 560 asm( | 329 asm( |
| 561 ".global ScaleYUVToRGB32Row\n" | 330 ".global ScaleYUVToRGB32Row\n" |
| 562 "ScaleYUVToRGB32Row:\n" | 331 "ScaleYUVToRGB32Row:\n" |
| 563 "pusha\n" | 332 "pusha\n" |
| 564 "mov 0x24(%esp),%edx\n" | 333 "mov 0x24(%esp),%edx\n" |
| 565 "mov 0x28(%esp),%edi\n" | 334 "mov 0x28(%esp),%edi\n" |
| 566 "mov 0x2c(%esp),%esi\n" | 335 "mov 0x2c(%esp),%esi\n" |
| 567 "mov 0x30(%esp),%ebp\n" | 336 "mov 0x30(%esp),%ebp\n" |
| 568 "mov 0x34(%esp),%ecx\n" | 337 "mov 0x34(%esp),%ecx\n" |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 621 | 390 |
| 622 "scaledone:" | 391 "scaledone:" |
| 623 "popa\n" | 392 "popa\n" |
| 624 "ret\n" | 393 "ret\n" |
| 625 ); | 394 ); |
| 626 | 395 |
| 627 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 396 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
| 628 const uint8* u_buf, | 397 const uint8* u_buf, |
| 629 const uint8* v_buf, | 398 const uint8* v_buf, |
| 630 uint8* rgb_buf, | 399 uint8* rgb_buf, |
| 631 int width, | 400 int source_width, |
| 632 int scaled_dx); | 401 int source_dx); |
| 633 | 402 |
| 634 asm( | 403 asm( |
| 635 ".global LinearScaleYUVToRGB32Row\n" | 404 ".global LinearScaleYUVToRGB32Row\n" |
| 636 "LinearScaleYUVToRGB32Row:\n" | 405 "LinearScaleYUVToRGB32Row:\n" |
| 637 "pusha\n" | 406 "pusha\n" |
| 638 "mov 0x24(%esp),%edx\n" | 407 "mov 0x24(%esp),%edx\n" |
| 639 "mov 0x28(%esp),%edi\n" | 408 "mov 0x28(%esp),%edi\n" |
| 640 "mov 0x30(%esp),%ebp\n" | 409 "mov 0x30(%esp),%ebp\n" |
| 641 "xor %ebx,%ebx\n" | 410 "xor %ebx,%ebx\n" |
| 642 | 411 |
| 643 // width = width * scaled_dx + ebx | 412 // source_width = source_width * source_dx + ebx |
| 644 "mov 0x34(%esp), %ecx\n" | 413 "mov 0x34(%esp), %ecx\n" |
| 645 "imull 0x38(%esp), %ecx\n" | 414 "imull 0x38(%esp), %ecx\n" |
| 646 "addl %ebx, %ecx\n" | 415 "addl %ebx, %ecx\n" |
| 647 "mov %ecx, 0x34(%esp)\n" | 416 "mov %ecx, 0x34(%esp)\n" |
| 648 | 417 |
| 649 "jmp .lscaleend\n" | 418 "jmp .lscaleend\n" |
| 650 | 419 |
| 651 ".lscaleloop:" | 420 ".lscaleloop:" |
| 652 "mov %ebx,%eax\n" | 421 "mov %ebx,%eax\n" |
| 653 "sar $0x11,%eax\n" | 422 "sar $0x11,%eax\n" |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 725 | 494 |
| 726 ".lscalelastpixel:" | 495 ".lscalelastpixel:" |
| 727 "paddsw %mm0, %mm1\n" | 496 "paddsw %mm0, %mm1\n" |
| 728 "psraw $6, %mm1\n" | 497 "psraw $6, %mm1\n" |
| 729 "packuswb %mm1, %mm1\n" | 498 "packuswb %mm1, %mm1\n" |
| 730 "movd %mm1, (%ebp)\n" | 499 "movd %mm1, (%ebp)\n" |
| 731 "popa\n" | 500 "popa\n" |
| 732 "ret\n" | 501 "ret\n" |
| 733 ); | 502 ); |
| 734 | 503 |
| 735 #else // __PIC__ | 504 #elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__) |
| 736 | 505 |
| 737 extern void PICConvertYUVToRGB32Row(const uint8* y_buf, | 506 extern void PICConvertYUVToRGB32Row(const uint8* y_buf, |
| 738 const uint8* u_buf, | 507 const uint8* u_buf, |
| 739 const uint8* v_buf, | 508 const uint8* v_buf, |
| 740 uint8* rgb_buf, | 509 uint8* rgb_buf, |
| 741 int width, | 510 int source_width, |
| 742 int16 *kCoefficientsRgbY); | 511 int16 *kCoefficientsRgbY); |
| 743 __asm__( | 512 __asm__( |
| 744 "_PICConvertYUVToRGB32Row:\n" | 513 "_PICConvertYUVToRGB32Row:\n" |
| 745 "pusha\n" | 514 "pusha\n" |
| 746 "mov 0x24(%esp),%edx\n" | 515 "mov 0x24(%esp),%edx\n" |
| 747 "mov 0x28(%esp),%edi\n" | 516 "mov 0x28(%esp),%edi\n" |
| 748 "mov 0x2c(%esp),%esi\n" | 517 "mov 0x2c(%esp),%esi\n" |
| 749 "mov 0x30(%esp),%ebp\n" | 518 "mov 0x30(%esp),%ebp\n" |
| 750 "mov 0x38(%esp),%ecx\n" | 519 "mov 0x38(%esp),%ecx\n" |
| 751 | 520 |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 789 "movd %mm1,0x0(%ebp)\n" | 558 "movd %mm1,0x0(%ebp)\n" |
| 790 ".Lconvertdone:\n" | 559 ".Lconvertdone:\n" |
| 791 "popa\n" | 560 "popa\n" |
| 792 "ret\n" | 561 "ret\n" |
| 793 ); | 562 ); |
| 794 | 563 |
| 795 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 564 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 796 const uint8* u_buf, | 565 const uint8* u_buf, |
| 797 const uint8* v_buf, | 566 const uint8* v_buf, |
| 798 uint8* rgb_buf, | 567 uint8* rgb_buf, |
| 799 int width) { | 568 int source_width) { |
| 800 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, | 569 PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, |
| 801 &kCoefficientsRgbY[0][0]); | 570 &kCoefficientsRgbY[0][0]); |
| 802 } | 571 } |
| 803 | 572 |
| 804 extern void PICScaleYUVToRGB32Row(const uint8* y_buf, | 573 extern void PICScaleYUVToRGB32Row(const uint8* y_buf, |
| 805 const uint8* u_buf, | 574 const uint8* u_buf, |
| 806 const uint8* v_buf, | 575 const uint8* v_buf, |
| 807 uint8* rgb_buf, | 576 uint8* rgb_buf, |
| 808 int width, | 577 int source_width, |
| 809 int scaled_dx, | 578 int source_dx, |
| 810 int16 *kCoefficientsRgbY); | 579 int16 *kCoefficientsRgbY); |
| 811 | 580 |
| 812 __asm__( | 581 __asm__( |
| 813 "_PICScaleYUVToRGB32Row:\n" | 582 "_PICScaleYUVToRGB32Row:\n" |
| 814 "pusha\n" | 583 "pusha\n" |
| 815 "mov 0x24(%esp),%edx\n" | 584 "mov 0x24(%esp),%edx\n" |
| 816 "mov 0x28(%esp),%edi\n" | 585 "mov 0x28(%esp),%edi\n" |
| 817 "mov 0x2c(%esp),%esi\n" | 586 "mov 0x2c(%esp),%esi\n" |
| 818 "mov 0x30(%esp),%ebp\n" | 587 "mov 0x30(%esp),%ebp\n" |
| 819 "mov 0x3c(%esp),%ecx\n" | 588 "mov 0x3c(%esp),%ecx\n" |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 873 "Lscaledone:" | 642 "Lscaledone:" |
| 874 "popa\n" | 643 "popa\n" |
| 875 "ret\n" | 644 "ret\n" |
| 876 ); | 645 ); |
| 877 | 646 |
| 878 | 647 |
| 879 void ScaleYUVToRGB32Row(const uint8* y_buf, | 648 void ScaleYUVToRGB32Row(const uint8* y_buf, |
| 880 const uint8* u_buf, | 649 const uint8* u_buf, |
| 881 const uint8* v_buf, | 650 const uint8* v_buf, |
| 882 uint8* rgb_buf, | 651 uint8* rgb_buf, |
| 883 int width, | 652 int source_width, |
| 884 int scaled_dx) { | 653 int source_dx) { |
| 885 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, | 654 PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source_dx, |
| 886 &kCoefficientsRgbY[0][0]); | 655 &kCoefficientsRgbY[0][0]); |
| 887 } | 656 } |
| 888 | 657 |
| 889 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, | 658 void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, |
| 890 const uint8* u_buf, | 659 const uint8* u_buf, |
| 891 const uint8* v_buf, | 660 const uint8* v_buf, |
| 892 uint8* rgb_buf, | 661 uint8* rgb_buf, |
| 893 int width, | 662 int source_width, |
| 894 int scaled_dx, | 663 int source_dx, |
| 895 int16 *kCoefficientsRgbY); | 664 int16 *kCoefficientsRgbY); |
| 896 | 665 |
| 897 asm( | 666 asm( |
| 898 "_PICLinearScaleYUVToRGB32Row:\n" | 667 "_PICLinearScaleYUVToRGB32Row:\n" |
| 899 "pusha\n" | 668 "pusha\n" |
| 900 "mov 0x24(%esp),%edx\n" | 669 "mov 0x24(%esp),%edx\n" |
| 901 "mov 0x30(%esp),%ebp\n" | 670 "mov 0x30(%esp),%ebp\n" |
| 902 "mov 0x34(%esp),%ecx\n" | 671 "mov 0x34(%esp),%ecx\n" |
| 903 "mov 0x3c(%esp),%edi\n" | 672 "mov 0x3c(%esp),%edi\n" |
| 904 "xor %ebx,%ebx\n" | 673 "xor %ebx,%ebx\n" |
| 905 | 674 |
| 906 // width = width * scaled_dx + ebx | 675 // source_width = source_width * source_dx + ebx |
| 907 "mov 0x34(%esp), %ecx\n" | 676 "mov 0x34(%esp), %ecx\n" |
| 908 "imull 0x38(%esp), %ecx\n" | 677 "imull 0x38(%esp), %ecx\n" |
| 909 "addl %ebx, %ecx\n" | 678 "addl %ebx, %ecx\n" |
| 910 "mov %ecx, 0x34(%esp)\n" | 679 "mov %ecx, 0x34(%esp)\n" |
| 911 | 680 |
| 912 "jmp .lscaleend\n" | 681 "jmp .lscaleend\n" |
| 913 | 682 |
| 914 ".lscaleloop:" | 683 ".lscaleloop:" |
| 915 "mov 0x28(%esp),%esi\n" | 684 "mov 0x28(%esp),%esi\n" |
| 916 "mov %ebx,%eax\n" | 685 "mov %ebx,%eax\n" |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 993 "packuswb %mm1, %mm1\n" | 762 "packuswb %mm1, %mm1\n" |
| 994 "movd %mm1, (%ebp)\n" | 763 "movd %mm1, (%ebp)\n" |
| 995 "popa\n" | 764 "popa\n" |
| 996 "ret\n" | 765 "ret\n" |
| 997 ); | 766 ); |
| 998 | 767 |
| 999 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 768 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
| 1000 const uint8* u_buf, | 769 const uint8* u_buf, |
| 1001 const uint8* v_buf, | 770 const uint8* v_buf, |
| 1002 uint8* rgb_buf, | 771 uint8* rgb_buf, |
| 1003 int width, | 772 int source_width, |
| 1004 int scaled_dx) { | 773 int source_dx) { |
| 1005 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, | 774 PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, source_width, source_dx, |
| 1006 &kCoefficientsRgbY[0][0]); | 775 &kCoefficientsRgbY[0][0]); |
| 1007 } | 776 } |
| 1008 | 777 |
| 1009 #endif // !__PIC__ | 778 #else // Use C code instead of MMX/SSE2. |
| 1010 | 779 |
| 1011 #endif // !AMD64 | 780 // C reference code that mimics the YUV assembly. |
| 1012 | 781 #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) |
| 1013 #else // USE_MMX | 782 #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \\ |
| 1014 | 783 (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) |
| 1015 // Reference version of YUV converter. | |
| 1016 static const int kClipTableSize = 256; | |
| 1017 static const int kClipOverflow = 288; // Cb max is 535. | |
| 1018 | |
| 1019 static uint8 kRgbClipTable[kClipOverflow + | |
| 1020 kClipTableSize + | |
| 1021 kClipOverflow] = { | |
| 1022 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values | |
| 1023 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. | |
| 1024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1025 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1026 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1027 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1028 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1029 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1030 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1031 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1032 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1033 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1034 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1035 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1036 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1037 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1038 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1039 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1040 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1041 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1042 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1043 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1044 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1045 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1046 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1047 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1048 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1049 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1050 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1051 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1052 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1053 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1054 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1055 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1056 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1057 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | |
| 1058 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values. | |
| 1059 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, | |
| 1060 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, | |
| 1061 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, | |
| 1062 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, | |
| 1063 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, | |
| 1064 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, | |
| 1065 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, | |
| 1066 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, | |
| 1067 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, | |
| 1068 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, | |
| 1069 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, | |
| 1070 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, | |
| 1071 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, | |
| 1072 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, | |
| 1073 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, | |
| 1074 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, | |
| 1075 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, | |
| 1076 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, | |
| 1077 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, | |
| 1078 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, | |
| 1079 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, | |
| 1080 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, | |
| 1081 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, | |
| 1082 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, | |
| 1083 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, | |
| 1084 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, | |
| 1085 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | |
| 1086 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, | |
| 1087 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, | |
| 1088 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, | |
| 1089 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, | |
| 1090 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values | |
| 1091 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255. | |
| 1092 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1093 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1094 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1095 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1096 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1097 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1098 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1099 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1100 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1101 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1102 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1103 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1104 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1105 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1106 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1107 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1108 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1109 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1110 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1111 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1112 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1113 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1114 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1115 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1116 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1117 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1118 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1119 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1120 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1121 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1122 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1123 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1124 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1125 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, | |
| 1126 }; | |
| 1127 | |
| 1128 // Clip an rgb channel value to 0..255 range. | |
| 1129 // Source is signed fixed point 8.8. | |
| 1130 // Table allows for values to underflow or overflow by 128. | |
| 1131 // Therefore source range is -128 to 384. | |
| 1132 // Output clips to unsigned 0 to 255. | |
| 1133 static inline uint32 clip(int32 value) { | |
| 1134 DCHECK(((value >> 8) + kClipOverflow) >= 0); | |
| 1135 DCHECK(((value >> 8) + kClipOverflow) < | |
| 1136 (kClipOverflow + kClipTableSize + kClipOverflow)); | |
| 1137 return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]); | |
| 1138 } | |
| 1139 | 784 |
| 1140 static inline void YuvPixel(uint8 y, | 785 static inline void YuvPixel(uint8 y, |
| 1141 uint8 u, | 786 uint8 u, |
| 1142 uint8 v, | 787 uint8 v, |
| 1143 uint8* rgb_buf) { | 788 uint8* rgb_buf) { |
| 1144 int32 d = static_cast<int32>(u) - 128; | |
| 1145 int32 e = static_cast<int32>(v) - 128; | |
| 1146 | 789 |
| 1147 int32 cb = (516 * d + 128); | 790 int b = kCoefficientsRgbY[256+u][0]; |
| 1148 int32 cg = (- 100 * d - 208 * e + 128); | 791 int g = kCoefficientsRgbY[256+u][1]; |
| 1149 int32 cr = (409 * e + 128); | 792 int r = kCoefficientsRgbY[256+u][2]; |
| 793 int a = kCoefficientsRgbY[256+u][3]; |
| 1150 | 794 |
| 1151 int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128); | 795 b = paddsw(b, kCoefficientsRgbY[512+v][0]); |
| 1152 *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) | | 796 g = paddsw(g, kCoefficientsRgbY[512+v][1]); |
| 1153 (clip(C298a + cg) << 8) | | 797 r = paddsw(r, kCoefficientsRgbY[512+v][2]); |
| 1154 (clip(C298a + cr) << 16) | | 798 a = paddsw(a, kCoefficientsRgbY[512+v][3]); |
| 1155 (0xff000000); | 799 |
| 800 b = paddsw(b, kCoefficientsRgbY[y][0]); |
| 801 g = paddsw(g, kCoefficientsRgbY[y][1]); |
| 802 r = paddsw(r, kCoefficientsRgbY[y][2]); |
| 803 a = paddsw(a, kCoefficientsRgbY[y][3]); |
| 804 |
| 805 b >>= 6; |
| 806 g >>= 6; |
| 807 r >>= 6; |
| 808 a >>= 6; |
| 809 |
| 810 *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | |
| 811 (packuswb(g) << 8) | |
| 812 (packuswb(r) << 16) | |
| 813 (packuswb(a) << 24); |
| 1156 } | 814 } |
| 1157 | 815 |
| 816 #if TEST_MMX_YUV |
| 817 static inline void YuvPixel(uint8 y, |
| 818 uint8 u, |
| 819 uint8 v, |
| 820 uint8* rgb_buf) { |
| 821 |
| 822 __asm { |
| 823 movzx eax, u |
| 824 movq mm0, [kCoefficientsRgbY+2048 + 8 * eax] |
| 825 movzx eax, v |
| 826 paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax] |
| 827 movzx eax, y |
| 828 movq mm1, [kCoefficientsRgbY + 8 * eax] |
| 829 paddsw mm1, mm0 |
| 830 psraw mm1, 6 |
| 831 packuswb mm1, mm1 |
| 832 mov eax, rgb_buf |
| 833 movd [eax], mm1 |
| 834 emms |
| 835 } |
| 836 } |
| 837 #endif |
| 838 |
| 1158 void FastConvertYUVToRGB32Row(const uint8* y_buf, | 839 void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 1159 const uint8* u_buf, | 840 const uint8* u_buf, |
| 1160 const uint8* v_buf, | 841 const uint8* v_buf, |
| 1161 uint8* rgb_buf, | 842 uint8* rgb_buf, |
| 1162 int width) { | 843 int source_width) { |
| 1163 for (int x = 0; x < width; x += 2) { | 844 for (int x = 0; x < source_width; x += 2) { |
| 1164 uint8 u = u_buf[x >> 1]; | 845 uint8 u = u_buf[x >> 1]; |
| 1165 uint8 v = v_buf[x >> 1]; | 846 uint8 v = v_buf[x >> 1]; |
| 1166 uint8 y0 = y_buf[x]; | 847 uint8 y0 = y_buf[x]; |
| 1167 YuvPixel(y0, u, v, rgb_buf); | 848 YuvPixel(y0, u, v, rgb_buf); |
| 1168 if ((x + 1) < width) { | 849 if ((x + 1) < source_width) { |
| 1169 uint8 y1 = y_buf[x + 1]; | 850 uint8 y1 = y_buf[x + 1]; |
| 1170 YuvPixel(y1, u, v, rgb_buf + 4); | 851 YuvPixel(y1, u, v, rgb_buf + 4); |
| 1171 } | 852 } |
| 1172 rgb_buf += 8; // Advance 2 pixels. | 853 rgb_buf += 8; // Advance 2 pixels. |
| 1173 } | 854 } |
| 1174 } | 855 } |
| 1175 | 856 |
| 1176 // 16.16 fixed point is used. A shift by 16 isolates the integer. | 857 // 16.16 fixed point is used. A shift by 16 isolates the integer. |
| 1177 // A shift by 17 is used to further subsample the chrominance channels. | 858 // A shift by 17 is used to further subsample the chrominance channels. |
| 1178 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, | 859 // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, |
| 1179 // for 1/65536 pixel accurate interpolation. | 860 // for 1/65536 pixel accurate interpolation. |
| 1180 void ScaleYUVToRGB32Row(const uint8* y_buf, | 861 void ScaleYUVToRGB32Row(const uint8* y_buf, |
| 1181 const uint8* u_buf, | 862 const uint8* u_buf, |
| 1182 const uint8* v_buf, | 863 const uint8* v_buf, |
| 1183 uint8* rgb_buf, | 864 uint8* rgb_buf, |
| 1184 int width, | 865 int source_width, |
| 1185 int scaled_dx) { | 866 int dx) { |
| 1186 int scaled_x = 0; | 867 int x = 0; |
| 1187 for (int x = 0; x < width; ++x) { | 868 for (int i = 0; i < source_width; i += 2) { |
| 1188 uint8 u = u_buf[scaled_x >> 17]; | 869 int y = y_buf[x >> 16]; |
| 1189 uint8 v = v_buf[scaled_x >> 17]; | 870 int u = u_buf[(x >> 17)]; |
| 1190 uint8 y0 = y_buf[scaled_x >> 16]; | 871 int v = v_buf[(x >> 17)]; |
| 1191 YuvPixel(y0, u, v, rgb_buf); | 872 YuvPixel(y, u, v, rgb_buf); |
| 1192 rgb_buf += 4; | 873 x += dx; |
| 1193 scaled_x += scaled_dx; | 874 if ((i + 1) < source_width) { |
| 875 y = y_buf[x >> 16]; |
| 876 YuvPixel(y, u, v, rgb_buf+4); |
| 877 x += dx; |
| 878 } |
| 879 rgb_buf += 8; |
| 1194 } | 880 } |
| 1195 } | 881 } |
| 1196 | 882 |
| 1197 void LinearScaleYUVToRGB32Row(const uint8* y_buf, | 883 void LinearScaleYUVToRGB32Row(const uint8* y_buf, |
| 1198 const uint8* u_buf, | 884 const uint8* u_buf, |
| 1199 const uint8* v_buf, | 885 const uint8* v_buf, |
| 1200 uint8* rgb_buf, | 886 uint8* rgb_buf, |
| 1201 int width, | 887 int source_width, |
| 1202 int dx) { | 888 int dx) { |
| 1203 for (int x = 0; x < width * dx; x += dx) { | 889 int x = 0; |
| 890 for (int i = 0; i < source_width; i += 2) { |
| 1204 int y0 = y_buf[x >> 16]; | 891 int y0 = y_buf[x >> 16]; |
| 1205 int y1 = y_buf[(x >> 16) + 1]; | 892 int y1 = y_buf[(x >> 16) + 1]; |
| 1206 int u0 = u_buf[(x >> 17)]; | 893 int u0 = u_buf[(x >> 17)]; |
| 1207 int u1 = u_buf[(x >> 17) + 1]; | 894 int u1 = u_buf[(x >> 17) + 1]; |
| 1208 int v0 = v_buf[(x >> 17)]; | 895 int v0 = v_buf[(x >> 17)]; |
| 1209 int v1 = v_buf[(x >> 17) + 1]; | 896 int v1 = v_buf[(x >> 17) + 1]; |
| 1210 int y = ((x & 65535) * y1 + ((x & 65535) ^ 65535) * y0) >> 16; | 897 int y_frac = (x & 65535); |
| 1211 int u = ((x & 65535) * u1 + ((x & 65535) ^ 65535) * u0) >> 16; | 898 int uv_frac = ((x >> 1) & 65535); |
| 1212 int v = ((x & 65535) * v1 + ((x & 65535) ^ 65535) * v0) >> 16; | 899 int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 900 int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; |
| 901 int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; |
| 1213 YuvPixel(y, u, v, rgb_buf); | 902 YuvPixel(y, u, v, rgb_buf); |
| 1214 rgb_buf += 4; | 903 x += dx; |
| 904 if ((i + 1) < source_width) { |
| 905 y0 = y_buf[x >> 16]; |
| 906 y1 = y_buf[(x >> 16) + 1]; |
| 907 y_frac = (x & 65535); |
| 908 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 909 YuvPixel(y, u, v, rgb_buf+4); |
| 910 x += dx; |
| 911 } |
| 912 rgb_buf += 8; |
| 1215 } | 913 } |
| 1216 } | 914 } |
| 1217 | 915 |
| 1218 #endif // USE_MMX | 916 #endif // USE_MMX |
| 1219 } // extern "C" | 917 } // extern "C" |
| 918 |
| OLD | NEW |