| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 // Enable bilinear filtering by turning on the following macro. | |
| 8 // #define MEDIA_BILINEAR_FILTER 1 | |
| 9 | |
| 10 #ifdef _DEBUG | 7 #ifdef _DEBUG |
| 11 #include "base/logging.h" | 8 #include "base/logging.h" |
| 12 #else | 9 #else |
| 13 #define DCHECK(a) | 10 #define DCHECK(a) |
| 14 #endif | 11 #endif |
| 15 | 12 |
| 16 namespace media { | 13 // TODO(fbarchard): Make MMX work in DLLs. Currently only works in unittests. |
| 14 // TODO(fbarchard): Do 64 bit version. |
| 15 |
| 16 extern "C" { |
| 17 #if USE_MMX |
| 18 |
| 19 #define RGBY(i) { /* Y term 1.164 * (i - 16), scaled by 64 (6 fraction bits), in three equal lanes; 4th lane is 0 */ \ |
| 20 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), /* NOTE(review): the + 0.5 bias followed by truncation toward zero mis-rounds negative products (i < 16) - confirm intended */ \ |
| 21 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
| 22 static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ |
| 23 0 \ |
| 24 } |
| 25 |
| 26 #define RGBU(i) { /* U terms for (i - 128), scaled by 64, one int16 per lane */ \ |
| 27 static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ |
| 28 static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ |
| 29 0, /* this lane gets no U term */ \ |
| 30 static_cast<int16>(256 * 64 - 1) /* constant 16383; becomes 255 after the >> 6 done in the asm (presumably opaque alpha - confirm) */ \ |
| 31 } |
| 32 |
| 33 #define RGBV(i) { /* V terms for (i - 128), scaled by 64, one int16 per lane */ \ |
| 34 0, /* this lane gets no V term */ \ |
| 35 static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ |
| 36 static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ |
| 37 0 /* 4th lane gets no V term */ \ |
| 38 } |
| 39 |
| 40 #define MMX_ALIGNED(var) var __attribute__((aligned(16))) /* declares var with 16-byte alignment (GCC attribute syntax) */ |
| 41 |
| 42 MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = { /* indexed by the 8-bit Y sample; each entry is one RGBY(i), four int16 = 8 bytes, which the asm addresses with a scale of 8 */ |
| 43 RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), |
| 44 RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), |
| 45 RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), |
| 46 RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), |
| 47 RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), |
| 48 RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), |
| 49 RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), |
| 50 RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), |
| 51 RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), |
| 52 RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), |
| 53 RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), |
| 54 RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), |
| 55 RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), |
| 56 RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), |
| 57 RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), |
| 58 RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), |
| 59 RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), |
| 60 RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), |
| 61 RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), |
| 62 RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), |
| 63 RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), |
| 64 RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), |
| 65 RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), |
| 66 RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F), |
| 67 RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), |
| 68 RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), |
| 69 RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), |
| 70 RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), |
| 71 RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), |
| 72 RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), |
| 73 RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), |
| 74 RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), |
| 75 RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), |
| 76 RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), |
| 77 RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), |
| 78 RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), |
| 79 RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), |
| 80 RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), |
| 81 RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), |
| 82 RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), |
| 83 RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), |
| 84 RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), |
| 85 RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), |
| 86 RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), |
| 87 RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), |
| 88 RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), |
| 89 RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), |
| 90 RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), |
| 91 RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), |
| 92 RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), |
| 93 RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), |
| 94 RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), |
| 95 RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), |
| 96 RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), |
| 97 RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), |
| 98 RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), |
| 99 RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), |
| 100 RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), |
| 101 RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), |
| 102 RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), |
| 103 RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), |
| 104 RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), |
| 105 RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), |
| 106 RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF), |
| 107 }; |
| 108 |
| 109 MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = { /* indexed by the 8-bit U sample; each entry is one RGBU(i), four int16 = 8 bytes, which the asm addresses with a scale of 8 */ |
| 110 RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), |
| 111 RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), |
| 112 RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), |
| 113 RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), |
| 114 RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), |
| 115 RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), |
| 116 RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), |
| 117 RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), |
| 118 RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), |
| 119 RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), |
| 120 RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), |
| 121 RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), |
| 122 RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), |
| 123 RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), |
| 124 RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), |
| 125 RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), |
| 126 RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), |
| 127 RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), |
| 128 RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), |
| 129 RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), |
| 130 RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), |
| 131 RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), |
| 132 RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), |
| 133 RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), |
| 134 RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), |
| 135 RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), |
| 136 RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), |
| 137 RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), |
| 138 RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), |
| 139 RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), |
| 140 RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), |
| 141 RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), |
| 142 RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), |
| 143 RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), |
| 144 RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), |
| 145 RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), |
| 146 RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), |
| 147 RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), |
| 148 RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), |
| 149 RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), |
| 150 RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), |
| 151 RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), |
| 152 RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), |
| 153 RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), |
| 154 RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), |
| 155 RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), |
| 156 RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), |
| 157 RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), |
| 158 RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), |
| 159 RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), |
| 160 RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), |
| 161 RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), |
| 162 RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), |
| 163 RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), |
| 164 RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), |
| 165 RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), |
| 166 RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), |
| 167 RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), |
| 168 RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), |
| 169 RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), |
| 170 RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), |
| 171 RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), |
| 172 RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), |
| 173 RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), |
| 174 }; |
| 175 |
| 176 MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = { /* indexed by the 8-bit V sample; each entry is one RGBV(i), four int16 = 8 bytes, which the asm addresses with a scale of 8 */ |
| 177 RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), |
| 178 RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), |
| 179 RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), |
| 180 RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), |
| 181 RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), |
| 182 RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), |
| 183 RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), |
| 184 RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), |
| 185 RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), |
| 186 RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), |
| 187 RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), |
| 188 RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), |
| 189 RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), |
| 190 RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), |
| 191 RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), |
| 192 RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), |
| 193 RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), |
| 194 RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), |
| 195 RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), |
| 196 RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), |
| 197 RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), |
| 198 RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), |
| 199 RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), |
| 200 RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), |
| 201 RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), |
| 202 RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), |
| 203 RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), |
| 204 RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), |
| 205 RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), |
| 206 RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), |
| 207 RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), |
| 208 RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), |
| 209 RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), |
| 210 RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), |
| 211 RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), |
| 212 RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), |
| 213 RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), |
| 214 RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), |
| 215 RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), |
| 216 RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), |
| 217 RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), |
| 218 RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), |
| 219 RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), |
| 220 RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), |
| 221 RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), |
| 222 RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), |
| 223 RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), |
| 224 RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), |
| 225 RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), |
| 226 RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), |
| 227 RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), |
| 228 RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), |
| 229 RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), |
| 230 RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), |
| 231 RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), |
| 232 RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), |
| 233 RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), |
| 234 RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), |
| 235 RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), |
| 236 RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), |
| 237 RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), |
| 238 RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), |
| 239 RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), |
| 240 RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), |
| 241 }; |
| 242 |
| 243 #undef RGBY |
| 244 #undef RGBU |
| 245 #undef RGBV |
| 246 #undef MMX_ALIGNED |
| 247 |
| 248 // TODO(fbarchard): Use the following function instead of |
| 249 // pure assembly to help make code more portable to 64 bit |
| 250 // and Mac, which has different labels. |
| 251 // no-gcse is meant to free up ebp. NOTE(review): frame pointer omission is normally controlled by omit-frame-pointer; confirm no-gcse has this effect. |
| 252 /* Compiled only when FUTURE_64BIT_VERSION is defined. Converts pixels two at a time, sharing one U and one V sample per pair. */ |
| 253 #if defined(FUTURE_64BIT_VERSION) |
| 254 void __attribute__((optimize("O2", "no-gcse"))) |
| 255 NewFastConvertYUVToRGB32Row(const uint8* y_buf, |
| 256 const uint8* u_buf, |
| 257 const uint8* v_buf, |
| 258 uint8* rgb_buf, |
| 259 int width) { |
| 260 asm( |
| 261 "shr %4\n"  /* width /= 2 (pairs); the last pixel of an odd width is not converted */ |
| 262 "1:\n"  /* NOTE(review): the loop body runs before any count test, so a width of 0 or 1 is not handled - confirm callers */ |
| 263 "movzb (%1),%%eax\n"  /* eax = *u_buf */ |
| 264 "add $0x1,%1\n" |
| 265 "movzb (%2),%%ebx\n"  /* ebx = *v_buf */ |
| 266 "add $0x1,%2\n" |
| 267 "movq kCoefficientsRgbU(,%%eax,8),%%mm0\n"  /* mm0 = U terms */ |
| 268 "movzb (%0),%%eax\n"  /* eax = first Y of the pair */ |
| 269 "paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n"  /* mm0 += V terms, saturating */ |
| 270 "movzb 0x1(%0),%%ebx\n"  /* ebx = second Y of the pair */ |
| 271 "movq kCoefficientsRgbY(,%%eax,8),%%mm1\n" |
| 272 "add $0x2,%0\n" |
| 273 "movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n" |
| 274 "paddsw %%mm0,%%mm1\n" |
| 275 "paddsw %%mm0,%%mm2\n" |
| 276 "psraw $0x6,%%mm1\n"  /* drop the 6 fraction bits */ |
| 277 "psraw $0x6,%%mm2\n" |
| 278 "packuswb %%mm2,%%mm1\n"  /* saturate each lane to 0..255 and pack both pixels into 8 bytes */ |
| 279 "movntq %%mm1,0x0(%3)\n"  /* 8-byte store with non-temporal hint */ |
| 280 "add $0x8,%3\n" |
| 281 "sub $0x1,%4\n" |
| 282 "jne 1b\n" |
| 283 : : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width)  /* NOTE(review): no outputs are declared; every operand is input-only, yet the asm modifies %0-%4 - fix constraints before enabling */ |
| 284 : "eax","ebx");  /* NOTE(review): clobber list omits mm0-mm2 and memory although the asm writes through %3 */ |
| 285 } |
| 286 #endif |
| 287 |
| 288 extern void FastConvertYUVToRGB32Row(const uint8* y_buf, |
| 289 const uint8* u_buf, |
| 290 const uint8* v_buf, |
| 291 uint8* rgb_buf, |
| 292 int width); |
| 293 /* The body is the file-scope assembly below (32-bit x86, MMX, stack-passed arguments). It writes width 4-byte pixels to rgb_buf, two per loop iteration with one U and one V sample shared per pair. */ |
| 294 __asm__( |
| 295 " .globl _FastConvertYUVToRGB32Row\n" |
| 296 "_FastConvertYUVToRGB32Row:\n" |
| 297 "pusha\n"  /* saves the 8 general registers (0x20 bytes); with the return address the first argument is at 0x24(%esp) */ |
| 298 "mov 0x24(%esp),%edx\n"  /* edx = y_buf */ |
| 299 "mov 0x28(%esp),%edi\n"  /* edi = u_buf */ |
| 300 "mov 0x2c(%esp),%esi\n"  /* esi = v_buf */ |
| 301 "mov 0x30(%esp),%ebp\n"  /* ebp = rgb_buf */ |
| 302 "mov 0x34(%esp),%ecx\n"  /* ecx = width */ |
| 303 "jmp convertend\n"  /* test the count before the first iteration */ |
| 304 |
| 305 "convertloop:"  /* one iteration = two output pixels */ |
| 306 "movzbl (%edi),%eax\n"  /* eax = *u_buf */ |
| 307 "add $0x1,%edi\n" |
| 308 "movzbl (%esi),%ebx\n"  /* ebx = *v_buf */ |
| 309 "add $0x1,%esi\n" |
| 310 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n"  /* mm0 = U terms (8-byte table entry) */ |
| 311 "movzbl (%edx),%eax\n"  /* eax = first Y of the pair */ |
| 312 "paddsw _kCoefficientsRgbV(,%ebx,8),%mm0\n"  /* mm0 += V terms, saturating */ |
| 313 "movzbl 0x1(%edx),%ebx\n"  /* ebx = second Y of the pair */ |
| 314 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" |
| 315 "add $0x2,%edx\n" |
| 316 "movq _kCoefficientsRgbY(,%ebx,8),%mm2\n" |
| 317 "paddsw %mm0,%mm1\n"  /* pixel 0 = Y0 terms + chroma terms */ |
| 318 "paddsw %mm0,%mm2\n"  /* pixel 1 = Y1 terms + chroma terms */ |
| 319 "psraw $0x6,%mm1\n"  /* drop the 6 fraction bits */ |
| 320 "psraw $0x6,%mm2\n" |
| 321 "packuswb %mm2,%mm1\n"  /* saturate each lane to 0..255 and pack both pixels into 8 bytes */ |
| 322 "movntq %mm1,0x0(%ebp)\n"  /* 8-byte store with non-temporal hint */ |
| 323 "add $0x8,%ebp\n" |
| 324 "convertend:" |
| 325 "sub $0x2,%ecx\n" |
| 326 "jns convertloop\n"  /* loop while at least two pixels remained */ |
| 327 |
| 328 "and $0x1,%ecx\n"  /* ecx is -1 (one pixel left) or -2 (none) here; bit 0 tells them apart */ |
| 329 "je convertdone\n" |
| 330 |
| 331 "movzbl (%edi),%eax\n"  /* final single pixel: same math, one Y sample */ |
| 332 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" |
| 333 "movzbl (%esi),%eax\n" |
| 334 "paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n" |
| 335 "movzbl (%edx),%eax\n" |
| 336 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" |
| 337 "paddsw %mm0,%mm1\n" |
| 338 "psraw $0x6,%mm1\n" |
| 339 "packuswb %mm1,%mm1\n" |
| 340 "movd %mm1,0x0(%ebp)\n"  /* 4-byte store of the one pixel */ |
| 341 "convertdone:" |
| 342 "popa\n"  /* NOTE(review): no emms is executed before returning; presumably the caller clears MMX state - confirm */ |
| 343 "ret\n" |
| 344 ); |
| 345 |
| 346 |
| 347 extern void ScaleYUVToRGB32Row(const uint8* y_buf, |
| 348 const uint8* u_buf, |
| 349 const uint8* v_buf, |
| 350 uint8* rgb_buf, |
| 351 int width, |
| 352 int scaled_dx); |
| 353 /* The body is the file-scope assembly below (32-bit x86, MMX, stack-passed arguments). It writes width 4-byte pixels to rgb_buf, stepping through the source by scaled_dx per pixel (fixed point, 4 fraction bits). Table symbols carry the same leading underscore as the entry label so they resolve under one symbol-naming convention. */ |
| 354 __asm__( |
| 355 " .globl _ScaleYUVToRGB32Row\n" |
| 356 "_ScaleYUVToRGB32Row:\n" |
| 357 "pusha\n"  /* saves the 8 general registers (0x20 bytes); with the return address the first argument is at 0x24(%esp) */ |
| 358 "mov 0x24(%esp),%edx\n"  /* edx = y_buf */ |
| 359 "mov 0x28(%esp),%edi\n"  /* edi = u_buf */ |
| 360 "mov 0x2c(%esp),%esi\n"  /* esi = v_buf */ |
| 361 "mov 0x30(%esp),%ebp\n"  /* ebp = rgb_buf */ |
| 362 "mov 0x34(%esp),%ecx\n"  /* ecx = width */ |
| 363 "xor %ebx,%ebx\n"  /* ebx = source x position, starts at 0 */ |
| 364 "jmp scaleend\n"  /* test the count before the first iteration */ |
| 365 |
| 366 "scaleloop:"  /* one iteration = two output pixels sharing the chroma sampled at the first pixel's x */ |
| 367 "mov %ebx,%eax\n" |
| 368 "sar $0x5,%eax\n"  /* chroma index = x >> 5 */ |
| 369 "movzbl (%edi,%eax,1),%eax\n" |
| 370 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n"  /* mm0 = U terms (8-byte table entry) */ |
| 371 "mov %ebx,%eax\n" |
| 372 "sar $0x5,%eax\n" |
| 373 "movzbl (%esi,%eax,1),%eax\n" |
| 374 "paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n"  /* mm0 += V terms, saturating */ |
| 375 "mov %ebx,%eax\n" |
| 376 "add 0x38(%esp),%ebx\n"  /* x += scaled_dx (6th argument) */ |
| 377 "sar $0x4,%eax\n"  /* luma index = x >> 4 */ |
| 378 "movzbl (%edx,%eax,1),%eax\n" |
| 379 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n"  /* mm1 = Y terms of pixel 0 */ |
| 380 "mov %ebx,%eax\n" |
| 381 "add 0x38(%esp),%ebx\n"  /* x += scaled_dx */ |
| 382 "sar $0x4,%eax\n" |
| 383 "movzbl (%edx,%eax,1),%eax\n" |
| 384 "movq _kCoefficientsRgbY(,%eax,8),%mm2\n"  /* mm2 = Y terms of pixel 1 */ |
| 385 "paddsw %mm0,%mm1\n" |
| 386 "paddsw %mm0,%mm2\n" |
| 387 "psraw $0x6,%mm1\n"  /* drop the 6 fraction bits */ |
| 388 "psraw $0x6,%mm2\n" |
| 389 "packuswb %mm2,%mm1\n"  /* saturate each lane to 0..255 and pack both pixels into 8 bytes */ |
| 390 "movntq %mm1,0x0(%ebp)\n"  /* 8-byte store with non-temporal hint */ |
| 391 "add $0x8,%ebp\n" |
| 392 "scaleend:" |
| 393 "sub $0x2,%ecx\n" |
| 394 "jns scaleloop\n"  /* loop while at least two pixels remained */ |
| 395 |
| 396 "and $0x1,%ecx\n"  /* ecx is -1 (one pixel left) or -2 (none) here; bit 0 tells them apart */ |
| 397 "je scaledone\n" |
| 398 |
| 399 "mov %ebx,%eax\n"  /* final single pixel */ |
| 400 "sar $0x5,%eax\n" |
| 401 "movzbl (%edi,%eax,1),%eax\n" |
| 402 "movq _kCoefficientsRgbU(,%eax,8),%mm0\n" |
| 403 "mov %ebx,%eax\n" |
| 404 "sar $0x5,%eax\n" |
| 405 "movzbl (%esi,%eax,1),%eax\n" |
| 406 "paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n" |
| 407 "mov %ebx,%eax\n" |
| 408 "sar $0x4,%eax\n" |
| 409 "movzbl (%edx,%eax,1),%eax\n" |
| 410 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n" |
| 411 "mov %ebx,%eax\n"  /* ebx is unchanged, so mm2 below repeats the same Y sample as mm1 */ |
| 412 "sar $0x4,%eax\n" |
| 413 "movzbl (%edx,%eax,1),%eax\n" |
| 414 "movq _kCoefficientsRgbY(,%eax,8),%mm2\n" |
| 415 "paddsw %mm0,%mm1\n" |
| 416 "paddsw %mm0,%mm2\n" |
| 417 "psraw $0x6,%mm1\n" |
| 418 "psraw $0x6,%mm2\n" |
| 419 "packuswb %mm2,%mm1\n" |
| 420 "movd %mm1,0x0(%ebp)\n"  /* only the low 4 bytes (one pixel) are stored */ |
| 421 |
| 422 "scaledone:" |
| 423 "popa\n"  /* NOTE(review): no emms is executed before returning; presumably the caller clears MMX state - confirm */ |
| 424 "ret\n" |
| 425 ); |
| 426 |
| 427 #else // USE_MMX |
| 17 | 428 |
| 18 // Reference version of YUV converter. | 429 // Reference version of YUV converter. |
| 19 static const int kClipTableSize = 256; | 430 static const int kClipTableSize = 256; |
| 20 static const int kClipOverflow = 288; // Cb max is 535. | 431 static const int kClipOverflow = 288; // Cb max is 535. |
| 21 | 432 |
| 22 static uint8 g_rgb_clip_table[kClipOverflow + | 433 static uint8 kRgbClipTable[kClipOverflow + |
| 23 kClipTableSize + | 434 kClipTableSize + |
| 24 kClipOverflow] = { | 435 kClipOverflow] = { |
| 25 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values | 436 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values |
| 26 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. | 437 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0. |
| 27 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 438 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 28 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 439 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 29 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 440 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 30 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 441 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 31 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 442 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 32 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 443 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 33 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 444 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 34 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | 445 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 130 | 541 |
| 131 // Clip an rgb channel value to 0..255 range. | 542 // Clamp one color channel to the 0..255 range with a table lookup. |
| 132 // Source is signed fixed point 8.8. | 543 // |value| is signed fixed point with 8 fraction bits. |
| 133 // Table allows for values to underflow or overflow by 128. | 544 // The table is padded with kClipOverflow (288) entries on each side, |
| 134 // Therefore source range is -128 to 384. | 545 // so (value >> 8) may range from -288 to 543. |
| 135 // Output clips to unsigned 0 to 255. | 546 // Returns the table entry for the channel, an unsigned 0 to 255. |
| 136 static inline uint32 clip(int32 value) { | 547 static inline uint32 clip(int32 value) { |
| 137 DCHECK(((value >> 8) + kClipOverflow) >= 0); | 548 const int32 index = (value >> 8) + kClipOverflow; |
| 138 DCHECK(((value >> 8) + kClipOverflow) < | 549 DCHECK(index >= 0); |
| 139 (kClipOverflow + kClipTableSize + kClipOverflow)); | 550 DCHECK(index < (kClipOverflow + kClipTableSize + kClipOverflow)); |
| 140 return static_cast<uint32>(g_rgb_clip_table[((value) >> 8) + kClipOverflow]); | 551 return static_cast<uint32>(kRgbClipTable[index]); |
| 141 } | 552 } |
| 142 | 553 |
| 143 static inline void YuvPixel(uint8 y, | 554 static inline void YuvPixel(uint8 y, |
| 144 uint8 u, | 555 uint8 u, |
| 145 uint8 v, | 556 uint8 v, |
| 146 uint8* rgb_buf) { | 557 uint8* rgb_buf) { |
| 147 int32 d = static_cast<int32>(u) - 128; | 558 int32 d = static_cast<int32>(u) - 128; |
| 148 int32 e = static_cast<int32>(v) - 128; | 559 int32 e = static_cast<int32>(v) - 128; |
| 149 | 560 |
| 150 int32 cb = (516 * d + 128); | 561 int32 cb = (516 * d + 128); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 189 int scaled_x = 0; | 600 int scaled_x = 0; |
| 190 for (int x = 0; x < width; ++x) { | 601 for (int x = 0; x < width; ++x) { |
| 191 uint8 u = u_buf[scaled_x >> 5]; | 602 uint8 u = u_buf[scaled_x >> 5]; |
| 192 uint8 v = v_buf[scaled_x >> 5]; | 603 uint8 v = v_buf[scaled_x >> 5]; |
| 193 uint8 y0 = y_buf[scaled_x >> 4]; | 604 uint8 y0 = y_buf[scaled_x >> 4]; |
| 194 YuvPixel(y0, u, v, rgb_buf); | 605 YuvPixel(y0, u, v, rgb_buf); |
| 195 rgb_buf += 4; | 606 rgb_buf += 4; |
| 196 scaled_x += scaled_dx; | 607 scaled_x += scaled_dx; |
| 197 } | 608 } |
| 198 } | 609 } |
| 610 #endif // USE_MMX |
| 611 } // extern "C" |
| 199 | 612 |
| 200 } // namespace media | |
| 201 | |
| OLD | NEW |