source/libvpx/third_party/libyuv/source/rotate.cc - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/third_party/libyuv/source/rotate.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.	2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "libyuv/rotate.h"	11 #include "libyuv/rotate.h"

12	12

13 #include "libyuv/cpu_id.h"	13 #include "libyuv/cpu_id.h"

14 #include "libyuv/convert.h"	14 #include "libyuv/convert.h"

15 #include "libyuv/planar_functions.h"	15 #include "libyuv/planar_functions.h"

	16 #include "libyuv/rotate_row.h"

16 #include "libyuv/row.h"	17 #include "libyuv/row.h"

17	18

18 #ifdef __cplusplus	19 #ifdef __cplusplus

19 namespace libyuv {	20 namespace libyuv {

20 extern "C" {	21 extern "C" {

21 #endif	22 #endif

22	23

23 #if !defined(LIBYUV_DISABLE_X86) && \

24 (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))

25 #if defined(__APPLE__) && defined(__i386__)

26 #define DECLARE_FUNCTION(name) \

27 ".text \n" \

28 ".private_extern _" #name " \n" \

29 ".align 4,0x90 \n" \

30 "_" #name ": \n"

31 #elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)

32 #define DECLARE_FUNCTION(name) \

33 ".text \n" \

34 ".align 4,0x90 \n" \

35 "_" #name ": \n"

36 #else

37 #define DECLARE_FUNCTION(name) \

38 ".text \n" \

39 ".align 4,0x90 \n" \

40 #name ": \n"

41 #endif

42 #endif

43

44 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \

45 (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))

46 #define HAS_TRANSPOSE_WX8_NEON

47 void TransposeWx8_NEON(const uint8* src, int src_stride,

48 uint8* dst, int dst_stride, int width);

49 #define HAS_TRANSPOSE_UVWX8_NEON

50 void TransposeUVWx8_NEON(const uint8* src, int src_stride,

51 uint8* dst_a, int dst_stride_a,

52 uint8* dst_b, int dst_stride_b,

53 int width);

54 #endif

55

56 #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \

57 defined(__mips__) && \

58 defined(__mips_dsp) && (__mips_dsp_rev >= 2)

59 #define HAS_TRANSPOSE_WX8_MIPS_DSPR2

60 void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,

61 uint8* dst, int dst_stride, int width);

62

63 void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,

64 uint8* dst, int dst_stride, int width);

65 #define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2

66 void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,

67 uint8* dst_a, int dst_stride_a,

68 uint8* dst_b, int dst_stride_b,

69 int width);

70 #endif // defined(__mips__)

71

72 #if !defined(LIBYUV_DISABLE_X86) && \

73 defined(_M_IX86) && defined(_MSC_VER)

74 #define HAS_TRANSPOSE_WX8_SSSE3

75 __declspec(naked) __declspec(align(16))

76 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,

77 uint8* dst, int dst_stride, int width) {

78 __asm {

79 push edi

80 push esi

81 push ebp

82 mov eax, [esp + 12 + 4] // src

83 mov edi, [esp + 12 + 8] // src_stride

84 mov edx, [esp + 12 + 12] // dst

85 mov esi, [esp + 12 + 16] // dst_stride

86 mov ecx, [esp + 12 + 20] // width

87

88 // Read in the data from the source pointer.

89 // First round of bit swap.

90 align 4

91 convertloop:

92 movq xmm0, qword ptr [eax]

93 lea ebp, [eax + 8]

94 movq xmm1, qword ptr [eax + edi]

95 lea eax, [eax + 2 * edi]

96 punpcklbw xmm0, xmm1

97 movq xmm2, qword ptr [eax]

98 movdqa xmm1, xmm0

99 palignr xmm1, xmm1, 8

100 movq xmm3, qword ptr [eax + edi]

101 lea eax, [eax + 2 * edi]

102 punpcklbw xmm2, xmm3

103 movdqa xmm3, xmm2

104 movq xmm4, qword ptr [eax]

105 palignr xmm3, xmm3, 8

106 movq xmm5, qword ptr [eax + edi]

107 punpcklbw xmm4, xmm5

108 lea eax, [eax + 2 * edi]

109 movdqa xmm5, xmm4

110 movq xmm6, qword ptr [eax]

111 palignr xmm5, xmm5, 8

112 movq xmm7, qword ptr [eax + edi]

113 punpcklbw xmm6, xmm7

114 mov eax, ebp

115 movdqa xmm7, xmm6

116 palignr xmm7, xmm7, 8

117 // Second round of bit swap.

118 punpcklwd xmm0, xmm2

119 punpcklwd xmm1, xmm3

120 movdqa xmm2, xmm0

121 movdqa xmm3, xmm1

122 palignr xmm2, xmm2, 8

123 palignr xmm3, xmm3, 8

124 punpcklwd xmm4, xmm6

125 punpcklwd xmm5, xmm7

126 movdqa xmm6, xmm4

127 movdqa xmm7, xmm5

128 palignr xmm6, xmm6, 8

129 palignr xmm7, xmm7, 8

130 // Third round of bit swap.

131 // Write to the destination pointer.

132 punpckldq xmm0, xmm4

133 movq qword ptr [edx], xmm0

134 movdqa xmm4, xmm0

135 palignr xmm4, xmm4, 8

136 movq qword ptr [edx + esi], xmm4

137 lea edx, [edx + 2 * esi]

138 punpckldq xmm2, xmm6

139 movdqa xmm6, xmm2

140 palignr xmm6, xmm6, 8

141 movq qword ptr [edx], xmm2

142 punpckldq xmm1, xmm5

143 movq qword ptr [edx + esi], xmm6

144 lea edx, [edx + 2 * esi]

145 movdqa xmm5, xmm1

146 movq qword ptr [edx], xmm1

147 palignr xmm5, xmm5, 8

148 punpckldq xmm3, xmm7

149 movq qword ptr [edx + esi], xmm5

150 lea edx, [edx + 2 * esi]

151 movq qword ptr [edx], xmm3

152 movdqa xmm7, xmm3

153 palignr xmm7, xmm7, 8

154 sub ecx, 8

155 movq qword ptr [edx + esi], xmm7

156 lea edx, [edx + 2 * esi]

157 jg convertloop

158

159 pop ebp

160 pop esi

161 pop edi

162 ret

163 }

164 }

165

166 #define HAS_TRANSPOSE_UVWX8_SSE2

167 __declspec(naked) __declspec(align(16))

168 static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

169 uint8* dst_a, int dst_stride_a,

170 uint8* dst_b, int dst_stride_b,

171 int w) {

172 __asm {

173 push ebx

174 push esi

175 push edi

176 push ebp

177 mov eax, [esp + 16 + 4] // src

178 mov edi, [esp + 16 + 8] // src_stride

179 mov edx, [esp + 16 + 12] // dst_a

180 mov esi, [esp + 16 + 16] // dst_stride_a

181 mov ebx, [esp + 16 + 20] // dst_b

182 mov ebp, [esp + 16 + 24] // dst_stride_b

183 mov ecx, esp

184 sub esp, 4 + 16

185 and esp, ~15

186 mov [esp + 16], ecx

187 mov ecx, [ecx + 16 + 28] // w

188

189 align 4

190 convertloop:

191 // Read in the data from the source pointer.

192 // First round of bit swap.

193 movdqu xmm0, [eax]

194 movdqu xmm1, [eax + edi]

195 lea eax, [eax + 2 * edi]

196 movdqa xmm7, xmm0 // use xmm7 as temp register.

197 punpcklbw xmm0, xmm1

198 punpckhbw xmm7, xmm1

199 movdqa xmm1, xmm7

200 movdqu xmm2, [eax]

201 movdqu xmm3, [eax + edi]

202 lea eax, [eax + 2 * edi]

203 movdqa xmm7, xmm2

204 punpcklbw xmm2, xmm3

205 punpckhbw xmm7, xmm3

206 movdqa xmm3, xmm7

207 movdqu xmm4, [eax]

208 movdqu xmm5, [eax + edi]

209 lea eax, [eax + 2 * edi]

210 movdqa xmm7, xmm4

211 punpcklbw xmm4, xmm5

212 punpckhbw xmm7, xmm5

213 movdqa xmm5, xmm7

214 movdqu xmm6, [eax]

215 movdqu xmm7, [eax + edi]

216 lea eax, [eax + 2 * edi]

217 movdqu [esp], xmm5 // backup xmm5

218 neg edi

219 movdqa xmm5, xmm6 // use xmm5 as temp register.

220 punpcklbw xmm6, xmm7

221 punpckhbw xmm5, xmm7

222 movdqa xmm7, xmm5

223 lea eax, [eax + 8 * edi + 16]

224 neg edi

225 // Second round of bit swap.

226 movdqa xmm5, xmm0

227 punpcklwd xmm0, xmm2

228 punpckhwd xmm5, xmm2

229 movdqa xmm2, xmm5

230 movdqa xmm5, xmm1

231 punpcklwd xmm1, xmm3

232 punpckhwd xmm5, xmm3

233 movdqa xmm3, xmm5

234 movdqa xmm5, xmm4

235 punpcklwd xmm4, xmm6

236 punpckhwd xmm5, xmm6

237 movdqa xmm6, xmm5

238 movdqu xmm5, [esp] // restore xmm5

239 movdqu [esp], xmm6 // backup xmm6

240 movdqa xmm6, xmm5 // use xmm6 as temp register.

241 punpcklwd xmm5, xmm7

242 punpckhwd xmm6, xmm7

243 movdqa xmm7, xmm6

244 // Third round of bit swap.

245 // Write to the destination pointer.

246 movdqa xmm6, xmm0

247 punpckldq xmm0, xmm4

248 punpckhdq xmm6, xmm4

249 movdqa xmm4, xmm6

250 movdqu xmm6, [esp] // restore xmm6

251 movlpd qword ptr [edx], xmm0

252 movhpd qword ptr [ebx], xmm0

253 movlpd qword ptr [edx + esi], xmm4

254 lea edx, [edx + 2 * esi]

255 movhpd qword ptr [ebx + ebp], xmm4

256 lea ebx, [ebx + 2 * ebp]

257 movdqa xmm0, xmm2 // use xmm0 as the temp register.

258 punpckldq xmm2, xmm6

259 movlpd qword ptr [edx], xmm2

260 movhpd qword ptr [ebx], xmm2

261 punpckhdq xmm0, xmm6

262 movlpd qword ptr [edx + esi], xmm0

263 lea edx, [edx + 2 * esi]

264 movhpd qword ptr [ebx + ebp], xmm0

265 lea ebx, [ebx + 2 * ebp]

266 movdqa xmm0, xmm1 // use xmm0 as the temp register.

267 punpckldq xmm1, xmm5

268 movlpd qword ptr [edx], xmm1

269 movhpd qword ptr [ebx], xmm1

270 punpckhdq xmm0, xmm5

271 movlpd qword ptr [edx + esi], xmm0

272 lea edx, [edx + 2 * esi]

273 movhpd qword ptr [ebx + ebp], xmm0

274 lea ebx, [ebx + 2 * ebp]

275 movdqa xmm0, xmm3 // use xmm0 as the temp register.

276 punpckldq xmm3, xmm7

277 movlpd qword ptr [edx], xmm3

278 movhpd qword ptr [ebx], xmm3

279 punpckhdq xmm0, xmm7

280 sub ecx, 8

281 movlpd qword ptr [edx + esi], xmm0

282 lea edx, [edx + 2 * esi]

283 movhpd qword ptr [ebx + ebp], xmm0

284 lea ebx, [ebx + 2 * ebp]

285 jg convertloop

286

287 mov esp, [esp + 16]

288 pop ebp

289 pop edi

290 pop esi

291 pop ebx

292 ret

293 }

294 }

295 #endif

296 #if !defined(LIBYUV_DISABLE_X86) && \

297 (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))

298 #define HAS_TRANSPOSE_WX8_SSSE3

299 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,

300 uint8* dst, int dst_stride, int width) {

301 asm volatile (

302 // Read in the data from the source pointer.

303 // First round of bit swap.

304 ".p2align 2 \n"

305 "1: \n"

306 "movq (%0),%%xmm0 \n"

307 "movq (%0,%3),%%xmm1 \n"

308 "lea (%0,%3,2),%0 \n"

309 "punpcklbw %%xmm1,%%xmm0 \n"

310 "movq (%0),%%xmm2 \n"

311 "movdqa %%xmm0,%%xmm1 \n"

312 "palignr $0x8,%%xmm1,%%xmm1 \n"

313 "movq (%0,%3),%%xmm3 \n"

314 "lea (%0,%3,2),%0 \n"

315 "punpcklbw %%xmm3,%%xmm2 \n"

316 "movdqa %%xmm2,%%xmm3 \n"

317 "movq (%0),%%xmm4 \n"

318 "palignr $0x8,%%xmm3,%%xmm3 \n"

319 "movq (%0,%3),%%xmm5 \n"

320 "lea (%0,%3,2),%0 \n"

321 "punpcklbw %%xmm5,%%xmm4 \n"

322 "movdqa %%xmm4,%%xmm5 \n"

323 "movq (%0),%%xmm6 \n"

324 "palignr $0x8,%%xmm5,%%xmm5 \n"

325 "movq (%0,%3),%%xmm7 \n"

326 "lea (%0,%3,2),%0 \n"

327 "punpcklbw %%xmm7,%%xmm6 \n"

328 "neg %3 \n"

329 "movdqa %%xmm6,%%xmm7 \n"

330 "lea 0x8(%0,%3,8),%0 \n"

331 "palignr $0x8,%%xmm7,%%xmm7 \n"

332 "neg %3 \n"

333 // Second round of bit swap.

334 "punpcklwd %%xmm2,%%xmm0 \n"

335 "punpcklwd %%xmm3,%%xmm1 \n"

336 "movdqa %%xmm0,%%xmm2 \n"

337 "movdqa %%xmm1,%%xmm3 \n"

338 "palignr $0x8,%%xmm2,%%xmm2 \n"

339 "palignr $0x8,%%xmm3,%%xmm3 \n"

340 "punpcklwd %%xmm6,%%xmm4 \n"

341 "punpcklwd %%xmm7,%%xmm5 \n"

342 "movdqa %%xmm4,%%xmm6 \n"

343 "movdqa %%xmm5,%%xmm7 \n"

344 "palignr $0x8,%%xmm6,%%xmm6 \n"

345 "palignr $0x8,%%xmm7,%%xmm7 \n"

346 // Third round of bit swap.

347 // Write to the destination pointer.

348 "punpckldq %%xmm4,%%xmm0 \n"

349 "movq %%xmm0,(%1) \n"

350 "movdqa %%xmm0,%%xmm4 \n"

351 "palignr $0x8,%%xmm4,%%xmm4 \n"

352 "movq %%xmm4,(%1,%4) \n"

353 "lea (%1,%4,2),%1 \n"

354 "punpckldq %%xmm6,%%xmm2 \n"

355 "movdqa %%xmm2,%%xmm6 \n"

356 "movq %%xmm2,(%1) \n"

357 "palignr $0x8,%%xmm6,%%xmm6 \n"

358 "punpckldq %%xmm5,%%xmm1 \n"

359 "movq %%xmm6,(%1,%4) \n"

360 "lea (%1,%4,2),%1 \n"

361 "movdqa %%xmm1,%%xmm5 \n"

362 "movq %%xmm1,(%1) \n"

363 "palignr $0x8,%%xmm5,%%xmm5 \n"

364 "movq %%xmm5,(%1,%4) \n"

365 "lea (%1,%4,2),%1 \n"

366 "punpckldq %%xmm7,%%xmm3 \n"

367 "movq %%xmm3,(%1) \n"

368 "movdqa %%xmm3,%%xmm7 \n"

369 "palignr $0x8,%%xmm7,%%xmm7 \n"

370 "sub $0x8,%2 \n"

371 "movq %%xmm7,(%1,%4) \n"

372 "lea (%1,%4,2),%1 \n"

373 "jg 1b \n"

374 : "+r"(src), // %0

375 "+r"(dst), // %1

376 "+r"(width) // %2

377 : "r"((intptr_t)(src_stride)), // %3

378 "r"((intptr_t)(dst_stride)) // %4

379 : "memory", "cc",

380 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"

381 );

382 }

383

384 #if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)

385 #define HAS_TRANSPOSE_UVWX8_SSE2

386 void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

387 uint8* dst_a, int dst_stride_a,

388 uint8* dst_b, int dst_stride_b,

389 int w);

390 asm (

391 DECLARE_FUNCTION(TransposeUVWx8_SSE2)

392 "push %ebx \n"

393 "push %esi \n"

394 "push %edi \n"

395 "push %ebp \n"

396 "mov 0x14(%esp),%eax \n"

397 "mov 0x18(%esp),%edi \n"

398 "mov 0x1c(%esp),%edx \n"

399 "mov 0x20(%esp),%esi \n"

400 "mov 0x24(%esp),%ebx \n"

401 "mov 0x28(%esp),%ebp \n"

402 "mov %esp,%ecx \n"

403 "sub $0x14,%esp \n"

404 "and $0xfffffff0,%esp \n"

405 "mov %ecx,0x10(%esp) \n"

406 "mov 0x2c(%ecx),%ecx \n"

407

408 "1: \n"

409 "movdqu (%eax),%xmm0 \n"

410 "movdqu (%eax,%edi,1),%xmm1 \n"

411 "lea (%eax,%edi,2),%eax \n"

412 "movdqa %xmm0,%xmm7 \n"

413 "punpcklbw %xmm1,%xmm0 \n"

414 "punpckhbw %xmm1,%xmm7 \n"

415 "movdqa %xmm7,%xmm1 \n"

416 "movdqu (%eax),%xmm2 \n"

417 "movdqu (%eax,%edi,1),%xmm3 \n"

418 "lea (%eax,%edi,2),%eax \n"

419 "movdqa %xmm2,%xmm7 \n"

420 "punpcklbw %xmm3,%xmm2 \n"

421 "punpckhbw %xmm3,%xmm7 \n"

422 "movdqa %xmm7,%xmm3 \n"

423 "movdqu (%eax),%xmm4 \n"

424 "movdqu (%eax,%edi,1),%xmm5 \n"

425 "lea (%eax,%edi,2),%eax \n"

426 "movdqa %xmm4,%xmm7 \n"

427 "punpcklbw %xmm5,%xmm4 \n"

428 "punpckhbw %xmm5,%xmm7 \n"

429 "movdqa %xmm7,%xmm5 \n"

430 "movdqu (%eax),%xmm6 \n"

431 "movdqu (%eax,%edi,1),%xmm7 \n"

432 "lea (%eax,%edi,2),%eax \n"

433 "movdqu %xmm5,(%esp) \n"

434 "neg %edi \n"

435 "movdqa %xmm6,%xmm5 \n"

436 "punpcklbw %xmm7,%xmm6 \n"

437 "punpckhbw %xmm7,%xmm5 \n"

438 "movdqa %xmm5,%xmm7 \n"

439 "lea 0x10(%eax,%edi,8),%eax \n"

440 "neg %edi \n"

441 "movdqa %xmm0,%xmm5 \n"

442 "punpcklwd %xmm2,%xmm0 \n"

443 "punpckhwd %xmm2,%xmm5 \n"

444 "movdqa %xmm5,%xmm2 \n"

445 "movdqa %xmm1,%xmm5 \n"

446 "punpcklwd %xmm3,%xmm1 \n"

447 "punpckhwd %xmm3,%xmm5 \n"

448 "movdqa %xmm5,%xmm3 \n"

449 "movdqa %xmm4,%xmm5 \n"

450 "punpcklwd %xmm6,%xmm4 \n"

451 "punpckhwd %xmm6,%xmm5 \n"

452 "movdqa %xmm5,%xmm6 \n"

453 "movdqu (%esp),%xmm5 \n"

454 "movdqu %xmm6,(%esp) \n"

455 "movdqa %xmm5,%xmm6 \n"

456 "punpcklwd %xmm7,%xmm5 \n"

457 "punpckhwd %xmm7,%xmm6 \n"

458 "movdqa %xmm6,%xmm7 \n"

459 "movdqa %xmm0,%xmm6 \n"

460 "punpckldq %xmm4,%xmm0 \n"

461 "punpckhdq %xmm4,%xmm6 \n"

462 "movdqa %xmm6,%xmm4 \n"

463 "movdqu (%esp),%xmm6 \n"

464 "movlpd %xmm0,(%edx) \n"

465 "movhpd %xmm0,(%ebx) \n"

466 "movlpd %xmm4,(%edx,%esi,1) \n"

467 "lea (%edx,%esi,2),%edx \n"

468 "movhpd %xmm4,(%ebx,%ebp,1) \n"

469 "lea (%ebx,%ebp,2),%ebx \n"

470 "movdqa %xmm2,%xmm0 \n"

471 "punpckldq %xmm6,%xmm2 \n"

472 "movlpd %xmm2,(%edx) \n"

473 "movhpd %xmm2,(%ebx) \n"

474 "punpckhdq %xmm6,%xmm0 \n"

475 "movlpd %xmm0,(%edx,%esi,1) \n"

476 "lea (%edx,%esi,2),%edx \n"

477 "movhpd %xmm0,(%ebx,%ebp,1) \n"

478 "lea (%ebx,%ebp,2),%ebx \n"

479 "movdqa %xmm1,%xmm0 \n"

480 "punpckldq %xmm5,%xmm1 \n"

481 "movlpd %xmm1,(%edx) \n"

482 "movhpd %xmm1,(%ebx) \n"

483 "punpckhdq %xmm5,%xmm0 \n"

484 "movlpd %xmm0,(%edx,%esi,1) \n"

485 "lea (%edx,%esi,2),%edx \n"

486 "movhpd %xmm0,(%ebx,%ebp,1) \n"

487 "lea (%ebx,%ebp,2),%ebx \n"

488 "movdqa %xmm3,%xmm0 \n"

489 "punpckldq %xmm7,%xmm3 \n"

490 "movlpd %xmm3,(%edx) \n"

491 "movhpd %xmm3,(%ebx) \n"

492 "punpckhdq %xmm7,%xmm0 \n"

493 "sub $0x8,%ecx \n"

494 "movlpd %xmm0,(%edx,%esi,1) \n"

495 "lea (%edx,%esi,2),%edx \n"

496 "movhpd %xmm0,(%ebx,%ebp,1) \n"

497 "lea (%ebx,%ebp,2),%ebx \n"

498 "jg 1b \n"

499 "mov 0x10(%esp),%esp \n"

500 "pop %ebp \n"

501 "pop %edi \n"

502 "pop %esi \n"

503 "pop %ebx \n"

504 #if defined(__native_client__)

505 "pop %ecx \n"

506 "and $0xffffffe0,%ecx \n"

507 "jmp *%ecx \n"

508 #else

509 "ret \n"

510 #endif

511 );

512 #endif

513 #if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \

514 defined(__x86_64__)

515 // 64 bit version has enough registers to do 16x8 to 8x16 at a time.

516 #define HAS_TRANSPOSE_WX8_FAST_SSSE3

517 static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,

518 uint8* dst, int dst_stride, int width) {

519 asm volatile (

520 // Read in the data from the source pointer.

521 // First round of bit swap.

522 ".p2align 2 \n"

523 "1: \n"

524 "movdqu (%0),%%xmm0 \n"

525 "movdqu (%0,%3),%%xmm1 \n"

526 "lea (%0,%3,2),%0 \n"

527 "movdqa %%xmm0,%%xmm8 \n"

528 "punpcklbw %%xmm1,%%xmm0 \n"

529 "punpckhbw %%xmm1,%%xmm8 \n"

530 "movdqu (%0),%%xmm2 \n"

531 "movdqa %%xmm0,%%xmm1 \n"

532 "movdqa %%xmm8,%%xmm9 \n"

533 "palignr $0x8,%%xmm1,%%xmm1 \n"

534 "palignr $0x8,%%xmm9,%%xmm9 \n"

535 "movdqu (%0,%3),%%xmm3 \n"

536 "lea (%0,%3,2),%0 \n"

537 "movdqa %%xmm2,%%xmm10 \n"

538 "punpcklbw %%xmm3,%%xmm2 \n"

539 "punpckhbw %%xmm3,%%xmm10 \n"

540 "movdqa %%xmm2,%%xmm3 \n"

541 "movdqa %%xmm10,%%xmm11 \n"

542 "movdqu (%0),%%xmm4 \n"

543 "palignr $0x8,%%xmm3,%%xmm3 \n"

544 "palignr $0x8,%%xmm11,%%xmm11 \n"

545 "movdqu (%0,%3),%%xmm5 \n"

546 "lea (%0,%3,2),%0 \n"

547 "movdqa %%xmm4,%%xmm12 \n"

548 "punpcklbw %%xmm5,%%xmm4 \n"

549 "punpckhbw %%xmm5,%%xmm12 \n"

550 "movdqa %%xmm4,%%xmm5 \n"

551 "movdqa %%xmm12,%%xmm13 \n"

552 "movdqu (%0),%%xmm6 \n"

553 "palignr $0x8,%%xmm5,%%xmm5 \n"

554 "palignr $0x8,%%xmm13,%%xmm13 \n"

555 "movdqu (%0,%3),%%xmm7 \n"

556 "lea (%0,%3,2),%0 \n"

557 "movdqa %%xmm6,%%xmm14 \n"

558 "punpcklbw %%xmm7,%%xmm6 \n"

559 "punpckhbw %%xmm7,%%xmm14 \n"

560 "neg %3 \n"

561 "movdqa %%xmm6,%%xmm7 \n"

562 "movdqa %%xmm14,%%xmm15 \n"

563 "lea 0x10(%0,%3,8),%0 \n"

564 "palignr $0x8,%%xmm7,%%xmm7 \n"

565 "palignr $0x8,%%xmm15,%%xmm15 \n"

566 "neg %3 \n"

567 // Second round of bit swap.

568 "punpcklwd %%xmm2,%%xmm0 \n"

569 "punpcklwd %%xmm3,%%xmm1 \n"

570 "movdqa %%xmm0,%%xmm2 \n"

571 "movdqa %%xmm1,%%xmm3 \n"

572 "palignr $0x8,%%xmm2,%%xmm2 \n"

573 "palignr $0x8,%%xmm3,%%xmm3 \n"

574 "punpcklwd %%xmm6,%%xmm4 \n"

575 "punpcklwd %%xmm7,%%xmm5 \n"

576 "movdqa %%xmm4,%%xmm6 \n"

577 "movdqa %%xmm5,%%xmm7 \n"

578 "palignr $0x8,%%xmm6,%%xmm6 \n"

579 "palignr $0x8,%%xmm7,%%xmm7 \n"

580 "punpcklwd %%xmm10,%%xmm8 \n"

581 "punpcklwd %%xmm11,%%xmm9 \n"

582 "movdqa %%xmm8,%%xmm10 \n"

583 "movdqa %%xmm9,%%xmm11 \n"

584 "palignr $0x8,%%xmm10,%%xmm10 \n"

585 "palignr $0x8,%%xmm11,%%xmm11 \n"

586 "punpcklwd %%xmm14,%%xmm12 \n"

587 "punpcklwd %%xmm15,%%xmm13 \n"

588 "movdqa %%xmm12,%%xmm14 \n"

589 "movdqa %%xmm13,%%xmm15 \n"

590 "palignr $0x8,%%xmm14,%%xmm14 \n"

591 "palignr $0x8,%%xmm15,%%xmm15 \n"

592 // Third round of bit swap.

593 // Write to the destination pointer.

594 "punpckldq %%xmm4,%%xmm0 \n"

595 "movq %%xmm0,(%1) \n"

596 "movdqa %%xmm0,%%xmm4 \n"

597 "palignr $0x8,%%xmm4,%%xmm4 \n"

598 "movq %%xmm4,(%1,%4) \n"

599 "lea (%1,%4,2),%1 \n"

600 "punpckldq %%xmm6,%%xmm2 \n"

601 "movdqa %%xmm2,%%xmm6 \n"

602 "movq %%xmm2,(%1) \n"

603 "palignr $0x8,%%xmm6,%%xmm6 \n"

604 "punpckldq %%xmm5,%%xmm1 \n"

605 "movq %%xmm6,(%1,%4) \n"

606 "lea (%1,%4,2),%1 \n"

607 "movdqa %%xmm1,%%xmm5 \n"

608 "movq %%xmm1,(%1) \n"

609 "palignr $0x8,%%xmm5,%%xmm5 \n"

610 "movq %%xmm5,(%1,%4) \n"

611 "lea (%1,%4,2),%1 \n"

612 "punpckldq %%xmm7,%%xmm3 \n"

613 "movq %%xmm3,(%1) \n"

614 "movdqa %%xmm3,%%xmm7 \n"

615 "palignr $0x8,%%xmm7,%%xmm7 \n"

616 "movq %%xmm7,(%1,%4) \n"

617 "lea (%1,%4,2),%1 \n"

618 "punpckldq %%xmm12,%%xmm8 \n"

619 "movq %%xmm8,(%1) \n"

620 "movdqa %%xmm8,%%xmm12 \n"

621 "palignr $0x8,%%xmm12,%%xmm12 \n"

622 "movq %%xmm12,(%1,%4) \n"

623 "lea (%1,%4,2),%1 \n"

624 "punpckldq %%xmm14,%%xmm10 \n"

625 "movdqa %%xmm10,%%xmm14 \n"

626 "movq %%xmm10,(%1) \n"

627 "palignr $0x8,%%xmm14,%%xmm14 \n"

628 "punpckldq %%xmm13,%%xmm9 \n"

629 "movq %%xmm14,(%1,%4) \n"

630 "lea (%1,%4,2),%1 \n"

631 "movdqa %%xmm9,%%xmm13 \n"

632 "movq %%xmm9,(%1) \n"

633 "palignr $0x8,%%xmm13,%%xmm13 \n"

634 "movq %%xmm13,(%1,%4) \n"

635 "lea (%1,%4,2),%1 \n"

636 "punpckldq %%xmm15,%%xmm11 \n"

637 "movq %%xmm11,(%1) \n"

638 "movdqa %%xmm11,%%xmm15 \n"

639 "palignr $0x8,%%xmm15,%%xmm15 \n"

640 "sub $0x10,%2 \n"

641 "movq %%xmm15,(%1,%4) \n"

642 "lea (%1,%4,2),%1 \n"

643 "jg 1b \n"

644 : "+r"(src), // %0

645 "+r"(dst), // %1

646 "+r"(width) // %2

647 : "r"((intptr_t)(src_stride)), // %3

648 "r"((intptr_t)(dst_stride)) // %4

649 : "memory", "cc",

650 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",

651 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"

652 );

653 }

654

655 #define HAS_TRANSPOSE_UVWX8_SSE2

656 static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,

657 uint8* dst_a, int dst_stride_a,

658 uint8* dst_b, int dst_stride_b,

659 int w) {

660 asm volatile (

661 // Read in the data from the source pointer.

662 // First round of bit swap.

663 ".p2align 2 \n"

664 "1: \n"

665 "movdqu (%0),%%xmm0 \n"

666 "movdqu (%0,%4),%%xmm1 \n"

667 "lea (%0,%4,2),%0 \n"

668 "movdqa %%xmm0,%%xmm8 \n"

669 "punpcklbw %%xmm1,%%xmm0 \n"

670 "punpckhbw %%xmm1,%%xmm8 \n"

671 "movdqa %%xmm8,%%xmm1 \n"

672 "movdqu (%0),%%xmm2 \n"

673 "movdqu (%0,%4),%%xmm3 \n"

674 "lea (%0,%4,2),%0 \n"

675 "movdqa %%xmm2,%%xmm8 \n"

676 "punpcklbw %%xmm3,%%xmm2 \n"

677 "punpckhbw %%xmm3,%%xmm8 \n"

678 "movdqa %%xmm8,%%xmm3 \n"

679 "movdqu (%0),%%xmm4 \n"

680 "movdqu (%0,%4),%%xmm5 \n"

681 "lea (%0,%4,2),%0 \n"

682 "movdqa %%xmm4,%%xmm8 \n"

683 "punpcklbw %%xmm5,%%xmm4 \n"

684 "punpckhbw %%xmm5,%%xmm8 \n"

685 "movdqa %%xmm8,%%xmm5 \n"

686 "movdqu (%0),%%xmm6 \n"

687 "movdqu (%0,%4),%%xmm7 \n"

688 "lea (%0,%4,2),%0 \n"

689 "movdqa %%xmm6,%%xmm8 \n"

690 "punpcklbw %%xmm7,%%xmm6 \n"

691 "neg %4 \n"

692 "lea 0x10(%0,%4,8),%0 \n"

693 "punpckhbw %%xmm7,%%xmm8 \n"

694 "movdqa %%xmm8,%%xmm7 \n"

695 "neg %4 \n"

696 // Second round of bit swap.

697 "movdqa %%xmm0,%%xmm8 \n"

698 "movdqa %%xmm1,%%xmm9 \n"

699 "punpckhwd %%xmm2,%%xmm8 \n"

700 "punpckhwd %%xmm3,%%xmm9 \n"

701 "punpcklwd %%xmm2,%%xmm0 \n"

702 "punpcklwd %%xmm3,%%xmm1 \n"

703 "movdqa %%xmm8,%%xmm2 \n"

704 "movdqa %%xmm9,%%xmm3 \n"

705 "movdqa %%xmm4,%%xmm8 \n"

706 "movdqa %%xmm5,%%xmm9 \n"

707 "punpckhwd %%xmm6,%%xmm8 \n"

708 "punpckhwd %%xmm7,%%xmm9 \n"

709 "punpcklwd %%xmm6,%%xmm4 \n"

710 "punpcklwd %%xmm7,%%xmm5 \n"

711 "movdqa %%xmm8,%%xmm6 \n"

712 "movdqa %%xmm9,%%xmm7 \n"

713 // Third round of bit swap.

714 // Write to the destination pointer.

715 "movdqa %%xmm0,%%xmm8 \n"

716 "punpckldq %%xmm4,%%xmm0 \n"

717 "movlpd %%xmm0,(%1) \n" // Write back U channel

718 "movhpd %%xmm0,(%2) \n" // Write back V channel

719 "punpckhdq %%xmm4,%%xmm8 \n"

720 "movlpd %%xmm8,(%1,%5) \n"

721 "lea (%1,%5,2),%1 \n"

722 "movhpd %%xmm8,(%2,%6) \n"

723 "lea (%2,%6,2),%2 \n"

724 "movdqa %%xmm2,%%xmm8 \n"

725 "punpckldq %%xmm6,%%xmm2 \n"

726 "movlpd %%xmm2,(%1) \n"

727 "movhpd %%xmm2,(%2) \n"

728 "punpckhdq %%xmm6,%%xmm8 \n"

729 "movlpd %%xmm8,(%1,%5) \n"

730 "lea (%1,%5,2),%1 \n"

731 "movhpd %%xmm8,(%2,%6) \n"

732 "lea (%2,%6,2),%2 \n"

733 "movdqa %%xmm1,%%xmm8 \n"

734 "punpckldq %%xmm5,%%xmm1 \n"

735 "movlpd %%xmm1,(%1) \n"

736 "movhpd %%xmm1,(%2) \n"

737 "punpckhdq %%xmm5,%%xmm8 \n"

738 "movlpd %%xmm8,(%1,%5) \n"

739 "lea (%1,%5,2),%1 \n"

740 "movhpd %%xmm8,(%2,%6) \n"

741 "lea (%2,%6,2),%2 \n"

742 "movdqa %%xmm3,%%xmm8 \n"

743 "punpckldq %%xmm7,%%xmm3 \n"

744 "movlpd %%xmm3,(%1) \n"

745 "movhpd %%xmm3,(%2) \n"

746 "punpckhdq %%xmm7,%%xmm8 \n"

747 "sub $0x8,%3 \n"

748 "movlpd %%xmm8,(%1,%5) \n"

749 "lea (%1,%5,2),%1 \n"

750 "movhpd %%xmm8,(%2,%6) \n"

751 "lea (%2,%6,2),%2 \n"

752 "jg 1b \n"

753 : "+r"(src), // %0

754 "+r"(dst_a), // %1

755 "+r"(dst_b), // %2

756 "+r"(w) // %3

757 : "r"((intptr_t)(src_stride)), // %4

758 "r"((intptr_t)(dst_stride_a)), // %5

759 "r"((intptr_t)(dst_stride_b)) // %6

760 : "memory", "cc",

761 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",

762 "xmm8", "xmm9"

763 );

764 }

765 #endif

766 #endif

767

768 static void TransposeWx8_C(const uint8* src, int src_stride,

769 uint8* dst, int dst_stride,

770 int width) {

771 int i;

772 for (i = 0; i < width; ++i) {

773 dst[0] = src[0 * src_stride];

774 dst[1] = src[1 * src_stride];

775 dst[2] = src[2 * src_stride];

776 dst[3] = src[3 * src_stride];

777 dst[4] = src[4 * src_stride];

778 dst[5] = src[5 * src_stride];

779 dst[6] = src[6 * src_stride];

780 dst[7] = src[7 * src_stride];

781 ++src;

782 dst += dst_stride;

783 }

784 }

785

786 static void TransposeWxH_C(const uint8* src, int src_stride,

787 uint8* dst, int dst_stride,

788 int width, int height) {

789 int i;

790 for (i = 0; i < width; ++i) {

791 int j;

792 for (j = 0; j < height; ++j) {

793 dst[i * dst_stride + j] = src[j * src_stride + i];

794 }

795 }

796 }

797

798 LIBYUV_API	24 LIBYUV_API

799 void TransposePlane(const uint8* src, int src_stride,	25 void TransposePlane(const uint8* src, int src_stride,

800 uint8* dst, int dst_stride,	26 uint8* dst, int dst_stride,

801 int width, int height) {	27 int width, int height) {

802 int i = height;	28 int i = height;

803 void (*TransposeWx8)(const uint8* src, int src_stride,	29 void (*TransposeWx8)(const uint8* src, int src_stride,

804 uint8* dst, int dst_stride,	30 uint8* dst, int dst_stride, int width) = TransposeWx8_C;

805 int width) = TransposeWx8_C;	31 #if defined(HAS_TRANSPOSEWX8_NEON)

806 #if defined(HAS_TRANSPOSE_WX8_NEON)

807 if (TestCpuFlag(kCpuHasNEON)) {	32 if (TestCpuFlag(kCpuHasNEON)) {

808 TransposeWx8 = TransposeWx8_NEON;	33 TransposeWx8 = TransposeWx8_NEON;

809 }	34 }

810 #endif	35 #endif

811 #if defined(HAS_TRANSPOSE_WX8_SSSE3)	36 #if defined(HAS_TRANSPOSEWX8_SSSE3)

812 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {	37 if (TestCpuFlag(kCpuHasSSSE3)) {

813 TransposeWx8 = TransposeWx8_SSSE3;	38 TransposeWx8 = TransposeWx8_Any_SSSE3;

	39 if (IS_ALIGNED(width, 8)) {

	40 TransposeWx8 = TransposeWx8_SSSE3;

	41 }

814 }	42 }

815 #endif	43 #endif

816 #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)	44 #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)

817 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {	45 if (TestCpuFlag(kCpuHasSSSE3)) {

818 TransposeWx8 = TransposeWx8_FAST_SSSE3;	46 TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;

	47 if (IS_ALIGNED(width, 16)) {

	48 TransposeWx8 = TransposeWx8_Fast_SSSE3;

	49 }

819 }	50 }

820 #endif	51 #endif

821 #if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)	52 #if defined(HAS_TRANSPOSEWX8_MIPS_DSPR2)

822 if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {	53 if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {

823 if (IS_ALIGNED(width, 4) &&	54 if (IS_ALIGNED(width, 4) &&

824 IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {	55 IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {

825 TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;	56 TransposeWx8 = TransposeWx8_Fast_MIPS_DSPR2;

826 } else {	57 } else {

827 TransposeWx8 = TransposeWx8_MIPS_DSPR2;	58 TransposeWx8 = TransposeWx8_MIPS_DSPR2;

828 }	59 }

829 }	60 }

830 #endif	61 #endif

831	62

832 // Work across the source in 8x8 tiles	63 // Work across the source in 8x8 tiles

833 while (i >= 8) {	64 while (i >= 8) {

834 TransposeWx8(src, src_stride, dst, dst_stride, width);	65 TransposeWx8(src, src_stride, dst, dst_stride, width);

835 src += 8 * src_stride; // Go down 8 rows.	66 src += 8 * src_stride; // Go down 8 rows.

836 dst += 8; // Move over 8 columns.	67 dst += 8; // Move over 8 columns.

837 i -= 8;	68 i -= 8;

838 }	69 }

839	70

840 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);	71 if (i > 0) {

	72 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);

	73 }

841 }	74 }

842	75

843 LIBYUV_API	76 LIBYUV_API

844 void RotatePlane90(const uint8* src, int src_stride,	77 void RotatePlane90(const uint8* src, int src_stride,

845 uint8* dst, int dst_stride,	78 uint8* dst, int dst_stride,

846 int width, int height) {	79 int width, int height) {

847 // Rotate by 90 is a transpose with the source read	80 // Rotate by 90 is a transpose with the source read

848 // from bottom to top. So set the source pointer to the end	81 // from bottom to top. So set the source pointer to the end

849 // of the buffer and flip the sign of the source stride.	82 // of the buffer and flip the sign of the source stride.

850 src += src_stride * (height - 1);	83 src += src_stride * (height - 1);

(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
948 src += src_stride;	181 src += src_stride;

949 MirrorRow(src_bot, dst, width); // Mirror last row into first row	182 MirrorRow(src_bot, dst, width); // Mirror last row into first row

950 dst += dst_stride;	183 dst += dst_stride;

951 CopyRow(row, dst_bot, width); // Copy first mirrored row into last	184 CopyRow(row, dst_bot, width); // Copy first mirrored row into last

952 src_bot -= src_stride;	185 src_bot -= src_stride;

953 dst_bot -= dst_stride;	186 dst_bot -= dst_stride;

954 }	187 }

955 free_aligned_buffer_64(row);	188 free_aligned_buffer_64(row);

956 }	189 }

957	190

958 static void TransposeUVWx8_C(const uint8* src, int src_stride,

959 uint8* dst_a, int dst_stride_a,

960 uint8* dst_b, int dst_stride_b,

961 int width) {

962 int i;

963 for (i = 0; i < width; ++i) {

964 dst_a[0] = src[0 * src_stride + 0];

965 dst_b[0] = src[0 * src_stride + 1];

966 dst_a[1] = src[1 * src_stride + 0];

967 dst_b[1] = src[1 * src_stride + 1];

968 dst_a[2] = src[2 * src_stride + 0];

969 dst_b[2] = src[2 * src_stride + 1];

970 dst_a[3] = src[3 * src_stride + 0];

971 dst_b[3] = src[3 * src_stride + 1];

972 dst_a[4] = src[4 * src_stride + 0];

973 dst_b[4] = src[4 * src_stride + 1];

974 dst_a[5] = src[5 * src_stride + 0];

975 dst_b[5] = src[5 * src_stride + 1];

976 dst_a[6] = src[6 * src_stride + 0];

977 dst_b[6] = src[6 * src_stride + 1];

978 dst_a[7] = src[7 * src_stride + 0];

979 dst_b[7] = src[7 * src_stride + 1];

980 src += 2;

981 dst_a += dst_stride_a;

982 dst_b += dst_stride_b;

983 }

984 }

985

986 static void TransposeUVWxH_C(const uint8* src, int src_stride,

987 uint8* dst_a, int dst_stride_a,

988 uint8* dst_b, int dst_stride_b,

989 int width, int height) {

990 int i;

991 for (i = 0; i < width * 2; i += 2) {

992 int j;

993 for (j = 0; j < height; ++j) {

994 dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)];

995 dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1];

996 }

997 }

998 }

999

1000 LIBYUV_API	191 LIBYUV_API

1001 void TransposeUV(const uint8* src, int src_stride,	192 void TransposeUV(const uint8* src, int src_stride,

1002 uint8* dst_a, int dst_stride_a,	193 uint8* dst_a, int dst_stride_a,

1003 uint8* dst_b, int dst_stride_b,	194 uint8* dst_b, int dst_stride_b,

1004 int width, int height) {	195 int width, int height) {

1005 int i = height;	196 int i = height;

1006 void (*TransposeUVWx8)(const uint8* src, int src_stride,	197 void (*TransposeUVWx8)(const uint8* src, int src_stride,

1007 uint8* dst_a, int dst_stride_a,	198 uint8* dst_a, int dst_stride_a,

1008 uint8* dst_b, int dst_stride_b,	199 uint8* dst_b, int dst_stride_b,

1009 int width) = TransposeUVWx8_C;	200 int width) = TransposeUVWx8_C;

1010 #if defined(HAS_TRANSPOSE_UVWX8_NEON)	201 #if defined(HAS_TRANSPOSEUVWX8_NEON)

1011 if (TestCpuFlag(kCpuHasNEON)) {	202 if (TestCpuFlag(kCpuHasNEON)) {

1012 TransposeUVWx8 = TransposeUVWx8_NEON;	203 TransposeUVWx8 = TransposeUVWx8_NEON;

1013 }	204 }

1014 #endif	205 #endif

1015 #if defined(HAS_TRANSPOSE_UVWX8_SSE2)	206 #if defined(HAS_TRANSPOSEUVWX8_SSE2)

1016 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {	207 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {

1017 TransposeUVWx8 = TransposeUVWx8_SSE2;	208 TransposeUVWx8 = TransposeUVWx8_SSE2;

1018 }	209 }

1019 #endif	210 #endif

1020 #if defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)	211 #if defined(HAS_TRANSPOSEUVWx8_MIPS_DSPR2)

1021 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&	212 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&

1022 IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {	213 IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {

1023 TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;	214 TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;

1024 }	215 }

1025 #endif	216 #endif

1026	217

1027 // Work through the source in 8x8 tiles.	218 // Work through the source in 8x8 tiles.

1028 while (i >= 8) {	219 while (i >= 8) {

1029 TransposeUVWx8(src, src_stride,	220 TransposeUVWx8(src, src_stride,

1030 dst_a, dst_stride_a,	221 dst_a, dst_stride_a,

1031 dst_b, dst_stride_b,	222 dst_b, dst_stride_b,

1032 width);	223 width);

1033 src += 8 * src_stride; // Go down 8 rows.	224 src += 8 * src_stride; // Go down 8 rows.

1034 dst_a += 8; // Move over 8 columns.	225 dst_a += 8; // Move over 8 columns.

1035 dst_b += 8; // Move over 8 columns.	226 dst_b += 8; // Move over 8 columns.

1036 i -= 8;	227 i -= 8;

1037 }	228 }

1038	229

1039 TransposeUVWxH_C(src, src_stride,	230 if (i > 0) {

1040 dst_a, dst_stride_a,	231 TransposeUVWxH_C(src, src_stride,

1041 dst_b, dst_stride_b,	232 dst_a, dst_stride_a,

1042 width, i);	233 dst_b, dst_stride_b,

	234 width, i);

	235 }

1043 }	236 }

1044	237

1045 LIBYUV_API	238 LIBYUV_API

1046 void RotateUV90(const uint8* src, int src_stride,	239 void RotateUV90(const uint8* src, int src_stride,

1047 uint8* dst_a, int dst_stride_a,	240 uint8* dst_a, int dst_stride_a,

1048 uint8* dst_b, int dst_stride_b,	241 uint8* dst_b, int dst_stride_b,

1049 int width, int height) {	242 int width, int height) {

1050 src += src_stride * (height - 1);	243 src += src_stride * (height - 1);

1051 src_stride = -src_stride;	244 src_stride = -src_stride;

1052	245

(...skipping 241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1294 default:	487 default:

1295 break;	488 break;

1296 }	489 }

1297 return -1;	490 return -1;

1298 }	491 }

1299	492

1300 #ifdef __cplusplus	493 #ifdef __cplusplus

1301 } // extern "C"	494 } // extern "C"

1302 } // namespace libyuv	495 } // namespace libyuv

1303 #endif	496 #endif

OLD	NEW