Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm

Issue 390713002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_temporal_filter.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
227 227
228 ; actual quantize loop - setup pointers, rounders, etc. 228 ; actual quantize loop - setup pointers, rounders, etc.
229 movifnidn coeffq, coeffmp 229 movifnidn coeffq, coeffmp
230 movifnidn ncoeffq, ncoeffmp 230 movifnidn ncoeffq, ncoeffmp
231 mov r2, dequantmp 231 mov r2, dequantmp
232 movifnidn zbinq, zbinmp 232 movifnidn zbinq, zbinmp
233 movifnidn roundq, roundmp 233 movifnidn roundq, roundmp
234 movifnidn quantq, quantmp 234 movifnidn quantq, quantmp
235 mova m1, [roundq] ; m1 = round 235 mova m1, [roundq] ; m1 = round
236 mova m2, [quantq] ; m2 = quant 236 mova m2, [quantq] ; m2 = quant
237 %ifidn %1, b_32x32 237 %ifidn %1, fp_32x32
238 ; TODO(jingning) to be continued with 32x32 quantization process
239 pcmpeqw m5, m5 238 pcmpeqw m5, m5
240 psrlw m5, 15 239 psrlw m5, 15
241 paddw m0, m5
242 paddw m1, m5 240 paddw m1, m5
243 psrlw m0, 1 ; m0 = (m0 + 1) / 2
244 psrlw m1, 1 ; m1 = (m1 + 1) / 2 241 psrlw m1, 1 ; m1 = (m1 + 1) / 2
245 %endif 242 %endif
246 mova m3, [r2q] ; m3 = dequant 243 mova m3, [r2q] ; m3 = dequant
247 mov r3, qcoeffmp 244 mov r3, qcoeffmp
248 mov r4, dqcoeffmp 245 mov r4, dqcoeffmp
249 mov r5, iscanmp 246 mov r5, iscanmp
250 %ifidn %1, b_32x32 247 %ifidn %1, fp_32x32
251 psllw m4, 1 248 psllw m2, 1
252 %endif 249 %endif
253 pxor m5, m5 ; m5 = dedicated zero 250 pxor m5, m5 ; m5 = dedicated zero
254 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob 251 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
255 lea coeffq, [ coeffq+ncoeffq*2] 252 lea coeffq, [ coeffq+ncoeffq*2]
256 lea iscanq, [ iscanq+ncoeffq*2] 253 lea iscanq, [ iscanq+ncoeffq*2]
257 lea qcoeffq, [ qcoeffq+ncoeffq*2] 254 lea qcoeffq, [ qcoeffq+ncoeffq*2]
258 lea dqcoeffq, [dqcoeffq+ncoeffq*2] 255 lea dqcoeffq, [dqcoeffq+ncoeffq*2]
259 neg ncoeffq 256 neg ncoeffq
260 257
261 ; get DC and first 15 AC coeffs 258 ; get DC and first 15 AC coeffs
262 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 259 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
263 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 260 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
264 pabsw m6, m9 ; m6 = abs(m9) 261 pabsw m6, m9 ; m6 = abs(m9)
265 pabsw m11, m10 ; m11 = abs(m10) 262 pabsw m11, m10 ; m11 = abs(m10)
266 pcmpeqw m7, m7 263 pcmpeqw m7, m7
267 264
268 paddsw m6, m1 ; m6 += round 265 paddsw m6, m1 ; m6 += round
269 punpckhqdq m1, m1 266 punpckhqdq m1, m1
270 paddsw m11, m1 ; m11 += round 267 paddsw m11, m1 ; m11 += round
271 pmulhw m8, m6, m2 ; m8 = m6*q>>16 268 pmulhw m8, m6, m2 ; m8 = m6*q>>16
272 punpckhqdq m2, m2 269 punpckhqdq m2, m2
273 pmulhw m13, m11, m2 ; m13 = m11*q>>16 270 pmulhw m13, m11, m2 ; m13 = m11*q>>16
274 psignw m8, m9 ; m8 = reinsert sign 271 psignw m8, m9 ; m8 = reinsert sign
275 psignw m13, m10 ; m13 = reinsert sign 272 psignw m13, m10 ; m13 = reinsert sign
276 mova [qcoeffq+ncoeffq*2+ 0], m8 273 mova [qcoeffq+ncoeffq*2+ 0], m8
277 mova [qcoeffq+ncoeffq*2+16], m13 274 mova [qcoeffq+ncoeffq*2+16], m13
278 %ifidn %1, b_32x32 275 %ifidn %1, fp_32x32
279 pabsw m8, m8 276 pabsw m8, m8
280 pabsw m13, m13 277 pabsw m13, m13
281 %endif 278 %endif
282 pmullw m8, m3 ; dqc[i] = qc[i] * q 279 pmullw m8, m3 ; dqc[i] = qc[i] * q
283 punpckhqdq m3, m3 280 punpckhqdq m3, m3
284 pmullw m13, m3 ; dqc[i] = qc[i] * q 281 pmullw m13, m3 ; dqc[i] = qc[i] * q
285 %ifidn %1, b_32x32 282 %ifidn %1, fp_32x32
286 psrlw m8, 1 283 psrlw m8, 1
287 psrlw m13, 1 284 psrlw m13, 1
288 psignw m8, m9 285 psignw m8, m9
289 psignw m13, m10 286 psignw m13, m10
287 psrlw m0, m3, 2
290 %endif 288 %endif
291 mova [dqcoeffq+ncoeffq*2+ 0], m8 289 mova [dqcoeffq+ncoeffq*2+ 0], m8
292 mova [dqcoeffq+ncoeffq*2+16], m13 290 mova [dqcoeffq+ncoeffq*2+16], m13
293 pcmpeqw m8, m5 ; m8 = c[i] == 0 291 pcmpeqw m8, m5 ; m8 = c[i] == 0
294 pcmpeqw m13, m5 ; m13 = c[i] == 0 292 pcmpeqw m13, m5 ; m13 = c[i] == 0
295 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] 293 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
296 mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] 294 mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
297 psubw m6, m7 ; m6 = scan[i] + 1 295 psubw m6, m7 ; m6 = scan[i] + 1
298 psubw m11, m7 ; m11 = scan[i] + 1 296 psubw m11, m7 ; m11 = scan[i] + 1
299 pandn m8, m6 ; m8 = max(eob) 297 pandn m8, m6 ; m8 = max(eob)
300 pandn m13, m11 ; m13 = max(eob) 298 pandn m13, m11 ; m13 = max(eob)
301 pmaxsw m8, m13 299 pmaxsw m8, m13
302 add ncoeffq, mmsize 300 add ncoeffq, mmsize
303 jz .accumulate_eob 301 jz .accumulate_eob
304 302
305 .ac_only_loop: 303 .ac_only_loop:
306 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 304 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
307 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 305 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
308 pabsw m6, m9 ; m6 = abs(m9) 306 pabsw m6, m9 ; m6 = abs(m9)
309 pabsw m11, m10 ; m11 = abs(m10) 307 pabsw m11, m10 ; m11 = abs(m10)
310 pcmpeqw m7, m7 308 %ifidn %1, fp_32x32
311 %ifidn %1, b_32x32 309 pcmpgtw m7, m6, m0
310 pcmpgtw m12, m11, m0
312 pmovmskb r6, m7 311 pmovmskb r6, m7
313 pmovmskb r2, m7 312 pmovmskb r2, m12
313
314 or r6, r2 314 or r6, r2
315 jz .skip_iter 315 jz .skip_iter
316 %endif 316 %endif
317 pcmpeqw m7, m7
318
317 paddsw m6, m1 ; m6 += round 319 paddsw m6, m1 ; m6 += round
318 paddsw m11, m1 ; m11 += round 320 paddsw m11, m1 ; m11 += round
319 pmulhw m14, m6, m2 ; m14 = m6*q>>16 321 pmulhw m14, m6, m2 ; m14 = m6*q>>16
320 pmulhw m13, m11, m2 ; m13 = m11*q>>16 322 pmulhw m13, m11, m2 ; m13 = m11*q>>16
321 psignw m14, m9 ; m14 = reinsert sign 323 psignw m14, m9 ; m14 = reinsert sign
322 psignw m13, m10 ; m13 = reinsert sign 324 psignw m13, m10 ; m13 = reinsert sign
323 mova [qcoeffq+ncoeffq*2+ 0], m14 325 mova [qcoeffq+ncoeffq*2+ 0], m14
324 mova [qcoeffq+ncoeffq*2+16], m13 326 mova [qcoeffq+ncoeffq*2+16], m13
325 %ifidn %1, b_32x32 327 %ifidn %1, fp_32x32
326 pabsw m14, m14 328 pabsw m14, m14
327 pabsw m13, m13 329 pabsw m13, m13
328 %endif 330 %endif
329 pmullw m14, m3 ; dqc[i] = qc[i] * q 331 pmullw m14, m3 ; dqc[i] = qc[i] * q
330 pmullw m13, m3 ; dqc[i] = qc[i] * q 332 pmullw m13, m3 ; dqc[i] = qc[i] * q
331 %ifidn %1, b_32x32 333 %ifidn %1, fp_32x32
332 psrlw m14, 1 334 psrlw m14, 1
333 psrlw m13, 1 335 psrlw m13, 1
334 psignw m14, m9 336 psignw m14, m9
335 psignw m13, m10 337 psignw m13, m10
336 %endif 338 %endif
337 mova [dqcoeffq+ncoeffq*2+ 0], m14 339 mova [dqcoeffq+ncoeffq*2+ 0], m14
338 mova [dqcoeffq+ncoeffq*2+16], m13 340 mova [dqcoeffq+ncoeffq*2+16], m13
339 pcmpeqw m14, m5 ; m14 = c[i] == 0 341 pcmpeqw m14, m5 ; m14 = c[i] == 0
340 pcmpeqw m13, m5 ; m13 = c[i] == 0 342 pcmpeqw m13, m5 ; m13 = c[i] == 0
341 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] 343 mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
342 mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] 344 mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
343 psubw m6, m7 ; m6 = scan[i] + 1 345 psubw m6, m7 ; m6 = scan[i] + 1
344 psubw m11, m7 ; m11 = scan[i] + 1 346 psubw m11, m7 ; m11 = scan[i] + 1
345 pandn m14, m6 ; m14 = max(eob) 347 pandn m14, m6 ; m14 = max(eob)
346 pandn m13, m11 ; m13 = max(eob) 348 pandn m13, m11 ; m13 = max(eob)
347 pmaxsw m8, m14 349 pmaxsw m8, m14
348 pmaxsw m8, m13 350 pmaxsw m8, m13
349 add ncoeffq, mmsize 351 add ncoeffq, mmsize
350 jl .ac_only_loop 352 jl .ac_only_loop
351 353
352 %ifidn %1, b_32x32 354 %ifidn %1, fp_32x32
353 jmp .accumulate_eob 355 jmp .accumulate_eob
354 .skip_iter: 356 .skip_iter:
355 mova [qcoeffq+ncoeffq*2+ 0], m5 357 mova [qcoeffq+ncoeffq*2+ 0], m5
356 mova [qcoeffq+ncoeffq*2+16], m5 358 mova [qcoeffq+ncoeffq*2+16], m5
357 mova [dqcoeffq+ncoeffq*2+ 0], m5 359 mova [dqcoeffq+ncoeffq*2+ 0], m5
358 mova [dqcoeffq+ncoeffq*2+16], m5 360 mova [dqcoeffq+ncoeffq*2+16], m5
359 add ncoeffq, mmsize 361 add ncoeffq, mmsize
360 jl .ac_only_loop 362 jl .ac_only_loop
361 %endif 363 %endif
362 364
(...skipping 27 matching lines...) Expand all
390 mova [qcoeffq+ncoeffq*2+ 0], m7 392 mova [qcoeffq+ncoeffq*2+ 0], m7
391 mova [qcoeffq+ncoeffq*2+16], m7 393 mova [qcoeffq+ncoeffq*2+16], m7
392 add ncoeffq, mmsize 394 add ncoeffq, mmsize
393 jl .blank_loop 395 jl .blank_loop
394 mov word [eobq], 0 396 mov word [eobq], 0
395 RET 397 RET
396 %endmacro 398 %endmacro
397 399
398 INIT_XMM ssse3 400 INIT_XMM ssse3
399 QUANTIZE_FP fp, 7 401 QUANTIZE_FP fp, 7
402 QUANTIZE_FP fp_32x32, 7
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_temporal_filter.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698