Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(484)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm

Issue 375983002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_temporal_filter.c ('k') | source/libvpx/vp9/vp9_cx_iface.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 mova [qcoeffq+ncoeffq*2+16], m7 210 mova [qcoeffq+ncoeffq*2+16], m7
211 add ncoeffq, mmsize 211 add ncoeffq, mmsize
212 jl .blank_loop 212 jl .blank_loop
213 mov word [eobq], 0 213 mov word [eobq], 0
214 RET 214 RET
215 %endmacro 215 %endmacro
216 216
217 INIT_XMM ssse3 217 INIT_XMM ssse3
218 QUANTIZE_FN b, 7 218 QUANTIZE_FN b, 7
219 QUANTIZE_FN b_32x32, 7 219 QUANTIZE_FN b_32x32, 7
220
;-----------------------------------------------------------------------
; QUANTIZE_FP <suffix>, <num_gprs>
;
; Emits the function quantize_<suffix> through x86inc's cglobal.
;   %1  name suffix appended to "quantize_"
;   %2  second numeric argument forwarded to cglobal (general-purpose
;       register count - NOTE(review): confirm against x86inc.asm)
;
; Named arguments, in order:
;   coeff, ncoeff, skip, zbin, round, quant, shift, qcoeff, dqcoeff,
;   dequant, zbin_oq, eob, scan, iscan
; Only coeff, ncoeff, skip, round, quant, qcoeff, dqcoeff, dequant, eob
; and iscan are actually read by the code below.
;
; Behaviour:
;   skip != 0 : qcoeff[] and dqcoeff[] are filled with zeroes and the
;               16-bit value at eob is set to 0.
;   skip == 0 : for every coefficient
;                 q         = ((sat16(|c| + round) * quant) >> 16) with the
;                             sign of c put back
;                 qcoeff[i] = q
;                 dqcoeff[i]= low 16 bits of q * dequant
;               and the 16-bit value at eob receives
;                 max(iscan[i] + 1) over all i with dqcoeff[i] != 0
;               (0 when every dqcoeff is zero).
;
; Each iteration handles mmsize (16) coefficients, i.e. two XMM
; registers of 16-bit words, so ncoeff is assumed to be a non-zero
; multiple of 16.  All vector loads/stores use mova - presumably
; 16-byte alignment of every buffer is required; verify against the
; callers.
;
; Vector register roles on the skip == 0 path:
;   m1 = round, m2 = quant, m3 = dequant, m5 = zero, m7 = all ones (-1),
;   m8 = running maximum for eob.
;-----------------------------------------------------------------------
%macro QUANTIZE_FP 2
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
                                shift, qcoeff, dqcoeff, dequant, zbin_oq, \
                                eob, scan, iscan
  cmp                    dword skipm, 0
  jne .blank

  ; actual quantize loop - setup pointers, rounders, etc.
  movifnidn                   coeffq, coeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, dequantmp
  movifnidn                    zbinq, zbinmp
  movifnidn                   roundq, roundmp
  movifnidn                   quantq, quantmp
  mova                            m1, [roundq]             ; m1 = round
  mova                            m2, [quantq]             ; m2 = quant
%ifidn %1, b_32x32
  ; TODO(jingning) to be continued with 32x32 quantization process
  ; NOTE(review): this branch is assembled only when %1 is b_32x32; the
  ; instantiation at the end of this block passes "fp", so it is unused
  ; here.  m0 is never loaded anywhere in this macro - confirm what it
  ; is meant to hold before enabling this path.
  pcmpeqw                         m5, m5
  psrlw                           m5, 15                   ; every word of m5 = 1
  paddw                           m0, m5
  paddw                           m1, m5
  psrlw                           m0, 1                    ; m0 = (m0 + 1) / 2
  psrlw                           m1, 1                    ; m1 = (m1 + 1) / 2
%endif
  mova                            m3, [r2q]                ; m3 = dequant
  mov                             r3, qcoeffmp
  mov                             r4, dqcoeffmp
  mov                             r5, iscanmp
%ifidn %1, b_32x32
  ; NOTE(review): m4 is never loaded anywhere in this macro either.
  psllw                           m4, 1
%endif
  pxor                            m5, m5                   ; m5 = dedicated zero
  DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob

  ; Point every buffer at its end and count ncoeff up from -ncoeff to 0,
  ; so the loop-exit test is simply the flags left by the add.
  lea                         coeffq, [  coeffq+ncoeffq*2]
  lea                         iscanq, [  iscanq+ncoeffq*2]
  lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
  lea                       dqcoeffq, [dqcoeffq+ncoeffq*2]
  neg                        ncoeffq

  ; get DC and first 15 AC coeffs
  mova                            m9, [  coeffq+ncoeffq*2+ 0] ; m9 = c[i]
  mova                           m10, [  coeffq+ncoeffq*2+16] ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)
  pcmpeqw                         m7, m7                   ; m7 = all ones (-1 per word)

  ; The first 8 lanes use round/quant/dequant exactly as loaded.  After
  ; that first use each register has its upper 64 bits copied into both
  ; halves, and that form is what every later coefficient sees.
  paddsw                          m6, m1                   ; m6 += round
  punpckhqdq                      m1, m1
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                          m8, m6, m2               ; m8 = m6*q>>16
  punpckhqdq                      m2, m2
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                          m8, m9                   ; m8 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  mova        [qcoeffq+ncoeffq*2+ 0], m8
  mova        [qcoeffq+ncoeffq*2+16], m13
%ifidn %1, b_32x32
  pabsw                           m8, m8
  pabsw                          m13, m13
%endif
  pmullw                          m8, m3                   ; dqc[i] = qc[i] * q
  punpckhqdq                      m3, m3
  pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
%ifidn %1, b_32x32
  psrlw                           m8, 1
  psrlw                          m13, 1
  psignw                          m8, m9
  psignw                         m13, m10
%endif
  mova       [dqcoeffq+ncoeffq*2+ 0], m8
  mova       [dqcoeffq+ncoeffq*2+16], m13
  pcmpeqw                         m8, m5                   ; m8 = (dqc[i] == 0)
  pcmpeqw                        m13, m5                   ; m13 = (dqc[i] == 0)
  mova                            m6, [  iscanq+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  iscanq+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                           m8, m6                   ; m8 = dqc[i] ? iscan[i] + 1 : 0
  pandn                          m13, m11                  ; m13 = dqc[i] ? iscan[i] + 1 : 0
  pmaxsw                          m8, m13                  ; m8 = running max(eob)
  add                        ncoeffq, mmsize
  jz .accumulate_eob                                       ; only 16 coefficients in total

.ac_only_loop:
  mova                            m9, [  coeffq+ncoeffq*2+ 0] ; m9 = c[i]
  mova                           m10, [  coeffq+ncoeffq*2+16] ; m10 = c[i]
  pabsw                           m6, m9                   ; m6 = abs(m9)
  pabsw                          m11, m10                  ; m11 = abs(m10)
  pcmpeqw                         m7, m7                   ; m7 = all ones (-1 per word)
%ifidn %1, b_32x32
  ; NOTE(review): m7 was just set to all ones, so both masks below are
  ; non-zero and the jz can never be taken as written.
  pmovmskb                        r6, m7
  pmovmskb                        r2, m7
  or                              r6, r2
  jz .skip_iter
%endif
  paddsw                          m6, m1                   ; m6 += round
  paddsw                         m11, m1                   ; m11 += round
  pmulhw                         m14, m6, m2               ; m14 = m6*q>>16
  pmulhw                         m13, m11, m2              ; m13 = m11*q>>16
  psignw                         m14, m9                   ; m14 = reinsert sign
  psignw                         m13, m10                  ; m13 = reinsert sign
  mova        [qcoeffq+ncoeffq*2+ 0], m14
  mova        [qcoeffq+ncoeffq*2+16], m13
%ifidn %1, b_32x32
  pabsw                          m14, m14
  pabsw                          m13, m13
%endif
  pmullw                         m14, m3                   ; dqc[i] = qc[i] * q
  pmullw                         m13, m3                   ; dqc[i] = qc[i] * q
%ifidn %1, b_32x32
  psrlw                          m14, 1
  psrlw                          m13, 1
  psignw                         m14, m9
  psignw                         m13, m10
%endif
  mova       [dqcoeffq+ncoeffq*2+ 0], m14
  mova       [dqcoeffq+ncoeffq*2+16], m13
  pcmpeqw                        m14, m5                   ; m14 = (dqc[i] == 0)
  pcmpeqw                        m13, m5                   ; m13 = (dqc[i] == 0)
  mova                            m6, [  iscanq+ncoeffq*2+ 0] ; m6 = iscan[i]
  mova                           m11, [  iscanq+ncoeffq*2+16] ; m11 = iscan[i]
  psubw                           m6, m7                   ; m6 = iscan[i] + 1
  psubw                          m11, m7                   ; m11 = iscan[i] + 1
  pandn                          m14, m6                   ; m14 = dqc[i] ? iscan[i] + 1 : 0
  pandn                          m13, m11                  ; m13 = dqc[i] ? iscan[i] + 1 : 0
  pmaxsw                          m8, m14                  ; fold both halves into the
  pmaxsw                          m8, m13                  ; running max(eob)
  add                        ncoeffq, mmsize
  jl .ac_only_loop

%ifidn %1, b_32x32
  jmp .accumulate_eob
.skip_iter:
  ; write zeroes for this group of 16 coefficients and move on
  mova        [qcoeffq+ncoeffq*2+ 0], m5
  mova        [qcoeffq+ncoeffq*2+16], m5
  mova       [dqcoeffq+ncoeffq*2+ 0], m5
  mova       [dqcoeffq+ncoeffq*2+16], m5
  add                        ncoeffq, mmsize
  jl .ac_only_loop
%endif

.accumulate_eob:
  ; horizontally accumulate/max eobs and write into [eob] memory pointer
  mov                             r2, eobmp
  pshufd                          m7, m8, 0xe              ; upper 4 words -> lower 4
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0xe              ; words 2,3 -> words 0,1
  pmaxsw                          m8, m7
  pshuflw                         m7, m8, 0x1              ; word 1 -> word 0
  pmaxsw                          m8, m7
  pextrw                          r6, m8, 0                ; r6 = max over all 8 lanes
  ; eob is a 16-bit value (the .blank path stores it with "mov word"),
  ; so store only the low word; a full-register store would overwrite
  ; the bytes that follow *eob.
  mov                           [r2], r6w
  RET

  ; skip-block, i.e. just write all zeroes
.blank:
  mov                             r0, dqcoeffmp
  movifnidn                  ncoeffq, ncoeffmp
  mov                             r2, qcoeffmp
  mov                             r3, eobmp
  DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
  ; same end-pointer / negative-index scheme as the quantize path
  lea                       dqcoeffq, [dqcoeffq+ncoeffq*2]
  lea                        qcoeffq, [ qcoeffq+ncoeffq*2]
  neg                        ncoeffq
  pxor                            m7, m7                   ; m7 = zero
.blank_loop:
  mova       [dqcoeffq+ncoeffq*2+ 0], m7
  mova       [dqcoeffq+ncoeffq*2+16], m7
  mova        [qcoeffq+ncoeffq*2+ 0], m7
  mova        [qcoeffq+ncoeffq*2+16], m7
  add                        ncoeffq, mmsize
  jl .blank_loop
  mov                    word [eobq], 0
  RET
%endmacro

; Instantiate the SSSE3 variant: quantize_fp.
INIT_XMM ssse3
QUANTIZE_FP fp, 7
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_temporal_filter.c ('k') | source/libvpx/vp9/vp9_cx_iface.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698