Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c

Issue 756673003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
216 do { 216 do {
217 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); 217 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
218 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); 218 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
219 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); 219 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
220 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); 220 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
221 n_coeffs += 8 * 2; 221 n_coeffs += 8 * 2;
222 } while (n_coeffs < 0); 222 } while (n_coeffs < 0);
223 *eob_ptr = 0; 223 *eob_ptr = 0;
224 } 224 }
225 } 225 }
226
/*
 * vp9_quantize_fp_sse2: SSE2 quantizer working on 16 int16 coefficients
 * (two 128-bit vectors) per iteration.
 *
 * For each input coefficient c the non-skip path computes
 *   q  = sign(c) * high16((sat_add(|c|, round)) * quant)
 *   dq = low16(q * dequant)
 * and stores q to qcoeff_ptr and dq to dqcoeff_ptr.  *eob_ptr receives the
 * largest (iscan_ptr[i] + 1) over all positions whose dq is non-zero, or 0
 * when there is none.
 *
 * Parameters:
 *   coeff_ptr    input coefficients.
 *   n_coeffs     number of coefficients.  The first 16 are processed
 *                unconditionally and the index advances by 16, so this is
 *                presumably a positive multiple of 16 -- TODO confirm
 *                against callers.
 *   skip_block   when non-zero, qcoeff/dqcoeff are zero-filled and
 *                *eob_ptr is set to 0 without reading coeff_ptr.
 *   round_ptr, quant_ptr, dequant_ptr
 *                each is read as one vector of 8 int16.  All 8 lanes are
 *                applied to the first 8 coefficients; lanes 4..7 are then
 *                broadcast and applied to every remaining coefficient.
 *   qcoeff_ptr, dqcoeff_ptr
 *                output buffers for quantized / dequantized values.
 *   eob_ptr      output end-of-block count.
 *   iscan_ptr    per-position index used for the end-of-block computation.
 *   zbin_ptr, quant_shift_ptr, zbin_oq_value, scan_ptr
 *                unused by this implementation (explicitly cast to void).
 *
 * All vector accesses use _mm_load_si128/_mm_store_si128, i.e. the aligned
 * forms, so every buffer touched here is expected to be 16-byte aligned.
 */
227 void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
228 int skip_block, const int16_t* zbin_ptr,
229 const int16_t* round_ptr, const int16_t* quant_ptr,
230 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
231 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
232 int zbin_oq_value, uint16_t* eob_ptr,
233 const int16_t* scan_ptr,
234 const int16_t* iscan_ptr) {
235 __m128i zero;
// Silence unused-parameter warnings for arguments this variant ignores.
236 (void)scan_ptr;
237 (void)zbin_ptr;
238 (void)quant_shift_ptr;
239 (void)zbin_oq_value;
240
// Point every array at its end and negate the count, so that a single
// negative index (n_coeffs) can walk all arrays upward until it reaches 0.
241 coeff_ptr += n_coeffs;
242 iscan_ptr += n_coeffs;
243 qcoeff_ptr += n_coeffs;
244 dqcoeff_ptr += n_coeffs;
245 n_coeffs = -n_coeffs;
246 zero = _mm_setzero_si128();
247
248 if (!skip_block) {
// Running per-lane maximum of (iscan + 1) over non-zero positions.
249 __m128i eob;
250 __m128i round, quant, dequant;
// First block of 16 coefficients: handled outside the loop because the
// round/quant/dequant vectors are rebroadcast partway through it.
251 {
252 __m128i coeff0, coeff1;
253
254 // Setup global values
255 {
256 round = _mm_load_si128((const __m128i*)round_ptr);
257 quant = _mm_load_si128((const __m128i*)quant_ptr);
258 dequant = _mm_load_si128((const __m128i*)dequant_ptr);
259 }
260
261 {
262 __m128i coeff0_sign, coeff1_sign;
263 __m128i qcoeff0, qcoeff1;
264 __m128i qtmp0, qtmp1;
265 // Do DC and first 15 AC
266 coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
267 coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
268
269 // Poor man's sign extract
// The arithmetic shift yields 0 or -1 per lane; (x ^ s) - s is then |x|.
270 coeff0_sign = _mm_srai_epi16(coeff0, 15);
271 coeff1_sign = _mm_srai_epi16(coeff1, 15);
272 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
273 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
274 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
275 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
276
// Saturating add of the rounding term, then keep the high 16 bits of the
// product with quant.  After the first vector has used all 8 lanes, the
// upper 64 bits (lanes 4..7) are copied into both halves; that broadcast
// form is what the second vector and the whole AC loop below use.
277 qcoeff0 = _mm_adds_epi16(qcoeff0, round);
278 round = _mm_unpackhi_epi64(round, round);
279 qcoeff1 = _mm_adds_epi16(qcoeff1, round);
280 qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
281 quant = _mm_unpackhi_epi64(quant, quant);
282 qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
283
284 // Reinsert signs
285 qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
286 qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
287 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
288 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
289
290 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
291 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
292
// Dequantize (low 16 bits of the product).  coeff0/coeff1 are reused to
// hold the dequantized values from here on; dequant is rebroadcast the
// same way as round and quant above.
293 coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
294 dequant = _mm_unpackhi_epi64(dequant, dequant);
295 coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
296
297 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
298 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
299 }
300
301 {
302 // Scan for eob
// Note: coeff0/coeff1 hold the dequantized values at this point, so the
// non-zero test is made on dqcoeff rather than on qcoeff.
303 __m128i zero_coeff0, zero_coeff1;
304 __m128i nzero_coeff0, nzero_coeff1;
305 __m128i iscan0, iscan1;
306 __m128i eob1;
307 zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
308 zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
// Comparing the "is zero" mask with zero inverts it: all-ones where the
// value is non-zero.
309 nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
310 nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
311 iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
312 iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
313 // Add one to convert from indices to counts
// (subtracting the all-ones mask, i.e. -1, adds 1 in the non-zero lanes)
314 iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
315 iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
// Zero out lanes whose value is zero, then keep the per-lane maximum.
316 eob = _mm_and_si128(iscan0, nzero_coeff0);
317 eob1 = _mm_and_si128(iscan1, nzero_coeff1);
318 eob = _mm_max_epi16(eob, eob1);
319 }
320 n_coeffs += 8 * 2;
321 }
322
323 // AC only loop
// Same steps as the first block, using the broadcast round/quant/dequant
// vectors for both halves of each group of 16.
324 while (n_coeffs < 0) {
325 __m128i coeff0, coeff1;
326 {
327 __m128i coeff0_sign, coeff1_sign;
328 __m128i qcoeff0, qcoeff1;
329 __m128i qtmp0, qtmp1;
330
331 coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
332 coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
333
334 // Poor man's sign extract
335 coeff0_sign = _mm_srai_epi16(coeff0, 15);
336 coeff1_sign = _mm_srai_epi16(coeff1, 15);
337 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
338 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
339 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
340 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
341
342 qcoeff0 = _mm_adds_epi16(qcoeff0, round);
343 qcoeff1 = _mm_adds_epi16(qcoeff1, round);
344 qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
345 qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
346
347 // Reinsert signs
348 qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
349 qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
350 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
351 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
352
353 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
354 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
355
// coeff0/coeff1 now carry the dequantized values used by the eob scan.
356 coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
357 coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
358
359 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
360 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
361 }
362
363 {
364 // Scan for eob
365 __m128i zero_coeff0, zero_coeff1;
366 __m128i nzero_coeff0, nzero_coeff1;
367 __m128i iscan0, iscan1;
368 __m128i eob0, eob1;
369 zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
370 zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
371 nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
372 nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
373 iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
374 iscan1 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs) + 1);
375 // Add one to convert from indices to counts
376 iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
377 iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
378 eob0 = _mm_and_si128(iscan0, nzero_coeff0);
379 eob1 = _mm_and_si128(iscan1, nzero_coeff1);
380 eob0 = _mm_max_epi16(eob0, eob1);
// Fold this group's maximum into the running per-lane maximum.
381 eob = _mm_max_epi16(eob, eob0);
382 }
383 n_coeffs += 8 * 2;
384 }
385
386 // Accumulate EOB
// Horizontal maximum of the 8 lanes in three fold steps:
//   1) bring the upper 64 bits down and max  -> lanes 0..3 cover all 8
//   2) bring lanes 2,3 onto lanes 0,1 and max -> lanes 0,1 cover all 8
//   3) swap lanes 0 and 1 and max             -> lane 1 holds the maximum
387 {
388 __m128i eob_shuffled;
389 eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
390 eob = _mm_max_epi16(eob, eob_shuffled);
391 eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
392 eob = _mm_max_epi16(eob, eob_shuffled);
393 eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
394 eob = _mm_max_epi16(eob, eob_shuffled);
395 *eob_ptr = _mm_extract_epi16(eob, 1);
396 }
397 } else {
// Skipped block: zero-fill both outputs, 16 entries per pass.  Being a
// do/while, the first 16 entries are written even before the count is
// tested.
398 do {
399 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
400 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
401 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
402 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
403 n_coeffs += 8 * 2;
404 } while (n_coeffs < 0);
405 *eob_ptr = 0;
406 }
407 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c ('k') | source/libvpx/vp9/vp9_cx_iface.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698