// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of lossless functions
//
// Author(s): Djordje Pesut    (djordje.pesut@imgtec.com)
//            Jovan Zelincevic (jovan.zelincevic@imgtec.com)

#include "./dsp.h"
#include "./lossless.h"

#if defined(WEBP_USE_MIPS32)

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

#define APPROX_LOG_WITH_CORRECTION_MAX  65536
#define APPROX_LOG_MAX                   4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086

static float FastSLog2Slow(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y, correction;
    const int c24 = 24;
    const float v_f = (float)v;
    uint32_t temp;

    // Xf = 256 = 2^8
    // log_cnt is index of leading one in upper 24 bits
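    // Rough C equivalent of the asm block below, assuming a 32-bit
    // count-leading-zeros such as GCC's __builtin_clz (for readability only):
    //   log_cnt = 24 - __builtin_clz(v);  // 1 <= log_cnt <= 8 for 2^8 <= v < 2^16
    //   y       = 1u << log_cnt;
    //   temp    = v >> log_cnt;           // 128 <= temp <= 255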
    __asm__ volatile(
      "clz %[log_cnt], %[v] \n\t"
      "addiu %[y], $zero, 1 \n\t"
      "subu %[log_cnt], %[c24], %[log_cnt] \n\t"
      "sllv %[y], %[y], %[log_cnt] \n\t"
      "srlv %[temp], %[v], %[log_cnt] \n\t"
      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
        [temp]"=r"(temp)
      : [c24]"r"(c24), [v]"r"(v)
    );

    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
    // Xf = floor(Xf) * (1 + (v % y) / v)
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    // The correction factor: log(1 + d) ~ d; for very small d values, so
    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y) / v
    // LOG_2_RECIPROCAL ~ 23/16

    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
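    // Worked example with illustrative values: v = 1001 gives log_cnt = 2,
    // y = 4, temp = 250 and correction = (23 * (1001 & 3)) >> 4 = 1, so the
    // result is roughly 1001 * (log2(250) + 2) + 1 ~= 9976.8, versus the
    // exact 1001 * log2(1001) ~= 9977.2.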
    correction = (23 * (v & (y - 1))) >> 4;
    return v_f * (kLog2Table[temp] + log_cnt) + correction;
  } else {
    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
  }
}

static float FastLog2Slow(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y;
    const int c24 = 24;
    double log_2;
    uint32_t temp;

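    // The asm below is the same bit scan as in FastSLog2Slow above:
    //   log_cnt = 24 - clz(v), y = 1 << log_cnt, temp = v >> log_cnt.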
    __asm__ volatile(
      "clz %[log_cnt], %[v] \n\t"
      "addiu %[y], $zero, 1 \n\t"
      "subu %[log_cnt], %[c24], %[log_cnt] \n\t"
      "sllv %[y], %[y], %[log_cnt] \n\t"
      "srlv %[temp], %[v], %[log_cnt] \n\t"
      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
        [temp]"=r"(temp)
      : [c24]"r"(c24), [v]"r"(v)
    );

    log_2 = kLog2Table[temp] + log_cnt;
    if (v >= APPROX_LOG_MAX) {
      // Since the division is still expensive, add this correction factor only
      // for large values of 'v'.

      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
      log_2 += (double)correction / v;
    }
    return (float)log_2;
  } else {
    return (float)(LOG_2_RECIPROCAL * log((double)v));
  }
}

// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const uint32_t* pop = &population[4];
//   const uint32_t* LoopEnd = &population[length];
//   while (pop != LoopEnd) {
//     ++i;
//     cost += i * *pop;
//     cost += i * *(pop + 1);
//     pop += 2;
//   }
//   return (double)cost;
static double ExtraCost(const uint32_t* const population, int length) {
  int i, temp0, temp1;
  const uint32_t* pop = &population[4];
  const uint32_t* const LoopEnd = &population[length];

  __asm__ volatile(
    "mult $zero, $zero \n\t"
    "xor %[i], %[i], %[i] \n\t"
    "beq %[pop], %[LoopEnd], 2f \n\t"
    "1: \n\t"
    "lw %[temp0], 0(%[pop]) \n\t"
    "lw %[temp1], 4(%[pop]) \n\t"
    "addiu %[i], %[i], 1 \n\t"
    "addiu %[pop], %[pop], 8 \n\t"
    "madd %[i], %[temp0] \n\t"
    "madd %[i], %[temp1] \n\t"
    "bne %[pop], %[LoopEnd], 1b \n\t"
    "2: \n\t"
    "mfhi %[temp0] \n\t"
    "mflo %[temp1] \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [i]"=&r"(i), [pop]"+r"(pop)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  return (double)((int64_t)temp0 << 32 | temp1);
}

// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const uint32_t* pX = &X[4];
//   const uint32_t* pY = &Y[4];
//   const uint32_t* LoopEnd = &X[length];
//   while (pX != LoopEnd) {
//     const uint32_t xy0 = *pX + *pY;
//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
//     ++i;
//     cost += i * xy0;
//     cost += i * xy1;
//     pX += 2;
//     pY += 2;
//   }
//   return (double)cost;
static double ExtraCostCombined(const uint32_t* const X,
                                const uint32_t* const Y, int length) {
  int i, temp0, temp1, temp2, temp3;
  const uint32_t* pX = &X[4];
  const uint32_t* pY = &Y[4];
  const uint32_t* const LoopEnd = &X[length];

  __asm__ volatile(
    "mult $zero, $zero \n\t"
    "xor %[i], %[i], %[i] \n\t"
    "beq %[pX], %[LoopEnd], 2f \n\t"
    "1: \n\t"
    "lw %[temp0], 0(%[pX]) \n\t"
    "lw %[temp1], 0(%[pY]) \n\t"
    "lw %[temp2], 4(%[pX]) \n\t"
    "lw %[temp3], 4(%[pY]) \n\t"
    "addiu %[i], %[i], 1 \n\t"
    "addu %[temp0], %[temp0], %[temp1] \n\t"
    "addu %[temp2], %[temp2], %[temp3] \n\t"
    "addiu %[pX], %[pX], 8 \n\t"
    "addiu %[pY], %[pY], 8 \n\t"
    "madd %[i], %[temp0] \n\t"
    "madd %[i], %[temp2] \n\t"
    "bne %[pX], %[LoopEnd], 1b \n\t"
    "2: \n\t"
    "mfhi %[temp0] \n\t"
    "mflo %[temp1] \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  return (double)((int64_t)temp0 << 32 | temp1);
}

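// Equivalent C for the HUFFMAN_COST_PASS macro below; temp0 is expected to
// hold 0 or 1 on entry (whether the current population value is non-zero):
//   if (streak > 3) {
//     stats.counts[temp0] += 1;
//     stats.streaks[temp0][1] += streak;
//   } else {
//     stats.streaks[temp0][0] += streak;
//   }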
#define HUFFMAN_COST_PASS \
  __asm__ volatile( \
    "sll %[temp1], %[temp0], 3 \n\t" \
    "addiu %[temp3], %[streak], -3 \n\t" \
    "addu %[temp2], %[pstreaks], %[temp1] \n\t" \
    "blez %[temp3], 1f \n\t" \
    "srl %[temp1], %[temp1], 1 \n\t" \
    "addu %[temp3], %[pcnts], %[temp1] \n\t" \
    "lw %[temp0], 4(%[temp2]) \n\t" \
    "lw %[temp1], 0(%[temp3]) \n\t" \
    "addu %[temp0], %[temp0], %[streak] \n\t" \
    "addiu %[temp1], %[temp1], 1 \n\t" \
    "sw %[temp0], 4(%[temp2]) \n\t" \
    "sw %[temp1], 0(%[temp3]) \n\t" \
    "b 2f \n\t" \
    "1: \n\t" \
    "lw %[temp0], 0(%[temp2]) \n\t" \
    "addu %[temp0], %[temp0], %[streak] \n\t" \
    "sw %[temp0], 0(%[temp2]) \n\t" \
    "2: \n\t" \
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \
      [temp3]"=&r"(temp3), [temp0]"+r"(temp0) \
    : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts), \
      [streak]"r"(streak) \
    : "memory" \
  );

// Returns the various RLE counts
static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
  int i;
  int streak = 0;
  VP8LStreaks stats;
  int* const pstreaks = &stats.streaks[0][0];
  int* const pcnts = &stats.counts[0];
  int temp0, temp1, temp2, temp3;
  memset(&stats, 0, sizeof(stats));
  for (i = 0; i < length - 1; ++i) {
    ++streak;
    if (population[i] == population[i + 1]) {
      continue;
    }
    temp0 = (population[i] != 0);
    HUFFMAN_COST_PASS
    streak = 0;
  }
  ++streak;
  temp0 = (population[i] != 0);
  HUFFMAN_COST_PASS

  return stats;
}

static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
                                            const uint32_t* Y, int length) {
  int i;
  int streak = 0;
  VP8LStreaks stats;
  int* const pstreaks = &stats.streaks[0][0];
  int* const pcnts = &stats.counts[0];
  int temp0, temp1, temp2, temp3;
  memset(&stats, 0, sizeof(stats));
  for (i = 0; i < length - 1; ++i) {
    const uint32_t xy = X[i] + Y[i];
    const uint32_t xy_next = X[i + 1] + Y[i + 1];
    ++streak;
    if (xy == xy_next) {
      continue;
    }
    temp0 = (xy != 0);
    HUFFMAN_COST_PASS
    streak = 0;
  }
  {
    const uint32_t xy = X[i] + Y[i];
    ++streak;
    temp0 = (xy != 0);
    HUFFMAN_COST_PASS
  }

  return stats;
}

#define ASM_START \
  __asm__ volatile( \
    ".set push \n\t" \
    ".set at \n\t" \
    ".set macro \n\t" \
    "1: \n\t"

// P2 = P0 + P1
// A..D - offsets
// E - temp variable to tell macro
//     if pointer should be incremented
// literal_ and successive histograms could be unaligned
// so we must use ulw and usw
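// C sketch of one ADD_TO_OUT iteration as used by ADD_VECTOR below
// (P0 = pa, P1 = pb, P2 = pout, E = 1, offsets A..D = 0, 4, 8, 12):
//   pout[0] = pa[0] + pb[0];  pout[1] = pa[1] + pb[1];
//   pout[2] = pa[2] + pb[2];  pout[3] = pa[3] + pb[3];
//   pa += 4; pb += 4; pout += 4;   // P1 is only advanced when E == 1
// repeated until pa reaches LoopEnd.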
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
  "ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
  "ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
  "ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
  "ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
  "ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
  "ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
  "ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
  "ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
  "addu %[temp4], %[temp4], %[temp0] \n\t" \
  "addu %[temp5], %[temp5], %[temp1] \n\t" \
  "addu %[temp6], %[temp6], %[temp2] \n\t" \
  "addu %[temp7], %[temp7], %[temp3] \n\t" \
  "addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
  ".if " #E " == 1 \n\t" \
  "addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
  ".endif \n\t" \
  "usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
  "usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
  "usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
  "usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
  "addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
  "bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
  ".set pop \n\t" \

#define ASM_END_COMMON_0 \
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), \
      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), \
      [pa]"+r"(pa), [pout]"+r"(pout)

#define ASM_END_COMMON_1 \
    : [LoopEnd]"r"(LoopEnd) \
    : "memory", "at" \
  );

#define ASM_END_0 \
  ASM_END_COMMON_0 \
  , [pb]"+r"(pb) \
  ASM_END_COMMON_1

#define ASM_END_1 \
  ASM_END_COMMON_0 \
  ASM_END_COMMON_1

#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE) do { \
  const uint32_t* pa = (const uint32_t*)(A); \
  const uint32_t* pb = (const uint32_t*)(B); \
  uint32_t* pout = (uint32_t*)(OUT); \
  const uint32_t* const LoopEnd = pa + (SIZE); \
  assert((SIZE) % 4 == 0); \
  ASM_START \
  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout) \
  ASM_END_0 \
  if ((EXTRA_SIZE) > 0) { \
    const int last = (EXTRA_SIZE); \
    int i; \
    for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
  } \
} while (0)

#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE) do { \
  const uint32_t* pa = (const uint32_t*)(A); \
  uint32_t* pout = (uint32_t*)(OUT); \
  const uint32_t* const LoopEnd = pa + (SIZE); \
  assert((SIZE) % 4 == 0); \
  ASM_START \
  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout) \
  ASM_END_1 \
  if ((EXTRA_SIZE) > 0) { \
    const int last = (EXTRA_SIZE); \
    int i; \
    for (i = 0; i < last; ++i) pout[i] += pa[i]; \
  } \
} while (0)

static void HistogramAdd(const VP8LHistogram* const a,
                         const VP8LHistogram* const b,
                         VP8LHistogram* const out) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
                             - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
  assert(a->palette_code_bits_ == b->palette_code_bits_);

  if (b != out) {
    ADD_VECTOR(a->literal_, b->literal_, out->literal_,
               NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
    ADD_VECTOR(a->distance_, b->distance_, out->distance_,
               NUM_DISTANCE_CODES, 0);
    ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
  } else {
    ADD_VECTOR_EQ(a->literal_, out->literal_,
                  NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
    ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
    ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
  }
}

#undef ADD_VECTOR_EQ
#undef ADD_VECTOR
#undef ASM_END_1
#undef ASM_END_0
#undef ASM_END_COMMON_1
#undef ASM_END_COMMON_0
#undef ADD_TO_OUT
#undef ASM_START

#endif  // WEBP_USE_MIPS32

//------------------------------------------------------------------------------
// Entry point

extern void VP8LDspInitMIPS32(void);

void VP8LDspInitMIPS32(void) {
#if defined(WEBP_USE_MIPS32)
  VP8LFastSLog2Slow = FastSLog2Slow;
  VP8LFastLog2Slow = FastLog2Slow;
  VP8LExtraCost = ExtraCost;
  VP8LExtraCostCombined = ExtraCostCombined;
  VP8LHuffmanCostCount = HuffmanCostCount;
  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
  VP8LHistogramAdd = HistogramAdd;
#endif  // WEBP_USE_MIPS32
}