OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ============================================================================ |
| 3 Name : vp9_maskingmv.c |
| 4 Author : jimbankoski |
| 5 Version : |
| 6 Copyright : Your copyright notice |
| 7 Description : Experimental masked motion-vector search using color-segmentation masks |
| 8 ============================================================================ |
| 9 */ |
| 10 |
| 11 #include <stdio.h> |
| 12 #include <stdlib.h> |
| 13 #include <string.h> |
/*
 * Prototypes for routines that are declared here but not defined anywhere in
 * this file. The _sse3 / _wmt suffixes presumably denote SIMD implementations
 * living in separate sources -- TODO confirm.
 */

/* 16x16 SAD between src and ref; called from mainz() with max_err = 0xffff. */
| 14 extern unsigned int vp9_sad16x16_sse3( |
| 15 unsigned char *src_ptr, |
| 16 int src_stride, |
| 17 unsigned char *ref_ptr, |
| 18 int ref_stride, |
| 19 int max_err); |
| 20 |
/* Declared but not called by any code visible in this file. */
| 21 extern void vp9_sad16x16x3_sse3( |
| 22 unsigned char *src_ptr, |
| 23 int src_stride, |
| 24 unsigned char *ref_ptr, |
| 25 int ref_stride, |
| 26 int *results); |
| 27 |
/*
 * Reads mask `om` and writes mask `nm`; every call site here passes two
 * distinct 256-byte buffers. Presumably the SIMD counterpart of grow_ymask()
 * -- TODO confirm. The int return value is ignored by all callers here.
 */
| 28 extern int vp8_growmaskmb_sse3( |
| 29 unsigned char *om, |
| 30 unsigned char *nm); |
| 31 |
/*
 * Builds a luma mask `ym` from y/u/v samples. Call sites in this file pass,
 * in order: the three plane pointers, the output mask, the luma and chroma
 * strides (yp, uvp), a segment's y/u/v values (ys, us, vs) and that segment's
 * y/u/v thresholds (yt, ut, vt).
 */
| 32 extern void vp8_makemask_sse3( |
| 33 unsigned char *y, |
| 34 unsigned char *u, |
| 35 unsigned char *v, |
| 36 unsigned char *ym, |
| 37 int yp, |
| 38 int uvp, |
| 39 int ys, |
| 40 int us, |
| 41 int vs, |
| 42 int yt, |
| 43 int ut, |
| 44 int vt); |
| 45 |
/*
 * Mask-aware 16x16 SADs. fast_masked_motion_search() calls these in the same
 * positions where masked_motion_search() calls the C unmasked_sad() and
 * masked_sad(); the mask argument is a 256-byte buffer at every call site.
 */
| 46 unsigned int vp9_sad16x16_unmasked_wmt( |
| 47 unsigned char *src_ptr, |
| 48 int src_stride, |
| 49 unsigned char *ref_ptr, |
| 50 int ref_stride, |
| 51 unsigned char *mask); |
| 52 |
| 53 unsigned int vp9_sad16x16_masked_wmt( |
| 54 unsigned char *src_ptr, |
| 55 int src_stride, |
| 56 unsigned char *ref_ptr, |
| 57 int ref_stride, |
| 58 unsigned char *mask); |
| 59 |
/*
 * Mask-driven predictors used by predict_all(): two source pointers sharing
 * one stride, a destination with its own stride, and a mask. Presumably they
 * select per pixel between `masked` and `unmasked` like predict() does --
 * TODO confirm. Return values are ignored by the callers visible here.
 */
| 60 unsigned int vp8_masked_predictor_wmt( |
| 61 unsigned char *masked, |
| 62 unsigned char *unmasked, |
| 63 int src_stride, |
| 64 unsigned char *dst_ptr, |
| 65 int dst_stride, |
| 66 unsigned char *mask); |
| 67 unsigned int vp8_masked_predictor_uv_wmt( |
| 68 unsigned char *masked, |
| 69 unsigned char *unmasked, |
| 70 int src_stride, |
| 71 unsigned char *dst_ptr, |
| 72 int dst_stride, |
| 73 unsigned char *mask); |
/* Derives a chroma mask from a luma mask; predict_all() passes a 256-byte
 * luma mask and a 64-byte chroma mask buffer. */
| 74 unsigned int vp8_uv_from_y_mask( |
| 75 unsigned char *ymask, |
| 76 unsigned char *uvmask); |
/*
 * Hard-coded sample data. None of the objects in this group is read by any
 * enabled code visible in this file (functions below use same-named
 * parameters or locals instead).
 */

/* Presumably the row pitch of the 16x16 luma sample `y` below -- TODO confirm. */
| 77 int yp = 16; |
/* 16 rows of 32 bytes each. */
| 78 unsigned char sxy[] = { |
| 79 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 80 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 81 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 82 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 83 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 84 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 85 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 86 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 87 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 88 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 89 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 90 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 91 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 92 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 93 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90, |
| 94 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 80, 120, 120,
90, 90, 90, 90, 90, 80, 120, 120, 90, 90, 90, 90, 90 |
| 95 }; |
| 96 |
/* 16x16 block filled with the constant 2. */
| 97 unsigned char sts[] = { |
| 98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 102 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 103 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 104 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 109 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 110 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 111 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 112 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 114 }; |
/* 16x16 block filled with the constant 1. */
| 115 unsigned char str[] = { |
| 116 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 117 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 118 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 119 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 120 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 121 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 122 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 123 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 125 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 127 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 131 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 |
| 132 }; |
| 133 |
/* 16x16 luma sample block (values 40 and 60 only). */
| 134 unsigned char y[] = { |
| 135 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, |
| 136 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, |
| 137 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, |
| 138 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, |
| 139 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, |
| 140 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, |
| 141 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, |
| 142 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, |
| 143 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, |
| 144 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, |
| 145 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, |
| 146 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, 40, |
| 147 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, |
| 148 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, |
| 149 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40, |
| 150 40, 40, 40, 60, 60, 60, 60, 40, 40, 40, 40, 60, 60, 60, 60, 40 |
| 151 }; |
/* Presumably the row pitch of the 8x8 chroma samples below -- TODO confirm. */
| 152 int uvp = 8; |
/* 8x8 sample block for the u plane. */
| 153 unsigned char u[] = { |
| 154 90, 80, 70, 70, 90, 90, 90, 17, |
| 155 90, 80, 70, 70, 90, 90, 90, 17, |
| 156 84, 70, 70, 90, 90, 90, 17, 17, |
| 157 84, 70, 70, 90, 90, 90, 17, 17, |
| 158 80, 70, 70, 90, 90, 90, 17, 17, |
| 159 90, 80, 70, 70, 90, 90, 90, 17, |
| 160 90, 80, 70, 70, 90, 90, 90, 17, |
| 161 90, 80, 70, 70, 90, 90, 90, 17 |
| 162 }; |
| 163 |
/* 8x8 sample block for the v plane (constant 80). */
| 164 unsigned char v[] = { |
| 165 80, 80, 80, 80, 80, 80, 80, 80, |
| 166 80, 80, 80, 80, 80, 80, 80, 80, |
| 167 80, 80, 80, 80, 80, 80, 80, 80, |
| 168 80, 80, 80, 80, 80, 80, 80, 80, |
| 169 80, 80, 80, 80, 80, 80, 80, 80, |
| 170 80, 80, 80, 80, 80, 80, 80, 80, |
| 171 80, 80, 80, 80, 80, 80, 80, 80, |
| 172 80, 80, 80, 80, 80, 80, 80, 80 |
| 173 }; |
| 174 |
/* File-scope mask buffers sized for a 16x16 luma mask and an 8x8 chroma mask. */
| 175 unsigned char ym[256]; |
| 176 unsigned char uvm[64]; |
/*
 * One color segment: a target (y, u, v) triple with a per-component
 * threshold. pixel_mask() treats a pixel as belonging to the segment when
 * each component differs from its target by strictly less than the matching
 * threshold.
 */
| 177 typedef struct { |
| 178 unsigned char y; |
| 179 unsigned char yt; |
| 180 unsigned char u; |
| 181 unsigned char ut; |
| 182 unsigned char v; |
| 183 unsigned char vt; |
/* `use` is set to 1 in every initializer here but never read by visible code. */
| 184 unsigned char use; |
| 185 } COLOR_SEG_ELEMENT; |
| 186 |
| 187 /* |
| 188 COLOR_SEG_ELEMENT segmentation[]= |
| 189 { |
| 190 { 60,4,80,17,80,10, 1}, |
| 191 { 40,4,15,10,80,10, 1}, |
| 192 }; |
| 193 */ |
| 194 |
/* Single-entry segment table; not read by any enabled code visible in this
 * file. Field order is y, yt, u, ut, v, vt, use. */
| 195 COLOR_SEG_ELEMENT segmentation[] = { |
| 196 { 79, 44, 92, 44, 237, 60, 1}, |
| 197 }; |
| 198 |
| 199 unsigned char pixel_mask(unsigned char y, unsigned char u, unsigned char v, |
| 200 COLOR_SEG_ELEMENT sgm[], |
| 201 int c) { |
| 202 COLOR_SEG_ELEMENT *s = sgm; |
| 203 unsigned char m = 0; |
| 204 int i; |
| 205 for (i = 0; i < c; i++, s++) |
| 206 m |= (abs(y - s->y) < s->yt && |
| 207 abs(u - s->u) < s->ut && |
| 208 abs(v - s->v) < s->vt ? 255 : 0); |
| 209 |
| 210 return m; |
| 211 } |
/*
 * neighbors[i] lists the positions adjacent to position i of a 16x16 block
 * stored row-major (row = i >> 4, column = i & 15). Unused slots hold i
 * itself so that consumers can always read all eight entries.
 */
int neighbors[256][8];

/*
 * Fill the neighbors[] table. Returns 0.
 *
 * Each position gets its (up to eight) 8-connected neighbours in ascending
 * index order. The position itself is deliberately excluded: including it
 * yields nine candidates for interior positions, which overruns the
 * eight-entry row and drops the bottom-right neighbour.
 */
int makeneighbors(void) {
  int i;

  for (i = 0; i < 256; i++) {
    const int r = i >> 4;
    const int c = i & 15;
    int ni = 0;
    int dr, dc, j;

    /* Pad with the position itself; edge/corner rows keep some padding. */
    for (j = 0; j < 8; j++) {
      neighbors[i][j] = i;
    }

    for (dr = -1; dr <= 1; dr++) {
      for (dc = -1; dc <= 1; dc++) {
        const int nr = r + dr;
        const int nc = c + dc;

        if (dr == 0 && dc == 0) {
          continue; /* skip self */
        }
        if (nr < 0 || nr > 15 || nc < 0 || nc > 15) {
          continue; /* outside the block */
        }
        neighbors[i][ni++] = (nr << 4) | nc;
      }
    }
  }
  return 0;
}
| 228 void grow_ymask(unsigned char *ym) { |
| 229 unsigned char nym[256]; |
| 230 int i, j; |
| 231 |
| 232 for (i = 0; i < 256; i++) { |
| 233 nym[i] = ym[i]; |
| 234 for (j = 0; j < 8; j++) { |
| 235 nym[i] |= ym[neighbors[i][j]]; |
| 236 } |
| 237 } |
| 238 for (i = 0; i < 256; i++) |
| 239 ym[i] = nym[i]; |
| 240 } |
| 241 void make_mb_mask(unsigned char *y, unsigned char *u, unsigned char *v, |
| 242 unsigned char *ym, unsigned char *uvm, |
| 243 int yp, int uvp, |
| 244 COLOR_SEG_ELEMENT sgm[], |
| 245 int count) { |
| 246 int r, c; |
| 247 unsigned char *oym = ym; |
| 248 |
| 249 memset(ym, 20, 256); |
| 250 for (r = 0; r < 8; r++, uvm += 8, u += uvp, v += uvp, y += (yp + yp), ym += 32
) |
| 251 for (c = 0; c < 8; c++) { |
| 252 int y1 = y[c << 1]; |
| 253 int u1 = u[c]; |
| 254 int v1 = v[c]; |
| 255 int m = pixel_mask(y1, u1, v1, sgm, count); |
| 256 uvm[c] = m; |
| 257 ym[c << 1] = uvm[c]; // = pixel_mask(y[c<<1],u[c],v[c],sgm,count); |
| 258 ym[(c << 1) + 1] = pixel_mask(y[1 + (c << 1)], u[c], v[c], sgm, count); |
| 259 ym[(c << 1) + 16] = pixel_mask(y[yp + (c << 1)], u[c], v[c], sgm, count); |
| 260 ym[(c << 1) + 17] = pixel_mask(y[1 + yp + (c << 1)], u[c], v[c], sgm, coun
t); |
| 261 } |
| 262 grow_ymask(oym); |
| 263 } |
| 264 |
/*
 * Sum of absolute differences over a 16x16 block, counting only the pixels
 * whose mask entry is non-zero.
 *
 * src, p  - first block and its row pitch
 * dst, dp - second block and its row pitch
 * ym      - 16x16 mask with pitch 16
 */
int masked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
               unsigned char *ym) {
  unsigned total = 0;
  int row;

  for (row = 0; row < 16; row++) {
    const unsigned char *a = src + row * p;
    const unsigned char *b = dst + row * dp;
    const unsigned char *m = ym + row * 16;
    int col;

    for (col = 0; col < 16; col++) {
      if (!m[col]) {
        continue;
      }
      total += abs(a[col] - b[col]);
    }
  }

  return total;
}
| 276 |
/*
 * Count the positions at which two 16x16 masks (pitch 16, 256 bytes each)
 * hold different values. Returns a value in 0..256.
 */
int compare_masks(unsigned char *sym, unsigned char *ym) {
  unsigned mismatches = 0;
  int pos;

  for (pos = 0; pos < 16 * 16; pos++) {
    if (sym[pos] != ym[pos]) {
      mismatches += 1;
    }
  }

  return mismatches;
}
/*
 * Sum of absolute differences over a 16x16 block, counting only the pixels
 * whose mask entry is zero (the complement of masked_sad()).
 *
 * src, p  - first block and its row pitch
 * dst, dp - second block and its row pitch
 * ym      - 16x16 mask with pitch 16
 */
int unmasked_sad(unsigned char *src, int p, unsigned char *dst, int dp,
                 unsigned char *ym) {
  unsigned total = 0;
  int row;

  for (row = 0; row < 16; row++) {
    const unsigned char *a = src + row * p;
    const unsigned char *b = dst + row * dp;
    const unsigned char *m = ym + row * 16;
    int col;

    for (col = 0; col < 16; col++) {
      if (m[col] == 0) {
        total += abs(a[col] - b[col]);
      }
    }
  }

  return total;
}
| 297 int masked_motion_search(unsigned char *y, unsigned char *u, unsigned char *v, |
| 298 int yp, int uvp, |
| 299 unsigned char *dy, unsigned char *du, unsigned char *dv
, |
| 300 int dyp, int duvp, |
| 301 COLOR_SEG_ELEMENT sgm[], |
| 302 int count, |
| 303 int *mi, |
| 304 int *mj, |
| 305 int *ui, |
| 306 int *uj, |
| 307 int *wm) { |
| 308 int i, j; |
| 309 |
| 310 unsigned char ym[256]; |
| 311 unsigned char uvm[64]; |
| 312 unsigned char dym[256]; |
| 313 unsigned char duvm[64]; |
| 314 unsigned int e = 0; |
| 315 int beste = 256; |
| 316 int bmi = -32, bmj = -32; |
| 317 int bui = -32, buj = -32; |
| 318 int beste1 = 256; |
| 319 int bmi1 = -32, bmj1 = -32; |
| 320 int bui1 = -32, buj1 = -32; |
| 321 int obeste; |
| 322 |
| 323 // first try finding best mask and then unmasked |
| 324 beste = 0xffffffff; |
| 325 |
| 326 // find best unmasked mv |
| 327 for (i = -32; i < 32; i++) { |
| 328 unsigned char *dyz = i * dyp + dy; |
| 329 unsigned char *duz = i / 2 * duvp + du; |
| 330 unsigned char *dvz = i / 2 * duvp + dv; |
| 331 for (j = -32; j < 32; j++) { |
| 332 // 0,0 masked destination |
| 333 make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm,
count); |
| 334 |
| 335 e = unmasked_sad(y, yp, dyz + j, dyp, dym); |
| 336 |
| 337 if (e < beste) { |
| 338 bui = i; |
| 339 buj = j; |
| 340 beste = e; |
| 341 } |
| 342 } |
| 343 } |
| 344 // bui=0;buj=0; |
| 345 // best mv masked destination |
| 346 make_mb_mask(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bui / 2
* duvp + buj / 2, |
| 347 dym, duvm, dyp, duvp, sgm, count); |
| 348 |
| 349 obeste = beste; |
| 350 beste = 0xffffffff; |
| 351 |
| 352 // find best masked |
| 353 for (i = -32; i < 32; i++) { |
| 354 unsigned char *dyz = i * dyp + dy; |
| 355 for (j = -32; j < 32; j++) { |
| 356 e = masked_sad(y, yp, dyz + j, dyp, dym); |
| 357 |
| 358 if (e < beste) { |
| 359 bmi = i; |
| 360 bmj = j; |
| 361 beste = e; |
| 362 } |
| 363 } |
| 364 } |
| 365 beste1 = beste + obeste; |
| 366 bmi1 = bmi; |
| 367 bmj1 = bmj; |
| 368 bui1 = bui; |
| 369 buj1 = buj; |
| 370 |
| 371 beste = 0xffffffff; |
| 372 // source mask |
| 373 make_mb_mask(y, u, v, ym, uvm, yp, uvp, sgm, count); |
| 374 |
| 375 // find best mask |
| 376 for (i = -32; i < 32; i++) { |
| 377 unsigned char *dyz = i * dyp + dy; |
| 378 unsigned char *duz = i / 2 * duvp + du; |
| 379 unsigned char *dvz = i / 2 * duvp + dv; |
| 380 for (j = -32; j < 32; j++) { |
| 381 // 0,0 masked destination |
| 382 make_mb_mask(dyz + j, duz + j / 2, dvz + j / 2, dym, duvm, dyp, duvp, sgm,
count); |
| 383 |
| 384 e = compare_masks(ym, dym); |
| 385 |
| 386 if (e < beste) { |
| 387 bmi = i; |
| 388 bmj = j; |
| 389 beste = e; |
| 390 } |
| 391 } |
| 392 } |
| 393 |
| 394 |
| 395 // best mv masked destination |
| 396 make_mb_mask(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bmi / 2
* duvp + bmj / 2, |
| 397 dym, duvm, dyp, duvp, sgm, count); |
| 398 |
| 399 obeste = masked_sad(y, yp, dy + bmi * dyp + bmj, dyp, dym); |
| 400 |
| 401 beste = 0xffffffff; |
| 402 |
| 403 // find best unmasked mv |
| 404 for (i = -32; i < 32; i++) { |
| 405 unsigned char *dyz = i * dyp + dy; |
| 406 for (j = -32; j < 32; j++) { |
| 407 e = unmasked_sad(y, yp, dyz + j, dyp, dym); |
| 408 |
| 409 if (e < beste) { |
| 410 bui = i; |
| 411 buj = j; |
| 412 beste = e; |
| 413 } |
| 414 } |
| 415 } |
| 416 beste += obeste; |
| 417 |
| 418 |
| 419 if (beste < beste1) { |
| 420 *mi = bmi; |
| 421 *mj = bmj; |
| 422 *ui = bui; |
| 423 *uj = buj; |
| 424 *wm = 1; |
| 425 } else { |
| 426 *mi = bmi1; |
| 427 *mj = bmj1; |
| 428 *ui = bui1; |
| 429 *uj = buj1; |
| 430 *wm = 0; |
| 431 |
| 432 } |
| 433 return 0; |
| 434 } |
| 435 |
/*
 * Compose a 16x16 prediction: where the mask entry is non-zero the pixel is
 * copied from src, otherwise from dst.
 *
 * src, p  - block used for masked pixels and its row pitch
 * dst, dp - block used for unmasked pixels and its row pitch
 * ym      - 16x16 mask with pitch 16
 * prd     - output block with pitch 16
 *
 * Always returns 0.
 */
int predict(unsigned char *src, int p, unsigned char *dst, int dp,
            unsigned char *ym, unsigned char *prd) {
  int row;

  for (row = 0; row < 16; row++) {
    const unsigned char *from_masked = src + row * p;
    const unsigned char *from_unmasked = dst + row * dp;
    const unsigned char *mask_row = ym + row * 16;
    unsigned char *out = prd + row * 16;
    int col;

    for (col = 0; col < 16; col++) {
      if (mask_row[col]) {
        out[col] = from_masked[col];
      } else {
        out[col] = from_unmasked[col];
      }
    }
  }
  return 0;
}
| 444 |
| 445 int fast_masked_motion_search(unsigned char *y, unsigned char *u, unsigned char
*v, |
| 446 int yp, int uvp, |
| 447 unsigned char *dy, unsigned char *du, unsigned cha
r *dv, |
| 448 int dyp, int duvp, |
| 449 COLOR_SEG_ELEMENT sgm[], |
| 450 int count, |
| 451 int *mi, |
| 452 int *mj, |
| 453 int *ui, |
| 454 int *uj, |
| 455 int *wm) { |
| 456 int i, j; |
| 457 |
| 458 unsigned char ym[256]; |
| 459 unsigned char ym2[256]; |
| 460 unsigned char uvm[64]; |
| 461 unsigned char dym2[256]; |
| 462 unsigned char dym[256]; |
| 463 unsigned char duvm[64]; |
| 464 unsigned int e = 0; |
| 465 int beste = 256; |
| 466 int bmi = -32, bmj = -32; |
| 467 int bui = -32, buj = -32; |
| 468 int beste1 = 256; |
| 469 int bmi1 = -32, bmj1 = -32; |
| 470 int bui1 = -32, buj1 = -32; |
| 471 int obeste; |
| 472 |
| 473 // first try finding best mask and then unmasked |
| 474 beste = 0xffffffff; |
| 475 |
| 476 #if 0 |
| 477 for (i = 0; i < 16; i++) { |
| 478 unsigned char *dy = i * yp + y; |
| 479 for (j = 0; j < 16; j++) |
| 480 printf("%2x", dy[j]); |
| 481 printf("\n"); |
| 482 } |
| 483 printf("\n"); |
| 484 |
| 485 for (i = -32; i < 48; i++) { |
| 486 unsigned char *dyz = i * dyp + dy; |
| 487 for (j = -32; j < 48; j++) |
| 488 printf("%2x", dyz[j]); |
| 489 printf("\n"); |
| 490 } |
| 491 #endif |
| 492 |
| 493 // find best unmasked mv |
| 494 for (i = -32; i < 32; i++) { |
| 495 unsigned char *dyz = i * dyp + dy; |
| 496 unsigned char *duz = i / 2 * duvp + du; |
| 497 unsigned char *dvz = i / 2 * duvp + dv; |
| 498 for (j = -32; j < 32; j++) { |
| 499 // 0,0 masked destination |
| 500 vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp, |
| 501 sgm[0].y, sgm[0].u, sgm[0].v, |
| 502 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 503 |
| 504 vp8_growmaskmb_sse3(dym, dym2); |
| 505 |
| 506 e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2); |
| 507 |
| 508 if (e < beste) { |
| 509 bui = i; |
| 510 buj = j; |
| 511 beste = e; |
| 512 } |
| 513 } |
| 514 } |
| 515 // bui=0;buj=0; |
| 516 // best mv masked destination |
| 517 |
| 518 vp8_makemask_sse3(dy + bui * dyp + buj, du + bui / 2 * duvp + buj / 2, dv + bu
i / 2 * duvp + buj / 2, |
| 519 dym, dyp, duvp, |
| 520 sgm[0].y, sgm[0].u, sgm[0].v, |
| 521 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 522 |
| 523 vp8_growmaskmb_sse3(dym, dym2); |
| 524 |
| 525 obeste = beste; |
| 526 beste = 0xffffffff; |
| 527 |
| 528 // find best masked |
| 529 for (i = -32; i < 32; i++) { |
| 530 unsigned char *dyz = i * dyp + dy; |
| 531 for (j = -32; j < 32; j++) { |
| 532 e = vp9_sad16x16_masked_wmt(y, yp, dyz + j, dyp, dym2); |
| 533 if (e < beste) { |
| 534 bmi = i; |
| 535 bmj = j; |
| 536 beste = e; |
| 537 } |
| 538 } |
| 539 } |
| 540 beste1 = beste + obeste; |
| 541 bmi1 = bmi; |
| 542 bmj1 = bmj; |
| 543 bui1 = bui; |
| 544 buj1 = buj; |
| 545 |
| 546 // source mask |
| 547 vp8_makemask_sse3(y, u, v, |
| 548 ym, yp, uvp, |
| 549 sgm[0].y, sgm[0].u, sgm[0].v, |
| 550 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 551 |
| 552 vp8_growmaskmb_sse3(ym, ym2); |
| 553 |
| 554 // find best mask |
| 555 for (i = -32; i < 32; i++) { |
| 556 unsigned char *dyz = i * dyp + dy; |
| 557 unsigned char *duz = i / 2 * duvp + du; |
| 558 unsigned char *dvz = i / 2 * duvp + dv; |
| 559 for (j = -32; j < 32; j++) { |
| 560 // 0,0 masked destination |
| 561 vp8_makemask_sse3(dyz + j, duz + j / 2, dvz + j / 2, dym, dyp, duvp, |
| 562 sgm[0].y, sgm[0].u, sgm[0].v, |
| 563 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 564 |
| 565 vp8_growmaskmb_sse3(dym, dym2); |
| 566 |
| 567 e = compare_masks(ym2, dym2); |
| 568 |
| 569 if (e < beste) { |
| 570 bmi = i; |
| 571 bmj = j; |
| 572 beste = e; |
| 573 } |
| 574 } |
| 575 } |
| 576 |
| 577 vp8_makemask_sse3(dy + bmi * dyp + bmj, du + bmi / 2 * duvp + bmj / 2, dv + bm
i / 2 * duvp + bmj / 2, |
| 578 dym, dyp, duvp, |
| 579 sgm[0].y, sgm[0].u, sgm[0].v, |
| 580 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 581 |
| 582 vp8_growmaskmb_sse3(dym, dym2); |
| 583 |
| 584 obeste = vp9_sad16x16_masked_wmt(y, yp, dy + bmi * dyp + bmj, dyp, dym2); |
| 585 |
| 586 beste = 0xffffffff; |
| 587 |
| 588 // find best unmasked mv |
| 589 for (i = -32; i < 32; i++) { |
| 590 unsigned char *dyz = i * dyp + dy; |
| 591 for (j = -32; j < 32; j++) { |
| 592 e = vp9_sad16x16_unmasked_wmt(y, yp, dyz + j, dyp, dym2); |
| 593 |
| 594 if (e < beste) { |
| 595 bui = i; |
| 596 buj = j; |
| 597 beste = e; |
| 598 } |
| 599 } |
| 600 } |
| 601 beste += obeste; |
| 602 |
| 603 if (beste < beste1) { |
| 604 *mi = bmi; |
| 605 *mj = bmj; |
| 606 *ui = bui; |
| 607 *uj = buj; |
| 608 *wm = 1; |
| 609 } else { |
| 610 *mi = bmi1; |
| 611 *mj = bmj1; |
| 612 *ui = bui1; |
| 613 *uj = buj1; |
| 614 *wm = 0; |
| 615 beste = beste1; |
| 616 |
| 617 } |
| 618 return beste; |
| 619 } |
| 620 |
| 621 int predict_all(unsigned char *ym, unsigned char *um, unsigned char *vm, |
| 622 int ymp, int uvmp, |
| 623 unsigned char *yp, unsigned char *up, unsigned char *vp, |
| 624 int ypp, int uvpp, |
| 625 COLOR_SEG_ELEMENT sgm[], |
| 626 int count, |
| 627 int mi, |
| 628 int mj, |
| 629 int ui, |
| 630 int uj, |
| 631 int wm) { |
| 632 int i, j; |
| 633 unsigned char dym[256]; |
| 634 unsigned char dym2[256]; |
| 635 unsigned char duvm[64]; |
| 636 unsigned char *yu = ym, *uu = um, *vu = vm; |
| 637 |
| 638 unsigned char *dym3 = dym2; |
| 639 |
| 640 ym += mi * ymp + mj; |
| 641 um += mi / 2 * uvmp + mj / 2; |
| 642 vm += mi / 2 * uvmp + mj / 2; |
| 643 |
| 644 yu += ui * ymp + uj; |
| 645 uu += ui / 2 * uvmp + uj / 2; |
| 646 vu += ui / 2 * uvmp + uj / 2; |
| 647 |
| 648 // best mv masked destination |
| 649 if (wm) |
| 650 vp8_makemask_sse3(ym, um, vm, dym, ymp, uvmp, |
| 651 sgm[0].y, sgm[0].u, sgm[0].v, |
| 652 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 653 else |
| 654 vp8_makemask_sse3(yu, uu, vu, dym, ymp, uvmp, |
| 655 sgm[0].y, sgm[0].u, sgm[0].v, |
| 656 sgm[0].yt, sgm[0].ut, sgm[0].vt); |
| 657 |
| 658 vp8_growmaskmb_sse3(dym, dym2); |
| 659 vp8_masked_predictor_wmt(ym, yu, ymp, yp, ypp, dym3); |
| 660 vp8_uv_from_y_mask(dym3, duvm); |
| 661 vp8_masked_predictor_uv_wmt(um, uu, uvmp, up, uvpp, duvm); |
| 662 vp8_masked_predictor_uv_wmt(vm, vu, uvmp, vp, uvpp, duvm); |
| 663 |
| 664 return 0; |
| 665 } |
| 666 |
/*
 * Static frame buffers, each large enough for one 1280x720 frame stored as a
 * full-resolution plane followed by two quarter-resolution planes
 * (1280 * 720 * 3 / 2 bytes). mainz() alternates f0p/f1p between the
 * previous and the current input frame and writes its prediction into prd.
 */
| 667 unsigned char f0p[1280 * 720 * 3 / 2]; |
| 668 unsigned char f1p[1280 * 720 * 3 / 2]; |
| 669 unsigned char prd[1280 * 720 * 3 / 2]; |
/* msk is not referenced by any code visible in this file. */
| 670 unsigned char msk[1280 * 720 * 3 / 2]; |
| 671 |
| 672 |
| 673 int mainz(int argc, char *argv[]) { |
| 674 |
| 675 FILE *f = fopen(argv[1], "rb"); |
| 676 FILE *g = fopen(argv[2], "wb"); |
| 677 int w = atoi(argv[3]), h = atoi(argv[4]); |
| 678 int y_stride = w, uv_stride = w / 2; |
| 679 int r, c; |
| 680 unsigned char *f0 = f0p, *f1 = f1p, *t; |
| 681 unsigned char ym[256], uvm[64]; |
| 682 unsigned char ym2[256], uvm2[64]; |
| 683 unsigned char ym3[256], uvm3[64]; |
| 684 int a, b; |
| 685 |
| 686 COLOR_SEG_ELEMENT last = { 20, 20, 20, 20, 230, 20, 1}, best; |
| 687 #if 0 |
| 688 makeneighbors(); |
| 689 COLOR_SEG_ELEMENT segmentation[] = { |
| 690 { 60, 4, 80, 17, 80, 10, 1}, |
| 691 { 40, 4, 15, 10, 80, 10, 1}, |
| 692 }; |
| 693 make_mb_mask(y, u, v, ym2, uvm2, 16, 8, segmentation, 1); |
| 694 |
| 695 vp8_makemask_sse3(y, u, v, ym, (int) 16, (int) 8, |
| 696 (int) segmentation[0].y, (int) segmentation[0].u, (int) segm
entation[0].v, |
| 697 segmentation[0].yt, segmentation[0].ut, segmentation[0].vt); |
| 698 |
| 699 vp8_growmaskmb_sse3(ym, ym3); |
| 700 |
| 701 a = vp9_sad16x16_masked_wmt(str, 16, sts, 16, ym3); |
| 702 b = vp9_sad16x16_unmasked_wmt(str, 16, sts, 16, ym3); |
| 703 |
| 704 vp8_masked_predictor_wmt(str, sts, 16, ym, 16, ym3); |
| 705 |
| 706 vp8_uv_from_y_mask(ym3, uvm3); |
| 707 |
| 708 return 4; |
| 709 #endif |
| 710 makeneighbors(); |
| 711 |
| 712 |
| 713 memset(prd, 128, w * h * 3 / 2); |
| 714 |
| 715 fread(f0, w * h * 3 / 2, 1, f); |
| 716 |
| 717 while (!feof(f)) { |
| 718 unsigned char *ys = f1, *yd = f0, *yp = prd; |
| 719 unsigned char *us = f1 + w * h, *ud = f0 + w * h, *up = prd + w * h; |
| 720 unsigned char *vs = f1 + w * h * 5 / 4, *vd = f0 + w * h * 5 / 4, *vp = prd
+ w * h * 5 / 4; |
| 721 fread(f1, w * h * 3 / 2, 1, f); |
| 722 |
| 723 ys += 32 * y_stride; |
| 724 yd += 32 * y_stride; |
| 725 yp += 32 * y_stride; |
| 726 us += 16 * uv_stride; |
| 727 ud += 16 * uv_stride; |
| 728 up += 16 * uv_stride; |
| 729 vs += 16 * uv_stride; |
| 730 vd += 16 * uv_stride; |
| 731 vp += 16 * uv_stride; |
| 732 for (r = 32; r < h - 32; r += 16, |
| 733 ys += 16 * w, yd += 16 * w, yp += 16 * w, |
| 734 us += 8 * uv_stride, ud += 8 * uv_stride, up += 8 * uv_stride, |
| 735 vs += 8 * uv_stride, vd += 8 * uv_stride, vp += 8 * uv_stride) { |
| 736 for (c = 32; c < w - 32; c += 16) { |
| 737 int mi, mj, ui, uj, wm; |
| 738 int bmi, bmj, bui, buj, bwm; |
| 739 unsigned char ym[256]; |
| 740 |
| 741 if (vp9_sad16x16_sse3(ys + c, y_stride, yd + c, y_stride, 0xffff) == 0) |
| 742 bmi = bmj = bui = buj = bwm = 0; |
| 743 else { |
| 744 COLOR_SEG_ELEMENT cs[5]; |
| 745 int j; |
| 746 unsigned int beste = 0xfffffff; |
| 747 unsigned int bestj = 0; |
| 748 |
| 749 // try color from last mb segmentation |
| 750 cs[0] = last; |
| 751 |
| 752 // try color segs from 4 pixels in mb recon as segmentation |
| 753 cs[1].y = yd[c + y_stride + 1]; |
| 754 cs[1].u = ud[c / 2 + uv_stride]; |
| 755 cs[1].v = vd[c / 2 + uv_stride]; |
| 756 cs[1].yt = cs[1].ut = cs[1].vt = 20; |
| 757 cs[2].y = yd[c + w + 14]; |
| 758 cs[2].u = ud[c / 2 + uv_stride + 7]; |
| 759 cs[2].v = vd[c / 2 + uv_stride + 7]; |
| 760 cs[2].yt = cs[2].ut = cs[2].vt = 20; |
| 761 cs[3].y = yd[c + w * 14 + 1]; |
| 762 cs[3].u = ud[c / 2 + uv_stride * 7]; |
| 763 cs[3].v = vd[c / 2 + uv_stride * 7]; |
| 764 cs[3].yt = cs[3].ut = cs[3].vt = 20; |
| 765 cs[4].y = yd[c + w * 14 + 14]; |
| 766 cs[4].u = ud[c / 2 + uv_stride * 7 + 7]; |
| 767 cs[4].v = vd[c / 2 + uv_stride * 7 + 7]; |
| 768 cs[4].yt = cs[4].ut = cs[4].vt = 20; |
| 769 |
| 770 for (j = 0; j < 5; j++) { |
| 771 int e; |
| 772 |
| 773 e = fast_masked_motion_search( |
| 774 ys + c, us + c / 2, vs + c / 2, y_stride, uv_stride, |
| 775 yd + c, ud + c / 2, vd + c / 2, y_stride, uv_stride, |
| 776 &cs[j], 1, &mi, &mj, &ui, &uj, &wm); |
| 777 |
| 778 if (e < beste) { |
| 779 bmi = mi; |
| 780 bmj = mj; |
| 781 bui = ui; |
| 782 buj = uj, bwm = wm; |
| 783 bestj = j; |
| 784 beste = e; |
| 785 } |
| 786 } |
| 787 best = cs[bestj]; |
| 788 // best = segmentation[0]; |
| 789 last = best; |
| 790 } |
| 791 predict_all(yd + c, ud + c / 2, vd + c / 2, w, uv_stride, |
| 792 yp + c, up + c / 2, vp + c / 2, w, uv_stride, |
| 793 &best, 1, bmi, bmj, bui, buj, bwm); |
| 794 |
| 795 } |
| 796 } |
| 797 fwrite(prd, w * h * 3 / 2, 1, g); |
| 798 t = f0; |
| 799 f0 = f1; |
| 800 f1 = t; |
| 801 |
| 802 } |
| 803 fclose(f); |
| 804 fclose(g); |
| 805 return; |
| 806 } |
OLD | NEW |