OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2010, Google Inc. |
| 3 * and other copyright owners as documented in JGit's IP log. |
| 4 * |
| 5 * This program and the accompanying materials are made available |
| 6 * under the terms of the Eclipse Distribution License v1.0 which |
| 7 * accompanies this distribution, is reproduced below, and is |
| 8 * available at http://www.eclipse.org/org/documents/edl-v10.php |
| 9 * |
| 10 * All rights reserved. |
| 11 * |
| 12 * Redistribution and use in source and binary forms, with or |
| 13 * without modification, are permitted provided that the following |
| 14 * conditions are met: |
| 15 * |
| 16 * - Redistributions of source code must retain the above copyright |
| 17 * notice, this list of conditions and the following disclaimer. |
| 18 * |
| 19 * - Redistributions in binary form must reproduce the above |
| 20 * copyright notice, this list of conditions and the following |
| 21 * disclaimer in the documentation and/or other materials provided |
| 22 * with the distribution. |
| 23 * |
| 24 * - Neither the name of the Eclipse Foundation, Inc. nor the |
| 25 * names of its contributors may be used to endorse or promote |
| 26 * products derived from this software without specific prior |
| 27 * written permission. |
| 28 * |
| 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
| 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 42 */ |
| 43 |
| 44 #include "xinclude.h" |
| 45 #include "xtypes.h" |
| 46 #include "xdiff.h" |
| 47 |
| 48 #define MAX_PTR UINT_MAX |
| 49 #define MAX_CNT UINT_MAX |
| 50 |
| 51 #define LINE_END(n) (line##n + count##n - 1) |
| 52 #define LINE_END_PTR(n) (*line##n + *count##n - 1) |
| 53 |
| 54 struct histindex { |
| 55 struct record { |
| 56 unsigned int ptr, cnt; |
| 57 struct record *next; |
| 58 } **records, /* an occurrence */ |
| 59 **line_map; /* map of line to record chain */ |
| 60 chastore_t rcha; |
| 61 unsigned int *next_ptrs; |
| 62 unsigned int table_bits, |
| 63 records_size, |
| 64 line_map_size; |
| 65 |
| 66 unsigned int max_chain_length, |
| 67 key_shift, |
| 68 ptr_shift; |
| 69 |
| 70 unsigned int cnt, |
| 71 has_common; |
| 72 |
| 73 xdfenv_t *env; |
| 74 xpparam_t const *xpp; |
| 75 }; |
| 76 |
| 77 struct region { |
| 78 unsigned int begin1, end1; |
| 79 unsigned int begin2, end2; |
| 80 }; |
| 81 |
| 82 #define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift]) |
| 83 |
| 84 #define NEXT_PTR(index, ptr) \ |
| 85 (index->next_ptrs[(ptr) - index->ptr_shift]) |
| 86 |
| 87 #define CNT(index, ptr) \ |
| 88 ((LINE_MAP(index, ptr))->cnt) |
| 89 |
| 90 #define REC(env, s, l) \ |
| 91 (env->xdf##s.recs[l - 1]) |
| 92 |
| 93 static int cmp_recs(xpparam_t const *xpp, |
| 94 xrecord_t *r1, xrecord_t *r2) |
| 95 { |
| 96 return r1->ha == r2->ha && |
| 97 xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, |
| 98 xpp->flags); |
| 99 } |
| 100 |
| 101 #define CMP_ENV(xpp, env, s1, l1, s2, l2) \ |
| 102 (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) |
| 103 |
| 104 #define CMP(i, s1, l1, s2, l2) \ |
| 105 (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) |
| 106 |
| 107 #define TABLE_HASH(index, side, line) \ |
| 108 XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) |
| 109 |
| 110 static int scanA(struct histindex *index, int line1, int count1) |
| 111 { |
| 112 unsigned int ptr, tbl_idx; |
| 113 unsigned int chain_len; |
| 114 struct record **rec_chain, *rec; |
| 115 |
| 116 for (ptr = LINE_END(1); line1 <= ptr; ptr--) { |
| 117 tbl_idx = TABLE_HASH(index, 1, ptr); |
| 118 rec_chain = index->records + tbl_idx; |
| 119 rec = *rec_chain; |
| 120 |
| 121 chain_len = 0; |
| 122 while (rec) { |
| 123 if (CMP(index, 1, rec->ptr, 1, ptr)) { |
| 124 /* |
| 125 * ptr is identical to another element. Insert |
| 126 * it onto the front of the existing element |
| 127 * chain. |
| 128 */ |
| 129 NEXT_PTR(index, ptr) = rec->ptr; |
| 130 rec->ptr = ptr; |
| 131 /* cap rec->cnt at MAX_CNT */ |
| 132 rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); |
| 133 LINE_MAP(index, ptr) = rec; |
| 134 goto continue_scan; |
| 135 } |
| 136 |
| 137 rec = rec->next; |
| 138 chain_len++; |
| 139 } |
| 140 |
| 141 if (chain_len == index->max_chain_length) |
| 142 return -1; |
| 143 |
| 144 /* |
| 145 * This is the first time we have ever seen this particular |
| 146 * element in the sequence. Construct a new chain for it. |
| 147 */ |
| 148 if (!(rec = xdl_cha_alloc(&index->rcha))) |
| 149 return -1; |
| 150 rec->ptr = ptr; |
| 151 rec->cnt = 1; |
| 152 rec->next = *rec_chain; |
| 153 *rec_chain = rec; |
| 154 LINE_MAP(index, ptr) = rec; |
| 155 |
| 156 continue_scan: |
| 157 ; /* no op */ |
| 158 } |
| 159 |
| 160 return 0; |
| 161 } |
| 162 |
| 163 static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, |
| 164 int line1, int count1, int line2, int count2) |
| 165 { |
| 166 unsigned int b_next = b_ptr + 1; |
| 167 struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; |
| 168 unsigned int as, ae, bs, be, np, rc; |
| 169 int should_break; |
| 170 |
| 171 for (; rec; rec = rec->next) { |
| 172 if (rec->cnt > index->cnt) { |
| 173 if (!index->has_common) |
| 174 index->has_common = CMP(index, 1, rec->ptr, 2, b
_ptr); |
| 175 continue; |
| 176 } |
| 177 |
| 178 as = rec->ptr; |
| 179 if (!CMP(index, 1, as, 2, b_ptr)) |
| 180 continue; |
| 181 |
| 182 index->has_common = 1; |
| 183 for (;;) { |
| 184 should_break = 0; |
| 185 np = NEXT_PTR(index, as); |
| 186 bs = b_ptr; |
| 187 ae = as; |
| 188 be = bs; |
| 189 rc = rec->cnt; |
| 190 |
| 191 while (line1 < as && line2 < bs |
| 192 && CMP(index, 1, as - 1, 2, bs - 1)) { |
| 193 as--; |
| 194 bs--; |
| 195 if (1 < rc) |
| 196 rc = XDL_MIN(rc, CNT(index, as)); |
| 197 } |
| 198 while (ae < LINE_END(1) && be < LINE_END(2) |
| 199 && CMP(index, 1, ae + 1, 2, be + 1)) { |
| 200 ae++; |
| 201 be++; |
| 202 if (1 < rc) |
| 203 rc = XDL_MIN(rc, CNT(index, ae)); |
| 204 } |
| 205 |
| 206 if (b_next <= be) |
| 207 b_next = be + 1; |
| 208 if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt
) { |
| 209 lcs->begin1 = as; |
| 210 lcs->begin2 = bs; |
| 211 lcs->end1 = ae; |
| 212 lcs->end2 = be; |
| 213 index->cnt = rc; |
| 214 } |
| 215 |
| 216 if (np == 0) |
| 217 break; |
| 218 |
| 219 while (np <= ae) { |
| 220 np = NEXT_PTR(index, np); |
| 221 if (np == 0) { |
| 222 should_break = 1; |
| 223 break; |
| 224 } |
| 225 } |
| 226 |
| 227 if (should_break) |
| 228 break; |
| 229 |
| 230 as = np; |
| 231 } |
| 232 } |
| 233 return b_next; |
| 234 } |
| 235 |
| 236 static int find_lcs(struct histindex *index, struct region *lcs, |
| 237 int line1, int count1, int line2, int count2) { |
| 238 int b_ptr; |
| 239 |
| 240 if (scanA(index, line1, count1)) |
| 241 return -1; |
| 242 |
| 243 index->cnt = index->max_chain_length + 1; |
| 244 |
| 245 for (b_ptr = line2; b_ptr <= LINE_END(2); ) |
| 246 b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2)
; |
| 247 |
| 248 return index->has_common && index->max_chain_length < index->cnt; |
| 249 } |
| 250 |
| 251 static int fall_back_to_classic_diff(struct histindex *index, |
| 252 int line1, int count1, int line2, int count2) |
| 253 { |
| 254 xpparam_t xpp; |
| 255 xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; |
| 256 |
| 257 return xdl_fall_back_diff(index->env, &xpp, |
| 258 line1, count1, line2, count2); |
| 259 } |
| 260 |
| 261 static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, |
| 262 int line1, int count1, int line2, int count2) |
| 263 { |
| 264 struct histindex index; |
| 265 struct region lcs; |
| 266 int sz; |
| 267 int result = -1; |
| 268 |
| 269 if (count1 <= 0 && count2 <= 0) |
| 270 return 0; |
| 271 |
| 272 if (LINE_END(1) >= MAX_PTR) |
| 273 return -1; |
| 274 |
| 275 if (!count1) { |
| 276 while(count2--) |
| 277 env->xdf2.rchg[line2++ - 1] = 1; |
| 278 return 0; |
| 279 } else if (!count2) { |
| 280 while(count1--) |
| 281 env->xdf1.rchg[line1++ - 1] = 1; |
| 282 return 0; |
| 283 } |
| 284 |
| 285 memset(&index, 0, sizeof(index)); |
| 286 |
| 287 index.env = env; |
| 288 index.xpp = xpp; |
| 289 |
| 290 index.records = NULL; |
| 291 index.line_map = NULL; |
| 292 /* in case of early xdl_cha_free() */ |
| 293 index.rcha.head = NULL; |
| 294 |
| 295 index.table_bits = xdl_hashbits(count1); |
| 296 sz = index.records_size = 1 << index.table_bits; |
| 297 sz *= sizeof(struct record *); |
| 298 if (!(index.records = (struct record **) xdl_malloc(sz))) |
| 299 goto cleanup; |
| 300 memset(index.records, 0, sz); |
| 301 |
| 302 sz = index.line_map_size = count1; |
| 303 sz *= sizeof(struct record *); |
| 304 if (!(index.line_map = (struct record **) xdl_malloc(sz))) |
| 305 goto cleanup; |
| 306 memset(index.line_map, 0, sz); |
| 307 |
| 308 sz = index.line_map_size; |
| 309 sz *= sizeof(unsigned int); |
| 310 if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) |
| 311 goto cleanup; |
| 312 memset(index.next_ptrs, 0, sz); |
| 313 |
| 314 /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ |
| 315 if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0
) |
| 316 goto cleanup; |
| 317 |
| 318 index.ptr_shift = line1; |
| 319 index.max_chain_length = 64; |
| 320 |
| 321 memset(&lcs, 0, sizeof(lcs)); |
| 322 if (find_lcs(&index, &lcs, line1, count1, line2, count2)) |
| 323 result = fall_back_to_classic_diff(&index, line1, count1, line2,
count2); |
| 324 else { |
| 325 if (lcs.begin1 == 0 && lcs.begin2 == 0) { |
| 326 while (count1--) |
| 327 env->xdf1.rchg[line1++ - 1] = 1; |
| 328 while (count2--) |
| 329 env->xdf2.rchg[line2++ - 1] = 1; |
| 330 result = 0; |
| 331 } else { |
| 332 result = histogram_diff(xpp, env, |
| 333 line1, lcs.begin1 - line1, |
| 334 line2, lcs.begin2 - line2); |
| 335 if (result) |
| 336 goto cleanup; |
| 337 result = histogram_diff(xpp, env, |
| 338 lcs.end1 + 1, LINE_END(1) - lcs.
end1, |
| 339 lcs.end2 + 1, LINE_END(2) - lcs.
end2); |
| 340 if (result) |
| 341 goto cleanup; |
| 342 } |
| 343 } |
| 344 |
| 345 cleanup: |
| 346 xdl_free(index.records); |
| 347 xdl_free(index.line_map); |
| 348 xdl_free(index.next_ptrs); |
| 349 xdl_cha_free(&index.rcha); |
| 350 |
| 351 return result; |
| 352 } |
| 353 |
| 354 int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, |
| 355 xpparam_t const *xpp, xdfenv_t *env) |
| 356 { |
| 357 if (xdl_prepare_env(file1, file2, xpp, env) < 0) |
| 358 return -1; |
| 359 |
| 360 return histogram_diff(xpp, env, |
| 361 env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, |
| 362 env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); |
| 363 } |
OLD | NEW |