Index: third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.cc |
=================================================================== |
--- third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.cc (revision 0) |
+++ third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.cc (revision 0) |
@@ -0,0 +1,299 @@ |
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.h" |
+#include <string.h> // memset |
+ |
+#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_logging.h" |
+ |
+ |
+// Take a set of <key, value> pairs and tote them up. |
+// After explicitly sorting, retrieve top key, value pairs |
+Tote::Tote() { |
+ gram_count_ = 0; |
+ incr_count_ = 0; |
+ byte_count_ = 0; |
+ memset(key_, 0, sizeof(key_)); |
+ // No need to initialize values |
+} |
+ |
+Tote::~Tote() { |
+} |
+ |
+void Tote::Reinit() { |
+ gram_count_ = 0; |
+ incr_count_ = 0; |
+ byte_count_ = 0; |
+ memset(key_, 0, sizeof(key_)); |
+ // No need to initialize values |
+} |
+ |
+// Increment count of quadgrams/trigrams/unigrams scored |
+void Tote::AddGram() { |
+ ++gram_count_; |
+} |
+ |
+// Three-way associative, guaranteeing that the largest two counts are always |
+// in the data structure. kMaxSize must be a multiple of 3, and is tied to the |
+// subscript calculations here, which are for 8 sets of 3-way associative |
+// buckets. The subscripts for set N are [N], [N+8], and [N+16] used in a |
+// slightly-weird way: The initial probe point is [N] or [N+8], whichever |
+// is specified by key mod 16. In most cases (nearly *all* cases except Latin |
+// script), this entry matches and we update/return. The second probe is |
+// the other of [N] and [N+8]. The third probe is only used as a fallback to |
+// these two, and is there only for the rare case that there are three or more |
+// languages with Language enum values equal mod 8, contending within the same |
+// bucket. This can only happen in Latin and (rarely) Cyrillic scripts, because |
+// the other scripts have fewer than 17 languages total. |
+// If you change kMaxSize, change the constants 7/8/15/16 below |
+void Tote::Add(uint8 ikey, int idelta) { |
+ DCHECK(ikey != 0); |
+ ++incr_count_; |
+ |
+ // Look for existing entry |
+ int sub0 = ikey & 15; |
+ if (key_[sub0] == ikey) { |
+ value_[sub0] += idelta; |
+ return; |
+ } |
+ int sub1 = sub0 ^ 8; |
+ if (key_[sub1] == ikey) { |
+ value_[sub1] += idelta; |
+ return; |
+ } |
+ int sub2 = (ikey & 7) + 16; |
+ if (key_[sub2] == ikey) { |
+ value_[sub2] += idelta; |
+ return; |
+ } |
+ |
+ // Allocate new entry |
+ int alloc = -1; |
+ if (key_[sub0] == 0) { |
+ alloc = sub0; |
+ } else if (key_[sub1] == 0) { |
+ alloc = sub1; |
+ } else if (key_[sub2] == 0) { |
+ alloc = sub2; |
+ } else { |
+ // All choices allocated, need to replace smallest one |
+ alloc = sub0; |
+ if (value_[sub1] < value_[alloc]) {alloc = sub1;} |
+ if (value_[sub2] < value_[alloc]) {alloc = sub2;} |
+ } |
+ key_[alloc] = ikey; |
+ value_[alloc] = idelta; |
+ return; |
+} |
+ |
+// Return current top key |
+int Tote::CurrentTopKey() { |
+ int top_key = 0; |
+ int top_value = -1; |
+ for (int sub = 0; sub < kMaxSize_; ++sub) { |
+ if (key_[sub] == 0) {continue;} |
+ if (top_value < value_[sub]) { |
+ top_value = value_[sub]; |
+ top_key = key_[sub]; |
+ } |
+ } |
+ return top_key; |
+} |
+ |
+ |
+// Sort first n entries by decreasing order of value |
+// If key==0 other fields are not valid, treat value as -1 |
+void Tote::Sort(int n) { |
+ // This is n**2, but n is small |
+ for (int sub = 0; sub < n; ++sub) { |
+ if (key_[sub] == 0) {value_[sub] = -1;} |
+ |
+ // Bubble sort key[sub] and entry[sub] |
+ for (int sub2 = sub + 1; sub2 < kMaxSize_; ++sub2) { |
+ if (key_[sub2] == 0) {value_[sub2] = -1;} |
+ if (value_[sub] < value_[sub2]) { |
+ // swap |
+ uint8 tmpk = key_[sub]; |
+ key_[sub] = key_[sub2]; |
+ key_[sub2] = tmpk; |
+ int tmpv = value_[sub]; |
+ value_[sub] = value_[sub2]; |
+ value_[sub2] = tmpv; |
+ } |
+ } |
+ } |
+} |
+ |
+void Tote::Dump(FILE* f) { |
+ for (int sub = 0; sub < kMaxSize_; ++sub) { |
+ if (key_[sub] > 0) { |
+ fprintf(f, "[%2d] %3d %8d\n", sub, key_[sub], value_[sub]); |
+ } |
+ } |
+ fprintf(f, "%d %d %d\n", gram_count_, incr_count_, byte_count_); |
+} |
+ |
+ |
+ |
+ |
+// Take a set of <key, value> pairs and tote them up. |
+// After explicitly sorting, retrieve top key, value pairs |
+ToteWithReliability::ToteWithReliability() { |
+ // No need to initialize score_ or value_ |
+ incr_count_ = 0; |
+ sorted_ = 0; |
+ memset(closepair_, 0, sizeof(closepair_)); |
+ memset(key_, 0, sizeof(key_)); |
+} |
+ |
+ToteWithReliability::~ToteWithReliability() { |
+} |
+ |
+void ToteWithReliability::Reinit() { |
+ // No need to initialize score_ or value_ |
+ incr_count_ = 0; |
+ sorted_ = 0; |
+ memset(closepair_, 0, sizeof(closepair_)); |
+ memset(key_, 0, sizeof(key_)); |
+ ////ss_.Init(); |
+} |
+ |
+// Weight reliability by ibytes |
+// Also see three-way associative comments above for Tote |
+void ToteWithReliability::Add(uint8 ikey, int ibytes, |
+ int score, int ireliability) { |
+ DCHECK(ikey != 0); |
+ CHECK(sorted_ == 0); |
+ ++incr_count_; |
+ |
+ // Look for existing entry |
+ int sub0 = ikey & 15; |
+ if (key_[sub0] == ikey) { |
+ value_[sub0] += ibytes; |
+ score_[sub0] += score; |
+ reliability_[sub0] += ireliability * ibytes; |
+ return; |
+ } |
+ int sub1 = sub0 ^ 8; |
+ if (key_[sub1] == ikey) { |
+ value_[sub1] += ibytes; |
+ score_[sub1] += score; |
+ reliability_[sub1] += ireliability * ibytes; |
+ return; |
+ } |
+ int sub2 = (ikey & 7) + 16; |
+ if (key_[sub2] == ikey) { |
+ value_[sub2] += ibytes; |
+ score_[sub2] += score; |
+ reliability_[sub2] += ireliability * ibytes; |
+ return; |
+ } |
+ |
+ // Allocate new entry |
+ int alloc = -1; |
+ if (key_[sub0] == 0) { |
+ alloc = sub0; |
+ } else if (key_[sub1] == 0) { |
+ alloc = sub1; |
+ } else if (key_[sub2] == 0) { |
+ alloc = sub2; |
+ } else { |
+ // All choices allocated, need to replace smallest one |
+ alloc = sub0; |
+ if (value_[sub1] < value_[alloc]) {alloc = sub1;} |
+ if (value_[sub2] < value_[alloc]) {alloc = sub2;} |
+ } |
+ key_[alloc] = ikey; |
+ value_[alloc] = ibytes; |
+ score_[alloc] = score; |
+ reliability_[alloc] = ireliability * ibytes; |
+ return; |
+} |
+ |
+// Find subscript of a given packed language, or -1 |
+int ToteWithReliability::Find(uint8 ikey) { |
+ DCHECK(ikey != 0); |
+ |
+ if (sorted_) { |
+ // Linear search if sorted |
+ for (int sub = 0; sub < kMaxSize_; ++sub) { |
+ if (key_[sub] == ikey) {return sub;} |
+ } |
+ return -1; |
+ } |
+ |
+ // Look for existing entry |
+ int sub0 = ikey & 15; |
+ if (key_[sub0] == ikey) { |
+ return sub0; |
+ } |
+ int sub1 = sub0 ^ 8; |
+ if (key_[sub1] == ikey) { |
+ return sub1; |
+ } |
+ int sub2 = (ikey & 7) + 16; |
+ if (key_[sub2] == ikey) { |
+ return sub2; |
+ } |
+ |
+ return -1; |
+} |
+ |
+// Return current top key |
+int ToteWithReliability::CurrentTopKey() { |
+ int top_key = 0; |
+ int top_value = -1; |
+ for (int sub = 0; sub < kMaxSize_; ++sub) { |
+ if (key_[sub] == 0) {continue;} |
+ if (top_value < value_[sub]) { |
+ top_value = value_[sub]; |
+ top_key = key_[sub]; |
+ } |
+ } |
+ return top_key; |
+} |
+ |
+ |
+// Sort first n entries by decreasing order of value |
+// If key==0 other fields are not valid, treat value as -1 |
+void ToteWithReliability::Sort(int n) { |
+ // This is n**2, but n is small |
+ for (int sub = 0; sub < n; ++sub) { |
+ if (key_[sub] == 0) {value_[sub] = -1;} |
+ |
+ // Bubble sort key[sub] and entry[sub] |
+ for (int sub2 = sub + 1; sub2 < kMaxSize_; ++sub2) { |
+ if (key_[sub2] == 0) {value_[sub2] = -1;} |
+ if (value_[sub] < value_[sub2]) { |
+ // swap |
+ uint8 tmpk = key_[sub]; |
+ key_[sub] = key_[sub2]; |
+ key_[sub2] = tmpk; |
+ |
+ int tmpv = value_[sub]; |
+ value_[sub] = value_[sub2]; |
+ value_[sub2] = tmpv; |
+ |
+ double tmps = score_[sub]; |
+ score_[sub] = score_[sub2]; |
+ score_[sub2] = tmps; |
+ |
+ int tmpr = reliability_[sub]; |
+ reliability_[sub] = reliability_[sub2]; |
+ reliability_[sub2] = tmpr; |
+ } |
+ } |
+ } |
+ sorted_ = 1; |
+} |
+ |
+void ToteWithReliability::Dump(FILE* f) { |
+ for (int sub = 0; sub < kMaxSize_; ++sub) { |
+ if (key_[sub] > 0) { |
+ fprintf(f, "[%2d] %3d %6d %5d %4d\n", |
+ sub, key_[sub], value_[sub], score_[sub], reliability_[sub]); |
+ } |
+ } |
+ fprintf(f, " %d#\n", incr_count_); |
+} |
Property changes on: third_party\cld\bar\toolbar\cld\i18n\encodings\compact_lang_det\tote.cc |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |