| Index: tools/lua/ngrams_aggregate.lua
|
| diff --git a/tools/lua/ngrams_aggregate.lua b/tools/lua/ngrams_aggregate.lua
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f8c93381495badfa80079858cf361afaf53650ae
|
| --- /dev/null
|
| +++ b/tools/lua/ngrams_aggregate.lua
|
| @@ -0,0 +1,22 @@
|
| +-- Aggregate the output from ngrams.lua: print "count<TAB>ngram" lines, highest count first.
|
| +
|
| +-- Get the data from all shards. `counts` must stay global: the loaded chunk presumably fills it (TODO confirm).
|
| +counts = {}
|
| +dofile("/tmp/lua-output")
|
| +
|
| +-- Put the data into a sortable "array" of {count, ngram} pairs (a keyed table cannot be sorted).
|
| +local countArray = {}
|
| +for ngram, count in pairs(counts) do
|
| +  countArray[#countArray + 1] = {count, ngram}
|
| +end
|
| +
|
| +-- Sort by descending count; ties are broken by ngram text so the output order is deterministic.
|
| +table.sort(countArray, function(a, b)
|
| +  if a[1] ~= b[1] then return a[1] > b[1] end
|
| +  return tostring(a[2]) < tostring(b[2])
|
| +end)
|
| +
|
| +-- Write the result, one tab-separated "count ngram" pair per line.
|
| +for _, countPair in ipairs(countArray) do
|
| +  io.write(countPair[1], "\t", countPair[2], "\n")
|
| +end
|
|
|