OLD | NEW |
| (Empty) |
#!/bin/bash
# Check common misspellings
#
# Input file format (one entry per line):
#   word->word1, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
#
# Environment variables:
#   HUNSPELL  dictionary name passed to "hunspell -d" (default: en_US)
#   ASPELL    language passed to "aspell -l"          (default: en_US)
#   INPUT     input file (default: List_of_common_misspellings.txt)
#
# Files written (all tab-separated: misspelling, then its suggestions):
#   $INPUT.1  entries whose misspelling Hunspell rejects (no dashed words)
#   $INPUT.2  entries of $INPUT.1 whose misspelling Aspell rejects as well
#   $INPUT.3  entries of $INPUT.2 without a single-word suggestion that
#             Hunspell or Aspell rejects
#   $INPUT.4  Aspell's own suggestions for the misspellings in $INPUT.3
#   $INPUT.5  Hunspell's own suggestions for the misspellings in $INPUT.3
#   x.1, x.2  scratch files left in the current directory

hunspell=../../src/tools/hunspell
hlang=${HUNSPELL:-en_US}
alang=${ASPELL:-en_US}
input=${INPUT:-List_of_common_misspellings.txt}

# Every intermediate file uses a tab as the field separator. A space cannot
# be used because suggestions may themselves contain spaces.
tab=$'\t'

if [[ ! -r "$input" ]]; then
  printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
  exit 1
fi

# remove bad words recognised by Hunspell as good
# (-1 checks only the first tab-separated field, -L prints the lines whose
# checked field is misspelled), then
# remove items with dash for Aspell and
# remove spaces from end of lines
sed "s/[-]>/${tab}/" <"$input" \
  | "$hunspell" -d "$hlang" -1 -L \
  | grep "^[^-]*${tab}" \
  | sed 's/ *$//' >"${input}.1"

# remove bad words recognised by Aspell as good:
# awk first loads the words Aspell lists as misspelled (read from stdin, "-")
# and then prints only the entries of $input.1 that start with such a word;
# finally change commas with tabs
cut -f 1 <"${input}.1" \
  | aspell -l "$alang" --list \
  | awk 'FILENAME == "-" { rejected[$1] = 1; next }
         rejected[$1]    { print $0 }' - "${input}.1" \
  | sed "s/, */${tab}/g" >"${input}.2"

# remove lines with unrecognised suggestions (except suggestion with spaces)
# x.1: every single-word suggestion, one per line
# x.2: the suggestions of x.1 that Hunspell or Aspell reports as misspelled
cut -f 2- <"${input}.2" | tr '\t' '\n' | grep -v ' ' >x.1
{
  "$hunspell" -l -d "$hlang" <x.1
  aspell -l "$alang" --list <x.1
} >x.2

# Drop an entry as soon as any of its suggestions (fields 2..NF) is listed in
# x.2. All suggestion fields are checked, not only the first two.
awk -F '\t' '
  FILENAME == "-" { unknown[$1] = 1; next }
  {
    for (i = 2; i <= NF; i++)
      if ($i in unknown)
        next
    print $0
  }' - "${input}.2" <x.2 >"${input}.3"

# Collect what each checker itself suggests for the remaining misspellings.
# In -a (pipe) mode the first output line is the version banner, hence
# "sed -n 2,\$p" after the empty lines have been removed. The last sed strips
# everything up to the final ": " and separates the suggestions with tabs.
cut -f 1 <"${input}.3" \
  | aspell -l "$alang" -a \
  | grep -v '^$' \
  | sed -n '2,$p' \
  | sed "s/^.*: //;s/, /${tab}/g" >"${input}.4"

"$hunspell" -d "$hlang" -a -1 <"${input}.3" \
  | grep -v '^$' \
  | sed -n '2,$p' \
  | sed "s/^.*: //;s/, /${tab}/g" >"${input}.5"
OLD | NEW |