OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 """ |
| 3 Script which takes one or more file paths and reports on their detected |
| 4 encodings |
| 5 |
| 6 Example:: |
| 7 |
| 8 % chardetect somefile someotherfile |
| 9 somefile: windows-1252 with confidence 0.5 |
| 10 someotherfile: ascii with confidence 1.0 |
| 11 |
| 12 If no paths are provided, it takes its input from stdin. |
| 13 |
| 14 """ |
| 15 |
| 16 from __future__ import absolute_import, print_function, unicode_literals |
| 17 |
| 18 import argparse |
| 19 import sys |
| 20 from io import open |
| 21 |
| 22 from chardet import __version__ |
| 23 from chardet.universaldetector import UniversalDetector |
| 24 |
| 25 |
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when the
              detector reports an encoding, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    detector = UniversalDetector()
    for line in lines:
        detector.feed(line)
        # The detector sets ``done`` once it is confident of the encoding
        # (per the chardet usage docs), so there is no point in reading
        # the remainder of a potentially large input.
        if detector.done:
            break
    # close() finalises the detection and populates ``result``.
    detector.close()
    result = detector.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
| 46 |
| 47 |
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Prints one line per input (as produced by ``description_of``) to stdout.
    When an input is a terminal, a short usage hint is written to stderr
    first.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Named inputs are opened in binary mode, so the stdin fallback must be a
    # binary stream as well. On Python 3 ``sys.stdin`` is a text wrapper and
    # the underlying byte stream lives at ``sys.stdin.buffer``; on Python 2
    # ``sys.stdin`` already yields bytes and has no ``buffer`` attribute.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        try:
            print(description_of(f, f.name))
        finally:
            # Release files that argparse opened for us, but never close the
            # process-wide standard input.
            if f is not stdin_bytes and f is not sys.stdin:
                f.close()


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
OLD | NEW |