Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(239)

Side by Side Diff: third_party/logilab/logilab/common/textutils.py

Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Update path to LICENSE.txt of logilab/README.chromium Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
3 #
4 # This file is part of logilab-common.
5 #
6 # logilab-common is free software: you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation, either version 2.1 of the License, or (at your option) any
9 # later version.
10 #
11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 # details.
15 #
16 # You should have received a copy of the GNU Lesser General Public License along
17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
18 """Some text manipulation utility functions.
19
20
21 :group text formatting: normalize_text, normalize_paragraph, pretty_match,\
22 unquote, colorize_ansi
23 :group text manipulation: searchall, splitstrip
24 :sort: text formatting, text manipulation
25
26 :type ANSI_STYLES: dict(str)
27 :var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code
28
29 :type ANSI_COLORS: dict(str)
30 :var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code
31
32 :type ANSI_PREFIX: str
33 :var ANSI_PREFIX:
34 ANSI terminal code notifying the start of an ANSI escape sequence
35
36 :type ANSI_END: str
37 :var ANSI_END:
38 ANSI terminal code notifying the end of an ANSI escape sequence
39
40 :type ANSI_RESET: str
41 :var ANSI_RESET:
42 ANSI terminal code resetting format defined by a previous ANSI escape sequence
43 """
44 __docformat__ = "restructuredtext en"
45
46 import sys
47 import re
48 import os.path as osp
49 from warnings import warn
50 from unicodedata import normalize as _uninormalize
51 try:
52 from os import linesep
53 except ImportError:
54 linesep = '\n' # gae
55
56 from logilab.common.deprecation import deprecated
57
# Hand-written ASCII replacements, consulted by `unormalize` before it falls
# back to NFKD decomposition. A replacement may be several characters long.
MANUAL_UNICODE_MAP = dict((
    (u'\xa1', u'!'),      # INVERTED EXCLAMATION MARK
    (u'\u0142', u'l'),    # LATIN SMALL LETTER L WITH STROKE
    (u'\u2044', u'/'),    # FRACTION SLASH
    (u'\xc6', u'AE'),     # LATIN CAPITAL LETTER AE
    (u'\xa9', u'(c)'),    # COPYRIGHT SIGN
    (u'\xab', u'"'),      # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    (u'\xe6', u'ae'),     # LATIN SMALL LETTER AE
    (u'\xae', u'(r)'),    # REGISTERED SIGN
    (u'\u0153', u'oe'),   # LATIN SMALL LIGATURE OE
    (u'\u0152', u'OE'),   # LATIN CAPITAL LIGATURE OE
    (u'\xd8', u'O'),      # LATIN CAPITAL LETTER O WITH STROKE
    (u'\xf8', u'o'),      # LATIN SMALL LETTER O WITH STROKE
    (u'\xbb', u'"'),      # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    (u'\xdf', u'ss'),     # LATIN SMALL LETTER SHARP S
))
74
def unormalize(ustring, ignorenonascii=None, substitute=None):
    """replace diacritical characters with their corresponding ascii characters

    Each character is first looked up in `MANUAL_UNICODE_MAP`. When it is not
    listed there, it is converted to its compatibility decomposition (normal
    form KD, NFKD) and only the first character of the decomposition is kept.

    :type ustring: unicode
    :param ustring: the text to convert

    :type ignorenonascii: bool or None
    :param ignorenonascii:
      deprecated; a true value is equivalent to ``substitute=''``. Any value
      other than None triggers a DeprecationWarning.

    :type substitute: str
    :param substitute:
      replacement to use when the decomposed character is still not ascii;
      when None, such a character raises ValueError instead

    :raise ValueError:
      if a character can't be reduced to ascii and no substitute is given

    :rtype: unicode
    :return: the converted text

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    # backward compatibility, ignorenonascii was a boolean
    if ignorenonascii is not None:
        warn("ignorenonascii is deprecated, "
             "use substitute named parameter instead",
             DeprecationWarning, stacklevel=2)
        if ignorenonascii:
            substitute = ''
    res = []
    # Iterate the string directly: it is never mutated, so no copy is needed.
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            replacement = _uninormalize('NFKD', letter)[0]
            # 2 ** 7 is the first code point outside the ascii range
            if ord(replacement) >= 2 ** 7:
                if substitute is None:
                    raise ValueError(
                        "can't deal with non-ascii based characters")
                replacement = substitute
        res.append(replacement)
    return u''.join(res)
107
def unquote(string):
    """remove optional quotes (simple or double) from the string

    A leading quote and a trailing quote are removed independently of each
    other: they do not have to match, and either one may be missing.

    :type string: str or unicode
    :param string: an optionally quoted string

    :rtype: str or unicode
    :return: the unquoted string (or the input string if it wasn't quoted)
    """
    if not string:
        return string
    if string[0] in '"\'':
        string = string[1:]
    # The string may now be empty (input was a single quote character), so
    # guard before looking at its last character.
    if string and string[-1] in '"\'':
        string = string[:-1]
    return string
124
125
# Two consecutive line breaks (each optionally preceded by a carriage
# return), used to split a text into paragraphs.
_BLANKLINES_RGX = re.compile(r'\r?\n\r?\n')
# Any run of whitespace; raw string so that `\s` is not an invalid string
# escape sequence.
_NORM_SPACES_RGX = re.compile(r'\s+')
128
def normalize_text(text, line_len=80, indent='', rest=False):
    """wrap a text made of several paragraphs to a maximum line size,
    optionally prefixing every line with an indentation string (which may
    hold a comment (#) or quoting (>) mark for instance).

    The text is split into paragraphs on blank lines; each paragraph is
    wrapped on its own and the paragraphs are joined back with a line that
    only holds the indentation string.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :type rest: bool
    :param rest:
      when true, paragraphs are wrapped with `normalize_rest_paragraph`
      instead of `normalize_paragraph`

    :rtype: str or unicode
    :return: the wrapped text, each line prefixed by `indent`
    """
    wrap = normalize_rest_paragraph if rest else normalize_paragraph
    paragraphs = [wrap(paragraph, line_len, indent)
                  for paragraph in _BLANKLINES_RGX.split(text)]
    separator = '%s%s%s' % (linesep, indent, linesep)
    return separator.join(paragraphs)
158
159
def normalize_paragraph(text, line_len=80, indent=''):
    """wrap a single paragraph to a maximum line size, optionally prefixing
    every line with an indentation string (which may be used to insert a
    comment mark for instance).

    Every run of whitespace, line breaks included, is first collapsed to a
    single space; the result is then cut into lines with `splittext`.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return: the wrapped paragraph, each line prefixed by `indent`
    """
    remaining = _NORM_SPACES_RGX.sub(' ', text)
    # the indentation counts against the available width
    width = line_len - len(indent)
    wrapped = []
    while remaining:
        head, remaining = splittext(remaining.strip(), width)
        wrapped.append(indent + head)
    return linesep.join(wrapped)
188
def normalize_rest_paragraph(text, line_len=80, indent=''):
    """wrap a ReST paragraph to a maximum line size, optionally prefixing
    every line with an indentation string (which may be used to insert a
    comment mark for instance).

    Unlike `normalize_paragraph`, existing line breaks are kept: each input
    line is stripped, has its inner whitespace collapsed, and is wrapped on
    its own. Input lines that end up empty produce no output line.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return: the wrapped paragraph, each line prefixed by `indent`
    """
    # the indentation counts against the available width
    width = line_len - len(indent)
    wrapped = []
    for raw_line in text.splitlines():
        current = _NORM_SPACES_RGX.sub(' ', raw_line.strip())
        while len(current) > width:
            # too long: emit the head and keep wrapping what is left over
            head, leftover = splittext(current, width)
            wrapped.append(indent + head)
            current = leftover + ' ' if leftover else ''
        if current:
            wrapped.append(indent + current.strip())
    return linesep.join(wrapped)
228
229
def splittext(text, line_len):
    """split the given text on space according to the given max line size

    The split point is the last space found at or before `line_len`. When
    there is none (the first word alone is longer than `line_len`), the
    text is split on the first space found after `line_len` instead, or not
    at all if no space is left.

    :type text: str or unicode
    :param text: the text to split

    :type line_len: int
    :param line_len:
      maximum size of the first line; a negative value is handled as 0

    :rtype: tuple
    :return: a 2-uple:

      * a line <= line_len if possible
      * the rest of the text which has to be reported on another line
    """
    # A negative size would turn into negative indexes below and return the
    # whole text as "rest", which makes callers looping on the rest spin
    # forever.
    line_len = max(line_len, 0)
    if len(text) <= line_len:
        return text, ''
    pos = min(len(text) - 1, line_len)
    # look backward for a space to split on
    while pos > 0 and text[pos] != ' ':
        pos -= 1
    if pos == 0:
        # no usable space before the limit: look forward instead
        pos = min(len(text), line_len)
        while len(text) > pos and text[pos] != ' ':
            pos += 1
    return text[:pos], text[pos + 1:].strip()
247
248
def splitstrip(string, sep=','):
    """split the string given as argument on `sep` (',' by default) and
    return the list of stripped pieces. Pieces that are empty once stripped
    are discarded.

    >>> splitstrip('a, b, c   ,  4,,')
    ['a', 'b', 'c', '4']
    >>> splitstrip('a')
    ['a']

    :type string: str or unicode
    :param string: a csv line

    :type sep: str or unicode
    :param sep: field separator, default to the comma (',')

    :rtype: list
    :return: the non-empty stripped fields, in their original order
    """
    stripped = (field.strip() for field in string.split(sep))
    return [field for field in stripped if field]
269
# Old name of `splitstrip`, wrapped with `deprecated` (imported from
# logilab.common.deprecation) using the message below -- presumably emitted
# as a warning when the alias is called; confirm against that module.
get_csv = deprecated('get_csv is deprecated, use splitstrip')(splitstrip)
271
272
def split_url_or_path(url_or_path):
    """split a string containing either an url of the form
    <scheme>://<path> or a local file system path into its leading part and
    its latest component.

    Trailing separators are ignored. An url is split on its last '/' and
    returned as a list; a path is split with `os.path.split` and returned
    as a tuple.
    """
    if '://' not in url_or_path:
        return osp.split(url_or_path.rstrip(osp.sep))
    trimmed = url_or_path.rstrip('/')
    return trimmed.rsplit('/', 1)
280
281
def text_to_dict(text):
    """parse a multilines text made of simple 'key=value' lines and return
    a dict of {'key': 'value'}. When the same key is encountered several
    times, its value becomes the list of all the values, in order.

    Keys and values are stripped; empty lines and lines starting with '#'
    are skipped. A line is split on its first '=' only.

    >>> d = text_to_dict('''multiple=1
    ... multiple= 2
    ... single =3
    ... ''')
    >>> d['single']
    '3'
    >>> d['multiple']
    ['1', '2']

    """
    parsed = {}
    if not text:
        return parsed
    for raw_line in text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith('#'):
            continue
        key, value = [part.strip() for part in entry.split('=', 1)]
        if key not in parsed:
            parsed[key] = value
        elif isinstance(parsed[key], list):
            parsed[key].append(value)
        else:
            # second occurrence: promote the single value to a list
            parsed[key] = [parsed[key], value]
    return parsed
312
313
# Separators dropped from a unit string before it is parsed: whitespace and
# commas.
_BLANK_URE = r'(\s|,)+'
_BLANK_RE = re.compile(_BLANK_URE)

# A numeric literal (optionally negative) and a unit name made of letters.
__VALUE_URE = r'-?(([0-9]+\.[0-9]*)|((0x?)?[0-9]+))'
__UNITS_URE = r'[a-zA-Z]+'

# One value with its optional unit, exposed as the "value" and "unit" groups.
_VALUE_RE = re.compile(
    r'(?P<value>%s)(?P<unit>%s)?' % (__VALUE_URE, __UNITS_URE))

# A whole unit string: any number of value+unit pairs, then an optional
# trailing value without unit.
_VALIDATION_RE = re.compile(
    r'^((%s)(%s))*(%s)?$' % (__VALUE_URE, __UNITS_URE, __VALUE_URE))
321
# Size units expressed in bytes: each unit is 1024 times the previous one.
BYTE_UNITS = dict(
    (unit, 1024 ** power)
    for power, unit in enumerate(("b", "kb", "mb", "gb", "tb"))
)
329
# Duration units expressed in seconds.
TIME_UNITS = {
    "ms": 0.001,  # a millisecond is a thousandth of a second
    "s": 1,
    "min": 60,
    "h": 60 * 60,
    "d": 60 * 60 * 24,
}
337
def apply_units(string, units, inter=None, final=float, blank_reg=_BLANK_RE,
                value_reg=_VALUE_RE):
    """Parse the string applying the units defined in units
    (e.g.: "1.5min", {'min': 60} -> 90.0).

    Blank characters are removed first, then every value found in the string
    is converted with `inter`, multiplied by its unit (if any), and all the
    results are summed.

    :type string: str or unicode
    :param string: the string to parse

    :type units: dict (or any object with __getitem__ using basestring key)
    :param units:
      a dict mapping a unit string repr to its value; units found in the
      string are lower-cased before being looked up

    :type inter: type
    :param inter:
      used to parse every intermediate value (the results are summed and
      multiplied by unit values); defaults to `final`

    :type final: type
    :param final: used to convert the final sum, default to float

    :type blank_reg: regexp
    :param blank_reg: should match every blank char to ignore.

    :type value_reg: regexp with "value" and optional "unit" group
    :param value_reg: match a value and its unit in the cleaned string

    :raise ValueError:
      if the string is empty once blanks are removed, or isn't a sequence
      of values with units (checked with the module level validation
      regexp, whatever `value_reg` is)
    :raise KeyError: if a unit found in the string isn't defined in `units`

    :return: the sum of all the values, converted with `final`
    """
    if inter is None:
        inter = final
    # honour the caller supplied regexp rather than the module default
    fstring = blank_reg.sub('', string)
    if not (fstring and _VALIDATION_RE.match(fstring)):
        raise ValueError("Invalid unit string: %r." % string)
    values = []
    for match in value_reg.finditer(fstring):
        dic = match.groupdict()
        lit, unit = dic["value"], dic.get("unit")
        value = inter(lit)
        if unit is not None:
            try:
                value *= units[unit.lower()]
            except KeyError:
                raise KeyError('invalid unit %s. valid units are %s' %
                               (unit, units.keys()))
        values.append(value)
    return final(sum(values))
376
377
# Any flavour of line break, normalized to `linesep` by `pretty_match`.
_LINE_RGX = re.compile('\r\n|\r+|\n')

def pretty_match(match, string, underline_char='^'):
    """return the string with an extra line inserted under the line where
    the match starts, underlining the matched section:

    >>> import re
    >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))
    il mange du bacon
       ^^^^^
    >>>

    Line breaks of the string are normalized to `linesep` and trailing
    whitespace is stripped from the result.

    :type match: _sre.SRE_match
    :param match: object returned by re.match, re.search or re.finditer

    :type string: str or unicode
    :param string:
      the string on which the regular expression has been applied to
      obtain the `match` object

    :type underline_char: str or unicode
    :param underline_char:
      character to use to underline the matched section, default to the
      caret '^'

    :rtype: str or unicode
    :return:
      the original string with an inserted line to underline the match
      location
    """
    begin, stop = match.start(), match.end()
    normalized = _LINE_RGX.sub(linesep, string)
    sep_len = len(linesep)
    pieces = []
    # start of the line holding the beginning of the match
    line_start = normalized.rfind(linesep, 0, begin)
    if line_start == -1:
        line_start = 0
    else:
        # keep everything before that line as is
        pieces.append(normalized[:line_start])
        line_start += sep_len
    marker = ' ' * (begin - line_start) + underline_char * (stop - begin)
    # first line break found from the end of the match onward
    line_end = normalized.find(linesep, stop)
    if line_end == -1:
        pieces.extend([normalized[line_start:], marker])
    else:
        pieces.extend([normalized[line_start:line_end],
                       marker,
                       normalized[line_end + sep_len:]])
    return linesep.join(pieces).rstrip()
431
432
433 # Ansi colorization ###########################################################
434
# Start and end markers of an ANSI escape sequence, and the complete
# sequence resetting any previously set format.
ANSI_PREFIX = '\033['
ANSI_END = 'm'
ANSI_RESET = '\033[0m'

# style identifier -> ANSI terminal code
ANSI_STYLES = dict(
    reset="0",
    bold="1",
    italic="3",
    underline="4",
    blink="5",
    inverse="7",
    strike="9",
)

# color identifier -> ANSI terminal code
ANSI_COLORS = dict(
    reset="0",
    black="30",
    red="31",
    green="32",
    yellow="33",
    blue="34",
    magenta="35",
    cyan="36",
    white="37",
)
458
def _get_ansi_code(color=None, style=None):
    """build the ansi escape code corresponding to a color and a style

    :type color: str or None
    :param color:
      the color name (see `ANSI_COLORS` for available values)
      or the color number when 256 colors are available

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if an unexistent color or style identifier is given

    :rtype: str
    :return:
      the built escape code, or an empty string when neither a color nor a
      style is given
    """
    codes = []
    if style:
        codes.extend(ANSI_STYLES[effect] for effect in splitstrip(style))
    if color:
        if color.isdigit():
            # numeric color: '38;5;<n>' selects it by number
            codes += ['38', '5', color]
        else:
            codes.append(ANSI_COLORS[color])
    if not codes:
        return ''
    return ANSI_PREFIX + ';'.join(codes) + ANSI_END
491
def colorize_ansi(msg, color=None, style=None):
    """wrap a message with the ansi escape codes for a color and a style

    :type msg: str or unicode
    :param msg: the message string to colorize

    :type color: str or None
    :param color:
      the color identifier (see `ANSI_COLORS` for available values)

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if an unexistent color or style identifier is given

    :rtype: str or unicode
    :return:
      the message preceded by the escape code and followed by `ANSI_RESET`,
      or the message left as is when there is no escape code to apply
    """
    # nothing requested at all: leave the text as is
    if color is None and style is None:
        return msg
    prefix = _get_ansi_code(color, style)
    # an empty code (e.g. empty color and style strings) means no wrapping
    if not prefix:
        return msg
    return '%s%s%s' % (prefix, msg, ANSI_RESET)
520
# Kind of diff line -> color identifier (see `ANSI_COLORS`).
DIFF_STYLE = {'separator': 'cyan', 'remove': 'red', 'add': 'green'}

def diff_colorize_ansi(lines, out=None, style=DIFF_STYLE):
    """write the lines of a diff to a stream, colorizing them with ansi
    escape codes according to their kind

    Lines starting with '--- ' or '+++ ' use the 'separator' color, other
    lines starting with '-' the 'remove' color and other lines starting with
    '+' the 'add' color. Any other line, empty lines included, is written
    unchanged.

    :type lines: iterable of str or unicode
    :param lines: the lines of the diff; no line break is added to them

    :type out: file-like object or None
    :param out:
      stream the lines are written to; defaults to the `sys.stdout` in
      place when the function is called

    :type style: dict
    :param style:
      mapping with 'separator', 'remove' and 'add' keys giving the color
      identifier to use for each kind of line
    """
    # Resolved at call time so that a redirected sys.stdout is honoured.
    if out is None:
        out = sys.stdout
    for line in lines:
        # Slices rather than indexes: an empty line must not raise.
        if line[:4] in ('--- ', '+++ '):
            out.write(colorize_ansi(line, style['separator']))
        elif line[:1] == '-':
            out.write(colorize_ansi(line, style['remove']))
        elif line[:1] == '+':
            out.write(colorize_ansi(line, style['add']))
        else:
            out.write(line)
537
OLDNEW
« no previous file with comments | « third_party/logilab/logilab/common/testlib.py ('k') | third_party/logilab/logilab/common/tree.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698