third_party/logilab/logilab/common/textutils.py - Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT

Side by Side Diff: third_party/logilab/logilab/common/textutils.py

Issue 1920403002: [content/test/gpu] Run pylint check of gpu tests in unittest instead of PRESUBMIT (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Update path to LICENSE.txt of logilab/README.chromium Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.

	2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr

	3 #

	4 # This file is part of logilab-common.

	5 #

	6 # logilab-common is free software: you can redistribute it and/or modify it under

	7 # the terms of the GNU Lesser General Public License as published by the Free

	8 # Software Foundation, either version 2.1 of the License, or (at your option) any

	9 # later version.

	10 #

	11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT

	12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

	13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more

	14 # details.

	15 #

	16 # You should have received a copy of the GNU Lesser General Public License along

	17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>.

	18 """Some text manipulation utility functions.

	19

	20

	21 :group text formatting: normalize_text, normalize_paragraph, pretty_match,\

	22 unquote, colorize_ansi

	23 :group text manipulation: searchall, splitstrip

	24 :sort: text formatting, text manipulation

	25

	26 :type ANSI_STYLES: dict(str)

	27 :var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code

	28

	29 :type ANSI_COLORS: dict(str)

	30 :var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code

	31

	32 :type ANSI_PREFIX: str

	33 :var ANSI_PREFIX:

	34 ANSI terminal code notifying the start of an ANSI escape sequence

	35

	36 :type ANSI_END: str

	37 :var ANSI_END:

	38 ANSI terminal code notifying the end of an ANSI escape sequence

	39

	40 :type ANSI_RESET: str

	41 :var ANSI_RESET:

	42 ANSI terminal code resetting format defined by a previous ANSI escape sequence

	43 """

	44 __docformat__ = "restructuredtext en"

	45

	46 import sys

	47 import re

	48 import os.path as osp

	49 from warnings import warn

	50 from unicodedata import normalize as _uninormalize

	51 try:

	52 from os import linesep

	53 except ImportError:

	54 linesep = '\n' # gae

	55

	56 from logilab.common.deprecation import deprecated

	57

	58 MANUAL_UNICODE_MAP = {

	59 u'\xa1': u'!', # INVERTED EXCLAMATION MARK

	60 u'\u0142': u'l', # LATIN SMALL LETTER L WITH STROKE

	61 u'\u2044': u'/', # FRACTION SLASH

	62 u'\xc6': u'AE', # LATIN CAPITAL LETTER AE

	63 u'\xa9': u'(c)', # COPYRIGHT SIGN

	64 u'\xab': u'"', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK

	65 u'\xe6': u'ae', # LATIN SMALL LETTER AE

	66 u'\xae': u'(r)', # REGISTERED SIGN

	67 u'\u0153': u'oe', # LATIN SMALL LIGATURE OE

	68 u'\u0152': u'OE', # LATIN CAPITAL LIGATURE OE

	69 u'\xd8': u'O', # LATIN CAPITAL LETTER O WITH STROKE

	70 u'\xf8': u'o', # LATIN SMALL LETTER O WITH STROKE

	71 u'\xbb': u'"', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK

	72 u'\xdf': u'ss', # LATIN SMALL LETTER SHARP S

	73 }

	74

	75 def unormalize(ustring, ignorenonascii=None, substitute=None):

	76 """replace diacritical characters with their corresponding ascii characters

	77

	78 Convert the unicode string to its long normalized form (unicode character

	79 will be transform into several characters) and keep the first one only.

	80 The normal form KD (NFKD) will apply the compatibility decomposition, i.e.

	81 replace all compatibility characters with their equivalents.

	82

	83 :type substitute: str

	84 :param substitute: replacement character to use if decomposition fails

	85

	86 :see: Another project about ASCII transliterations of Unicode text

	87 http://pypi.python.org/pypi/Unidecode

	88 """

	89 # backward compatibility, ignorenonascii was a boolean

	90 if ignorenonascii is not None:

	91 warn("ignorenonascii is deprecated, use substitute named parameter inste ad",

	92 DeprecationWarning, stacklevel=2)

	93 if ignorenonascii:

	94 substitute = ''

	95 res = []

	96 for letter in ustring[:]:

	97 try:

	98 replacement = MANUAL_UNICODE_MAP[letter]

	99 except KeyError:

	100 replacement = _uninormalize('NFKD', letter)[0]

	101 if ord(replacement) >= 2 ** 7:

	102 if substitute is None:

	103 raise ValueError("can't deal with non-ascii based characters ")

	104 replacement = substitute

	105 res.append(replacement)

	106 return u''.join(res)

	107

	108 def unquote(string):

	109 """remove optional quotes (simple or double) from the string

	110

	111 :type string: str or unicode

	112 :param string: an optionally quoted string

	113

	114 :rtype: str or unicode

	115 :return: the unquoted string (or the input string if it wasn't quoted)

	116 """

	117 if not string:

	118 return string

	119 if string[0] in '"\'':

	120 string = string[1:]

	121 if string[-1] in '"\'':

	122 string = string[:-1]

	123 return string

	124

	125

	126 _BLANKLINES_RGX = re.compile('\r?\n\r?\n')

	127 _NORM_SPACES_RGX = re.compile('\s+')

	128

	129 def normalize_text(text, line_len=80, indent='', rest=False):

	130 """normalize a text to display it with a maximum line size and

	131 optionally arbitrary indentation. Line jumps are normalized but blank

	132 lines are kept. The indentation string may be used to insert a

	133 comment (#) or a quoting (>) mark for instance.

	134

	135 :type text: str or unicode

	136 :param text: the input text to normalize

	137

	138 :type line_len: int

	139 :param line_len: expected maximum line's length, default to 80

	140

	141 :type indent: str or unicode

	142 :param indent: optional string to use as indentation

	143

	144 :rtype: str or unicode

	145 :return:

	146 the input text normalized to fit on lines with a maximized size

	147 inferior to `line_len`, and optionally prefixed by an

	148 indentation string

	149 """

	150 if rest:

	151 normp = normalize_rest_paragraph

	152 else:

	153 normp = normalize_paragraph

	154 result = []

	155 for text in _BLANKLINES_RGX.split(text):

	156 result.append(normp(text, line_len, indent))

	157 return ('%s%s%s' % (linesep, indent, linesep)).join(result)

	158

	159

	160 def normalize_paragraph(text, line_len=80, indent=''):

	161 """normalize a text to display it with a maximum line size and

	162 optionally arbitrary indentation. Line jumps are normalized. The

	163 indentation string may be used top insert a comment mark for

	164 instance.

	165

	166 :type text: str or unicode

	167 :param text: the input text to normalize

	168

	169 :type line_len: int

	170 :param line_len: expected maximum line's length, default to 80

	171

	172 :type indent: str or unicode

	173 :param indent: optional string to use as indentation

	174

	175 :rtype: str or unicode

	176 :return:

	177 the input text normalized to fit on lines with a maximized size

	178 inferior to `line_len`, and optionally prefixed by an

	179 indentation string

	180 """

	181 text = _NORM_SPACES_RGX.sub(' ', text)

	182 line_len = line_len - len(indent)

	183 lines = []

	184 while text:

	185 aline, text = splittext(text.strip(), line_len)

	186 lines.append(indent + aline)

	187 return linesep.join(lines)

	188

	189 def normalize_rest_paragraph(text, line_len=80, indent=''):

	190 """normalize a ReST text to display it with a maximum line size and

	191 optionally arbitrary indentation. Line jumps are normalized. The

	192 indentation string may be used top insert a comment mark for

	193 instance.

	194

	195 :type text: str or unicode

	196 :param text: the input text to normalize

	197

	198 :type line_len: int

	199 :param line_len: expected maximum line's length, default to 80

	200

	201 :type indent: str or unicode

	202 :param indent: optional string to use as indentation

	203

	204 :rtype: str or unicode

	205 :return:

	206 the input text normalized to fit on lines with a maximized size

	207 inferior to `line_len`, and optionally prefixed by an

	208 indentation string

	209 """

	210 toreport = ''

	211 lines = []

	212 line_len = line_len - len(indent)

	213 for line in text.splitlines():

	214 line = toreport + _NORM_SPACES_RGX.sub(' ', line.strip())

	215 toreport = ''

	216 while len(line) > line_len:

	217 # too long line, need split

	218 line, toreport = splittext(line, line_len)

	219 lines.append(indent + line)

	220 if toreport:

	221 line = toreport + ' '

	222 toreport = ''

	223 else:

	224 line = ''

	225 if line:

	226 lines.append(indent + line.strip())

	227 return linesep.join(lines)

	228

	229

	230 def splittext(text, line_len):

	231 """split the given text on space according to the given max line size

	232

	233 return a 2-uple:

	234 * a line <= line_len if possible

	235 * the rest of the text which has to be reported on another line

	236 """

	237 if len(text) <= line_len:

	238 return text, ''

	239 pos = min(len(text)-1, line_len)

	240 while pos > 0 and text[pos] != ' ':

	241 pos -= 1

	242 if pos == 0:

	243 pos = min(len(text), line_len)

	244 while len(text) > pos and text[pos] != ' ':

	245 pos += 1

	246 return text[:pos], text[pos+1:].strip()

	247

	248

	249 def splitstrip(string, sep=','):

	250 """return a list of stripped string by splitting the string given as

	251 argument on `sep` (',' by default). Empty string are discarded.

	252

	253 >>> splitstrip('a, b, c , 4,,')

	254 ['a', 'b', 'c', '4']

	255 >>> splitstrip('a')

	256 ['a']

	257 >>>

	258

	259 :type string: str or unicode

	260 :param string: a csv line

	261

	262 :type sep: str or unicode

	263 :param sep: field separator, default to the comma (',')

	264

	265 :rtype: str or unicode

	266 :return: the unquoted string (or the input string if it wasn't quoted)

	267 """

	268 return [word.strip() for word in string.split(sep) if word.strip()]

	269

	270 get_csv = deprecated('get_csv is deprecated, use splitstrip')(splitstrip)

	271

	272

	273 def split_url_or_path(url_or_path):

	274 """return the latest component of a string containing either an url of the

	275 form <scheme>://<path> or a local file system path

	276 """

	277 if '://' in url_or_path:

	278 return url_or_path.rstrip('/').rsplit('/', 1)

	279 return osp.split(url_or_path.rstrip(osp.sep))

	280

	281

	282 def text_to_dict(text):

	283 """parse multilines text containing simple 'key=value' lines and return a

	284 dict of {'key': 'value'}. When the same key is encountered multiple time,

	285 value is turned into a list containing all values.

	286

	287 >>> d = text_to_dict('''multiple=1

	288 ... multiple= 2

	289 ... single =3

	290 ... ''')

	291 >>> d['single']

	292 '3'

	293 >>> d['multiple']

	294 ['1', '2']

	295

	296 """

	297 res = {}

	298 if not text:

	299 return res

	300 for line in text.splitlines():

	301 line = line.strip()

	302 if line and not line.startswith('#'):

	303 key, value = [w.strip() for w in line.split('=', 1)]

	304 if key in res:

	305 try:

	306 res[key].append(value)

	307 except AttributeError:

	308 res[key] = [res[key], value]

	309 else:

	310 res[key] = value

	311 return res

	312

	313

	314 _BLANK_URE = r'(\s\|,)+'

	315 _BLANK_RE = re.compile(_BLANK_URE)

	316 __VALUE_URE = r'-?(([0-9]+\.[0-9]*)\|((0x?)?[0-9]+))'

	317 __UNITS_URE = r'[a-zA-Z]+'

	318 _VALUE_RE = re.compile(r'(?P<value>%s)(?P<unit>%s)?'%(__VALUE_URE, __UNITS_URE))

	319 _VALIDATION_RE = re.compile(r'^((%s)(%s))*(%s)?$' % (__VALUE_URE, __UNITS_URE,

	320 __VALUE_URE))

	321

	322 BYTE_UNITS = {

	323 "b": 1,

	324 "kb": 1024,

	325 "mb": 1024 ** 2,

	326 "gb": 1024 ** 3,

	327 "tb": 1024 ** 4,

	328 }

	329

	330 TIME_UNITS = {

	331 "ms": 0.0001,

	332 "s": 1,

	333 "min": 60,

	334 "h": 60 * 60,

	335 "d": 60 * 60 *24,

	336 }

	337

	338 def apply_units(string, units, inter=None, final=float, blank_reg=_BLANK_RE,

	339 value_reg=_VALUE_RE):

	340 """Parse the string applying the units defined in units

	341 (e.g.: "1.5m",{'m',60} -> 80).

	342

	343 :type string: str or unicode

	344 :param string: the string to parse

	345

	346 :type units: dict (or any object with __getitem__ using basestring key)

	347 :param units: a dict mapping a unit string repr to its value

	348

	349 :type inter: type

	350 :param inter: used to parse every intermediate value (need __sum__)

	351

	352 :type blank_reg: regexp

	353 :param blank_reg: should match every blank char to ignore.

	354

	355 :type value_reg: regexp with "value" and optional "unit" group

	356 :param value_reg: match a value and it's unit into the

	357 """

	358 if inter is None:

	359 inter = final

	360 fstring = _BLANK_RE.sub('', string)

	361 if not (fstring and _VALIDATION_RE.match(fstring)):

	362 raise ValueError("Invalid unit string: %r." % string)

	363 values = []

	364 for match in value_reg.finditer(fstring):

	365 dic = match.groupdict()

	366 lit, unit = dic["value"], dic.get("unit")

	367 value = inter(lit)

	368 if unit is not None:

	369 try:

	370 value *= units[unit.lower()]

	371 except KeyError:

	372 raise KeyError('invalid unit %s. valid units are %s' %

	373 (unit, units.keys()))

	374 values.append(value)

	375 return final(sum(values))

	376

	377

	378 _LINE_RGX = re.compile('\r\n\|\r+\|\n')

	379

	380 def pretty_match(match, string, underline_char='^'):

	381 """return a string with the match location underlined:

	382

	383 >>> import re

	384 >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))

	385 il mange du bacon

	386 ^^^^^

	387 >>>

	388

	389 :type match: _sre.SRE_match

	390 :param match: object returned by re.match, re.search or re.finditer

	391

	392 :type string: str or unicode

	393 :param string:

	394 the string on which the regular expression has been applied to

	395 obtain the `match` object

	396

	397 :type underline_char: str or unicode

	398 :param underline_char:

	399 character to use to underline the matched section, default to the

	400 carret '^'

	401

	402 :rtype: str or unicode

	403 :return:

	404 the original string with an inserted line to underline the match

	405 location

	406 """

	407 start = match.start()

	408 end = match.end()

	409 string = _LINE_RGX.sub(linesep, string)

	410 start_line_pos = string.rfind(linesep, 0, start)

	411 if start_line_pos == -1:

	412 start_line_pos = 0

	413 result = []

	414 else:

	415 result = [string[:start_line_pos]]

	416 start_line_pos += len(linesep)

	417 offset = start - start_line_pos

	418 underline = ' ' * offset + underline_char * (end - start)

	419 end_line_pos = string.find(linesep, end)

	420 if end_line_pos == -1:

	421 string = string[start_line_pos:]

	422 result.append(string)

	423 result.append(underline)

	424 else:

	425 end = string[end_line_pos + len(linesep):]

	426 string = string[start_line_pos:end_line_pos]

	427 result.append(string)

	428 result.append(underline)

	429 result.append(end)

	430 return linesep.join(result).rstrip()

	431

	432

	433 # Ansi colorization ###########################################################

	434

	435 ANSI_PREFIX = '\033['

	436 ANSI_END = 'm'

	437 ANSI_RESET = '\033[0m'

	438 ANSI_STYLES = {

	439 'reset': "0",

	440 'bold': "1",

	441 'italic': "3",

	442 'underline': "4",

	443 'blink': "5",

	444 'inverse': "7",

	445 'strike': "9",

	446 }

	447 ANSI_COLORS = {

	448 'reset': "0",

	449 'black': "30",

	450 'red': "31",

	451 'green': "32",

	452 'yellow': "33",

	453 'blue': "34",

	454 'magenta': "35",

	455 'cyan': "36",

	456 'white': "37",

	457 }

	458

	459 def _get_ansi_code(color=None, style=None):

	460 """return ansi escape code corresponding to color and style

	461

	462 :type color: str or None

	463 :param color:

	464 the color name (see `ANSI_COLORS` for available values)

	465 or the color number when 256 colors are available

	466

	467 :type style: str or None

	468 :param style:

	469 style string (see `ANSI_COLORS` for available values). To get

	470 several style effects at the same time, use a coma as separator.

	471

	472 :raise KeyError: if an unexistent color or style identifier is given

	473

	474 :rtype: str

	475 :return: the built escape code

	476 """

	477 ansi_code = []

	478 if style:

	479 style_attrs = splitstrip(style)

	480 for effect in style_attrs:

	481 ansi_code.append(ANSI_STYLES[effect])

	482 if color:

	483 if color.isdigit():

	484 ansi_code.extend(['38', '5'])

	485 ansi_code.append(color)

	486 else:

	487 ansi_code.append(ANSI_COLORS[color])

	488 if ansi_code:

	489 return ANSI_PREFIX + ';'.join(ansi_code) + ANSI_END

	490 return ''

	491

	492 def colorize_ansi(msg, color=None, style=None):

	493 """colorize message by wrapping it with ansi escape codes

	494

	495 :type msg: str or unicode

	496 :param msg: the message string to colorize

	497

	498 :type color: str or None

	499 :param color:

	500 the color identifier (see `ANSI_COLORS` for available values)

	501

	502 :type style: str or None

	503 :param style:

	504 style string (see `ANSI_COLORS` for available values). To get

	505 several style effects at the same time, use a coma as separator.

	506

	507 :raise KeyError: if an unexistent color or style identifier is given

	508

	509 :rtype: str or unicode

	510 :return: the ansi escaped string

	511 """

	512 # If both color and style are not defined, then leave the text as is

	513 if color is None and style is None:

	514 return msg

	515 escape_code = _get_ansi_code(color, style)

	516 # If invalid (or unknown) color, don't wrap msg with ansi codes

	517 if escape_code:

	518 return '%s%s%s' % (escape_code, msg, ANSI_RESET)

	519 return msg

	520

	521 DIFF_STYLE = {'separator': 'cyan', 'remove': 'red', 'add': 'green'}

	522

	523 def diff_colorize_ansi(lines, out=sys.stdout, style=DIFF_STYLE):

	524 for line in lines:

	525 if line[:4] in ('--- ', '+++ '):

	526 out.write(colorize_ansi(line, style['separator']))

	527 elif line[0] == '-':

	528 out.write(colorize_ansi(line, style['remove']))

	529 elif line[0] == '+':

	530 out.write(colorize_ansi(line, style['add']))

	531 elif line[:4] == '--- ':

	532 out.write(colorize_ansi(line, style['separator']))

	533 elif line[:4] == '+++ ':

	534 out.write(colorize_ansi(line, style['separator']))

	535 else:

	536 out.write(line)

	537

OLD	NEW

« no previous file with comments | « third_party/logilab/logilab/common/testlib.py ('k') | third_party/logilab/logilab/common/tree.py » ('j') | no next file with comments »