appengine/monorail/framework/validate.py - Issue 1868553004: Open Source Monorail

Unified Diff: appengine/monorail/framework/validate.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/monorail/framework/validate.py

diff --git a/appengine/monorail/framework/validate.py b/appengine/monorail/framework/validate.py

new file mode 100644

index 0000000000000000000000000000000000000000..4aa40ca147180c2f8c6dea7d5301a0c8212c8ebe

--- /dev/null

+++ b/appengine/monorail/framework/validate.py

@@ -0,0 +1,102 @@

+# Use of this source code is governed by a BSD-style

+# license that can be found in the LICENSE file or at

+# https://developers.google.com/open-source/licenses/bsd

+"""A set of Python input field validators."""

+import re

+# RFC 2821-compliant email address regex

+# Please see sections "4.1.2 Command Argument Syntax" and

+# "4.1.3 Address Literals" of: http://www.faqs.org/rfcs/rfc2821.html

+# The following implementation is still a subset of RFC 2821. Fully

+# double-quoted <user> parts are not supported (since the RFC discourages

+# their use anyway), and using the backslash to escape other characters

+# that are normally invalid, such as commas, is not supported.

+# The groups in this regular expression are:

+# <user>: all of the valid non-quoted portion of the email address before

+# the @ sign (not including the @ sign)

+# <domain>: all of the domain name between the @ sign (but not including it)

+# and the dot before the TLD (but not including that final dot)

+# <tld>: the top-level domain after the last dot (but not including that

+# final dot)

+# Pattern source used to build RE_EMAIL_SEARCH and RE_EMAIL_ONLY. It begins

+# with the inline "(?x)" (verbose) flag, so whitespace and "#" remarks that

+# sit outside character classes are ignored by the regex engine.

+# NOTE: the final ")*" inside <domain> repeats zero or more times, so extra

+# dot-separated sub-domain parts are optional even though the remark beside

+# it says "at least one". <tld> accepts only 2-9 ASCII letters.

+_RFC_2821_EMAIL_REGEX = r"""(?x)

+ (?P<user>

+ # Part of the username that comes before any dots that may occur in it.

+ # At least one of the listed non-dot characters is required before the

+ # first dot.

+ [-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+

+ # Remaining part of the username that starts with the dot and

+ # which may have other dots, if such a part exists. Only one dot

+ # is permitted between each "Atom", and a trailing dot is not permitted.

+ (?:[.][-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+)*

+ )

+ # Domain name, where subdomains are allowed. Also, dashes are allowed

+ # given that they are preceded and followed by at least one character.

+ @(?P<domain>

+ (?:[0-9a-zA-Z] # at least one non-dash

+ (?:[-]* # plus zero or more dashes

+ [0-9a-zA-Z]+ # plus at least one non-dash

+ )* # zero or more of dashes followed by non-dashes

+ ) # one required domain part (may be a sub-domain)

+ (?:\. # dot separator before additional sub-domain part

+ [0-9a-zA-Z] # at least one non-dash

+ (?:[-]* # plus zero or more dashes

+ [0-9a-zA-Z]+ # plus at least one non-dash

+ )* # zero or more of dashes followed by non-dashes

+ )* # at least one sub-domain part and a dot

+ )

+ \. # dot separator before TLD

+ # TLD, the part after 'usernames@domain.' which can consist of 2-9

+ # letters.

+ (?P<tld>[a-zA-Z]{2,9})

+ """

+# Compiled pattern for use with <re>.search() or <re>.sub() to find email

+# addresses anywhere within a string (or with <re>.match() to find an email

+# address at the beginning of a string that may be followed by trailing

+# characters, since <re>.match() implicitly anchors at the start).

+RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX)

+# Compiled pattern for use with <re>.match() to accept strings that contain

+# *only* a single email address (anchored with ^ and $).

+# The leading inline "(?x)" flag is stripped from the shared pattern text and

+# re.VERBOSE is passed explicitly instead: wrapping the text as '^%s$' would

+# leave a global inline flag in the middle of the pattern, which is

+# deprecated since Python 3.6 and rejected with re.error since Python 3.11.

+RE_EMAIL_ONLY = re.compile(

+    '^%s$' % _RFC_2821_EMAIL_REGEX.replace('(?x)', '', 1), re.VERBOSE)

+# Scheme and host portion of a URL: an http, https or ftp scheme, a host

+# made of ASCII letters, digits, dashes and dots that ends in a 2-9 letter

+# TLD, and an optional ":port".

+_URL_HOST_PATTERN = (

+    r'(?:https?|ftp)://'  # http(s) and ftp protocols

+    r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?'  # ascii host values

+)

+# Host pattern followed by an optional path. The path is "/" plus any run

+# of non-whitespace characters, so it can also absorb "?" and "#".

+_URL_REGEX = r'%s(/[^\s]*)?' % _URL_HOST_PATTERN

+# A more complete URL regular expression that combines _URL_REGEX with the

+# optional "?query" and "#fragment" parts of the generic URI pattern

+# described in RFC 2396: http://www.ietf.org/rfc/rfc2396.txt

+# The pattern is anchored with ^ and $.

+RE_COMPLEX_URL = re.compile(r'^%s(\?([^# ]*))?(#(.*))?$' % _URL_REGEX)

+def IsValidEmail(s):

+  """Check whether s consists solely of one well-formed email address.

+

+  Returns the RE_EMAIL_ONLY match object (truthy) on success, else None.

+  """

+  email_match = RE_EMAIL_ONLY.match(s)

+  return email_match

+def IsValidMailTo(s):

+  """Check whether s is 'mailto:' followed by one well-formed email address.

+

+  Returns False when the 'mailto:' prefix is missing; otherwise returns the

+  result of matching the remainder against RE_EMAIL_ONLY (a truthy match

+  object, or None).

+  """

+  if not s.startswith('mailto:'):

+    return False

+  address = s[7:]  # text after the 7-character 'mailto:' prefix

+  return RE_EMAIL_ONLY.match(address)

+def IsValidURL(s):

+  """Check whether s is a well-formed http, https or ftp URL.

+

+  Returns the RE_COMPLEX_URL match object (truthy) on success, else None.

+  """

+  url_match = RE_COMPLEX_URL.match(s)

+  return url_match

« no previous file with comments | « appengine/monorail/framework/urls.py ('k') | appengine/monorail/framework/xsrf.py » ('j') | no next file with comments »