Index: appengine/monorail/framework/validate.py |
diff --git a/appengine/monorail/framework/validate.py b/appengine/monorail/framework/validate.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..4aa40ca147180c2f8c6dea7d5301a0c8212c8ebe |
--- /dev/null |
+++ b/appengine/monorail/framework/validate.py |
@@ -0,0 +1,102 @@ |
+# Copyright 2016 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style |
+# license that can be found in the LICENSE file or at |
+# https://developers.google.com/open-source/licenses/bsd |
+ |
+"""A set of Python input field validators.""" |
+ |
+import re |
+ |
+# RFC 2821-compliant email address regex |
+# |
+# Please see sections "4.1.2 Command Argument Syntax" and |
+# "4.1.3 Address Literals" of: http://www.faqs.org/rfcs/rfc2821.html |
+# |
+# The following implementation is still a subset of RFC 2821. Fully |
+# double-quoted <user> parts are not supported (since the RFC discourages |
+# their use anyway), and using the backslash to escape other characters |
+# that are normally invalid, such as commas, is not supported. |
+# |
+# The groups in this regular expression are: |
+# |
+# <user>: all of the valid non-quoted portion of the email address before |
+# the @ sign (not including the @ sign) |
+# |
+# <domain>: all of the domain name between the @ sign (but not including it) |
+# and the dot before the TLD (but not including that final dot) |
+# |
+# <tld>: the top-level domain after the last dot (but not including that |
+# final dot) |
+# |
+_RFC_2821_EMAIL_REGEX = r"""(?x) |
+ (?P<user> |
+ # Part of the username that comes before any dots that may occur in it. |
+ # At least one of the listed non-dot characters is required before the |
+ # first dot. |
+ [-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+ |
+ |
+ # Remaining part of the username that starts with the dot and |
+ # which may have other dots, if such a part exists. Only one dot |
+ # is permitted between each "Atom", and a trailing dot is not permitted. |
+ (?:[.][-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+)* |
+ ) |
+ |
+ # Domain name, where subdomains are allowed. Also, dashes are allowed |
+ # given that they are preceded and followed by at least one character. |
+ @(?P<domain> |
+ (?:[0-9a-zA-Z] # at least one non-dash |
+ (?:[-]* # plus zero or more dashes |
+ [0-9a-zA-Z]+ # plus at least one non-dash |
+ )* # zero or more of dashes followed by non-dashes |
+ ) # one required domain part (may be a sub-domain) |
+ |
+ (?:\. # dot separator before additional sub-domain part |
+ [0-9a-zA-Z] # at least one non-dash |
+ (?:[-]* # plus zero or more dashes |
+ [0-9a-zA-Z]+ # plus at least one non-dash |
+ )* # zero or more of dashes followed by non-dashes |
+ )* # at least one sub-domain part and a dot |
+ ) |
+ \. # dot separator before TLD |
+ |
+ # TLD, the part after 'usernames@domain.' which can consist of 2-9 |
+ # letters. |
+ (?P<tld>[a-zA-Z]{2,9}) |
+ """ |
+ |
+# object used with <re>.search() or <re>.sub() to find email addresses |
+# within a string (or with <re>.match() to find email addresses at the |
+# beginning of a string that may be followed by trailing characters, |
+# since <re>.match() implicitly anchors at the beginning of the string) |
+RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX) |
+ |
+# object used with <re>.match to find strings that contain *only* a single |
+# email address (by adding the end-of-string anchor $) |
+RE_EMAIL_ONLY = re.compile('^%s$' % _RFC_2821_EMAIL_REGEX) |
+ |
+_URL_HOST_PATTERN = ( |
+ r'(?:https?|ftp)://' # http(s) and ftp protocols |
+ r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}(:[0-9]+)?' # ascii host values |
+) |
+_URL_REGEX = r'%s(/[^\s]*)?' % _URL_HOST_PATTERN |
+ |
+# A more complete URL regular expression based on a combination of the |
+# existing _URL_REGEX and the pattern found for URI regular expressions |
+# found in the URL RFC document. It's detailed here: |
+# http://www.ietf.org/rfc/rfc2396.txt |
+RE_COMPLEX_URL = re.compile(r'^%s(\?([^# ]*))?(#(.*))?$' % _URL_REGEX) |
+ |
+ |
+def IsValidEmail(s): |
+ """Return true iff the string is a properly formatted email address.""" |
+ return RE_EMAIL_ONLY.match(s) |
+ |
+ |
+def IsValidMailTo(s): |
+ """Return true iff the string is a properly formatted mailto:.""" |
+ return s.startswith('mailto:') and RE_EMAIL_ONLY.match(s[7:]) |
+ |
+ |
+def IsValidURL(s): |
+ """Return true iff the string is a properly formatted web or ftp URL.""" |
+ return RE_COMPLEX_URL.match(s) |