OLD | NEW |
(Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style |
| 3 # license that can be found in the LICENSE file or at |
| 4 # https://developers.google.com/open-source/licenses/bsd |
| 5 |
| 6 """A set of Python input field validators.""" |
| 7 |
| 8 import re |
| 9 |
| 10 # RFC 2821-compliant email address regex |
| 11 # |
| 12 # Please see sections "4.1.2 Command Argument Syntax" and |
| 13 # "4.1.3 Address Literals" of: http://www.faqs.org/rfcs/rfc2821.html |
| 14 # |
| 15 # The following implementation is still a subset of RFC 2821. Fully |
| 16 # double-quoted <user> parts are not supported (since the RFC discourages |
| 17 # their use anyway), and using the backslash to escape other characters |
| 18 # that are normally invalid, such as commas, is not supported. |
| 19 # |
| 20 # The groups in this regular expression are: |
| 21 # |
| 22 # <user>: all of the valid non-quoted portion of the email address before |
| 23 # the @ sign (not including the @ sign) |
| 24 # |
| 25 # <domain>: all of the domain name between the @ sign (but not including it) |
| 26 # and the dot before the TLD (but not including that final dot) |
| 27 # |
| 28 # <tld>: the top-level domain after the last dot (but not including that |
| 29 # final dot) |
| 30 # |
# Body of the email pattern.  It is written for re.VERBOSE but deliberately
# carries no inline "(?x)" flag, so that it can be embedded after an anchor:
# Python only accepts global inline flags at the very start of a pattern.
_RFC_2821_EMAIL_BODY = r"""
  (?P<user>
    # Part of the username that comes before any dots that may occur in it.
    # At least one of the listed non-dot characters is required before the
    # first dot.
    [-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+

    # Remaining part of the username that starts with the dot and
    # which may have other dots, if such a part exists.  Only one dot
    # is permitted between each "Atom", and a trailing dot is not permitted.
    (?:[.][-a-zA-Z0-9!#$%&'*+/=?^_`{|}~]+)*
  )

  # Domain name, where subdomains are allowed.  Also, dashes are allowed
  # given that they are preceded and followed by at least one character.
  #
  # Each label is spelled as: non-dashes, then zero or more groups of
  # dashes followed by non-dashes.  Requiring at least one dash per group
  # leaves exactly one way to match a given label, which keeps matching
  # time linear on input that ultimately fails to match.
  @(?P<domain>
    (?:[0-9a-zA-Z]+       # one or more non-dashes
      (?:[-]+             # plus one or more dashes
        [0-9a-zA-Z]+      # plus one or more non-dashes
      )*                  # zero or more of dashes followed by non-dashes
    )                     # one required domain part (may be a sub-domain)

    (?:\.                 # dot separator before additional sub-domain part
      [0-9a-zA-Z]+        # one or more non-dashes
      (?:[-]+             # plus one or more dashes
        [0-9a-zA-Z]+      # plus one or more non-dashes
      )*                  # zero or more of dashes followed by non-dashes
    )*                    # zero or more additional sub-domain parts
  )
  \.                      # dot separator before TLD

  # TLD, the part after 'usernames@domain.' which can consist of 2-9
  # letters.
  (?P<tld>[a-zA-Z]{2,9})
"""

# Self-contained form of the pattern: the verbose flag is inlined as the
# very first thing in the string, so it can be compiled without extra flags.
_RFC_2821_EMAIL_REGEX = '(?x)' + _RFC_2821_EMAIL_BODY

# object used with <re>.search() or <re>.sub() to find email addresses
# within a string (or with <re>.match() to find email addresses at the
# beginning of a string that may be followed by trailing characters,
# since <re>.match() implicitly anchors at the beginning of the string)
RE_EMAIL_SEARCH = re.compile(_RFC_2821_EMAIL_REGEX)

# object used with <re>.match to find strings that contain *only* a single
# email address (by adding the anchors ^ and $).  The verbose mode is
# requested through the flags argument rather than an inline "(?x)", because
# an inline global flag placed after "^" is rejected by newer Python versions.
# NOTE(review): "$" also matches just before a single trailing newline, so
# 'user@example.com\n' is still accepted here; confirm whether callers strip
# their input before tightening this.
RE_EMAIL_ONLY = re.compile('^%s$' % _RFC_2821_EMAIL_BODY, re.VERBOSE)
| 76 |
# Scheme, host and optional port of a URL.  Only http, https and ftp are
# accepted, and the host must be plain ASCII ending in a 2-9 letter TLD.
_URL_HOST_PATTERN = ''.join([
    r'(?:https?|ftp)://',               # http(s) and ftp protocols
    r'[-a-zA-Z0-9.]+\.[a-zA-Z]{2,9}',   # ascii host values
    r'(:[0-9]+)?',                      # optional numeric port
])

# Host pattern followed by an optional path that contains no whitespace.
_URL_REGEX = _URL_HOST_PATTERN + r'(/[^\s]*)?'

# A more complete URL regular expression: _URL_REGEX anchored at both ends
# and extended with optional query and fragment parts, following the
# pattern given for URI regular expressions in the URL RFC document:
# http://www.ietf.org/rfc/rfc2396.txt
RE_COMPLEX_URL = re.compile('^' + _URL_REGEX + r'(\?([^# ]*))?(#(.*))?$')
| 88 |
| 89 |
def IsValidEmail(s):
    """Check whether the whole string is a properly formatted email address.

    Args:
      s: the string to check.

    Returns:
      The match object produced by RE_EMAIL_ONLY (truthy) when s is an
      email address, or None when it is not.
    """
    email_match = RE_EMAIL_ONLY.match(s)
    return email_match
| 93 |
| 94 |
def IsValidMailTo(s):
    """Check whether the string is a properly formatted mailto: link.

    Args:
      s: the string to check.

    Returns:
      False when s does not begin with 'mailto:'.  Otherwise, the result of
      matching the text after that prefix against RE_EMAIL_ONLY: a match
      object (truthy) for a valid address, or None.
    """
    scheme = 'mailto:'
    if not s.startswith(scheme):
        return False
    # Validate only what follows the scheme prefix.
    address = s[len(scheme):]
    return RE_EMAIL_ONLY.match(address)
| 98 |
| 99 |
def IsValidURL(s):
    """Check whether the string is a properly formatted web or ftp URL.

    Args:
      s: the string to check.

    Returns:
      The match object produced by RE_COMPLEX_URL (truthy) when s is a
      well-formed URL, or None when it is not.
    """
    url_match = RE_COMPLEX_URL.match(s)
    return url_match
OLD | NEW |