appengine/monorail/framework/ratelimiter.py - Issue 1868553004: Open Source Monorail

Side by Side Diff: appengine/monorail/framework/ratelimiter.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style

	3 # license that can be found in the LICENSE file or at

	4 # https://developers.google.com/open-source/licenses/bsd

	5

	6 """Request rate limiting implementation.

	7

	8 This is intended to be used for automatic DDoS protection.

	9

	10 """

	11

	12 import datetime

	13 import logging

	14 import settings

	15 import time

	16

	17 from infra_libs import ts_mon

	18

	19 from google.appengine.api import memcache

	20 from google.appengine.api.modules import modules

	21 from google.appengine.api import users

	22

# Number of one-minute counter buckets that are summed to get a request count.
N_MINUTES = 5
# Expiry, in seconds, passed to memcache when a counter bucket is created.
EXPIRE_AFTER_SECS = 60 * 60
# Limit used when the request's country has no entry in COUNTRY_LIMITS.
DEFAULT_LIMIT = 300  # 300 requests in 5 minutes is 1 QPS.

# Stand-in for the user email when no user is signed in.
ANON_USER = 'anon'

# Request header from which the two-letter country code is read.
COUNTRY_HEADER = 'X-AppEngine-Country'

COUNTRY_LIMITS = {
    # Two-letter country code: max requests per N_MINUTES
    # This limit will apply to all requests coming
    # from this country.
    # To add a country code, see GAE logs and use the
    # appropriate code from https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    # E.g., 'cn': 300,  # Limit to 1 QPS.
}

# Modules not in this list will not have rate limiting applied by this
# class.
MODULE_WHITELIST = ['default']

	43

	44 def _CacheKeys(request, now_sec):

	45 """ Returns an array of arrays. Each array contains strings with

	46 the same prefix and a timestamp suffix, starting with the most

	47 recent and decrementing by 1 minute each time.

	48 """

	49 now = datetime.datetime.fromtimestamp(now_sec)

	50 country = request.headers.get(COUNTRY_HEADER, 'ZZ')

	51 ip = request.remote_addr

	52 minute_buckets = [now - datetime.timedelta(minutes=m) for m in

	53 range(N_MINUTES)]

	54 user = users.get_current_user()

	55 user_email = user.email() if user else ANON_USER

	56

	57 # <IP, country, user_email> to be rendered into each key prefix.

	58 prefixes = []

	59

	60 # All logged-in users get a per-user rate limit, regardless of IP and country.

	61 if user:

	62 prefixes.append(['ALL', 'ALL', user.email()])

	63 else:

	64 # All anon requests get a per-IP ratelimit.

	65 prefixes.append([ip, 'ALL', 'ALL'])

	66

	67 # All requests from a problematic country get a per-country rate limit,

	68 # regardless of the user (even a non-logged-in one) or IP.

	69 if country in COUNTRY_LIMITS:

	70 prefixes.append(['ALL', country, 'ALL'])

	71

	72 keysets = []

	73 for prefix in prefixes:

	74 keysets.append(['ratelimit-%s-%s' % ('-'.join(prefix),

	75 str(minute_bucket.replace(second=0, microsecond=0)))

	76 for minute_bucket in minute_buckets])

	77

	78 return keysets, country, ip, user_email

	79

	80 class RateLimiter:

	81 blocked_requests = ts_mon.CounterMetric(

	82 'monorail/ratelimiter/blocked_request')

	83 limit_exceeded = ts_mon.CounterMetric(

	84 'monorail/ratelimiter/rate_exceeded')

	85 cost_thresh_exceeded = ts_mon.CounterMetric(

	86 'monorail/ratelimiter/cost_thresh_exceeded')

	87 checks = ts_mon.CounterMetric(

	88 'monorail/ratelimiter/check')

	89

	90 def __init__(self, _cache=memcache, fail_open=True, **_kwargs):

	91 self.fail_open = fail_open

	92

	93 def CheckStart(self, request, now=None):

	94 if (modules.get_current_module_name() not in MODULE_WHITELIST or

	95 users.is_current_user_admin()):

	96 return

	97 logging.info('X-AppEngine-Country: %s' %

	98 request.headers.get(COUNTRY_HEADER, 'ZZ'))

	99

	100 if now is None:

	101 now = time.time()

	102

	103 keysets, country, ip, user_email = _CacheKeys(request, now)

	104 # There are either two or three sets of keys in keysets.

	105 # Three if the user's country is in COUNTRY_LIMITS, otherwise two.

	106 for keys in keysets:

	107 count = 0

	108 try:

	109 counters = memcache.get_multi(keys)

	110 count = sum(counters.values())

	111 self.checks.increment({'type': 'success'})

	112 except Exception as e:

	113 logging.error(e)

	114 if not self.fail_open:

	115 self.checks.increment({'type': 'fail_open'})

	116 raise RateLimitExceeded(country=country, ip=ip, user_email=user_email)

	117 self.checks.increment({'type': 'fail_closed'})

	118

	119 limit = COUNTRY_LIMITS.get(country, DEFAULT_LIMIT)

	120 if count > limit:

	121 # Since webapp2 won't let us return a 429 error code

	122 # <http://tools.ietf.org/html/rfc6585#section-4>, we can't

	123 # monitor rate limit exceeded events with our standard tools.

	124 # We return a 400 with a custom error message to the client,

	125 # and this logging is so we can monitor it internally.

	126 logging.info('Rate Limit Exceeded: %s, %s, %s, %d' % (

	127 country, ip, user_email, count))

	128

	129 self.limit_exceeded.increment()

	130

	131 if settings.ratelimiting_enabled:

	132 self.blocked_requests.increment()

	133 raise RateLimitExceeded(country=country, ip=ip, user_email=user_email)

	134

	135 k = keys[0]

	136 # Only update the latest time bucket for each prefix (reverse chron).

	137 memcache.add(k, 0, time=EXPIRE_AFTER_SECS)

	138 memcache.incr(k, initial_value=0)

	139

	140 def CheckEnd(self, request, now, start_time):

	141 """If a request was expensive to process, charge some extra points

	142 against this set of buckets.

	143 We pass in both now and start_time so we can update the buckets

	144 based on keys created from start_time instead of now.

	145 now and start_time are float seconds.

	146 """

	147 if (modules.get_current_module_name() not in MODULE_WHITELIST or

	148 not settings.ratelimiting_cost_enabled):

	149 return

	150

	151 elapsed_ms = (now - start_time) * 1000

	152 # Would it kill the python lib maintainers to have timedelta.total_ms()?

	153 if elapsed_ms < settings.ratelimiting_cost_thresh_ms:

	154 return

	155

	156 # TODO: Look into caching the keys instead of generating them twice

	157 # for every request. Say, return them from CheckStart so they can

	158 # be bassed back in here later.

	159 keysets, country, ip, user_email = _CacheKeys(request, start_time)

	160 for keys in keysets:

	161 logging.info('Rate Limit Cost Threshold Exceeded: %s, %s, %s' % (

	162 country, ip, user_email))

	163 self.cost_thresh_exceeded.increment_by(settings.ratelimiting_cost_penalty)

	164

	165 # Only update the latest time bucket for each prefix (reverse chron).

	166 k = keys[0]

	167 memcache.add(k, 0, time=EXPIRE_AFTER_SECS)

	168 memcache.incr(k, initial_value=0)

	169

	170 class RateLimitExceeded(Exception):

	171 def __init__(self, country=None, ip=None, user_email=None, **_kwargs):

	172 self.country = country

	173 self.ip = ip

	174 self.user_email = user_email

	175

	176 def __str__(self):

	177 return 'RateLimitExceeded: %s, %s, %s' % (

	178 self.country, self.ip, self.user_email)

OLD	NEW

« no previous file with comments | « appengine/monorail/framework/profiler.py ('k') | appengine/monorail/framework/reap.py » ('j') | no next file with comments »