appengine/monorail/framework/ratelimiter.py - Issue 1868553004: Open Source Monorail

Unified Diff: appengine/monorail/framework/ratelimiter.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/monorail/framework/ratelimiter.py

diff --git a/appengine/monorail/framework/ratelimiter.py b/appengine/monorail/framework/ratelimiter.py

new file mode 100644

index 0000000000000000000000000000000000000000..0c968dfa722700dbd8c5bd8527254c2f78384aa8

--- /dev/null

+++ b/appengine/monorail/framework/ratelimiter.py

@@ -0,0 +1,178 @@

+# Use of this source code is govered by a BSD-style

+# license that can be found in the LICENSE file or at

+# https://developers.google.com/open-source/licenses/bsd

+"""Request rate limiting implementation.

+This is intented to be used for automatic DDoS protection.

+"""

+import datetime

+import logging

+import settings

+import time

+from infra_libs import ts_mon

+from google.appengine.api import memcache

+from google.appengine.api.modules import modules

+from google.appengine.api import users

+N_MINUTES = 5

+EXPIRE_AFTER_SECS = 60 * 60

+DEFAULT_LIMIT = 300 # 300 requests in 5 minutes is 1 QPS.

+ANON_USER = 'anon'

+COUNTRY_HEADER = 'X-AppEngine-Country'

+COUNTRY_LIMITS = {

+ # Two-letter country code: max requests per N_MINUTES

+ # This limit will apply to all requests coming

+ # from this country.

+ # To add a country code, see GAE logs and use the

+ # appropriate code from https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2

+ # E.g., 'cn': 300, # Limit to 1 QPS.

+# Modules not in this list will not have rate limiting applied by this

+# class.

+MODULE_WHITELIST = ['default']

+def _CacheKeys(request, now_sec):

+ """ Returns an array of arrays. Each array contains strings with

+ the same prefix and a timestamp suffix, starting with the most

+ recent and decrementing by 1 minute each time.

+ """

+ now = datetime.datetime.fromtimestamp(now_sec)

+ country = request.headers.get(COUNTRY_HEADER, 'ZZ')

+ ip = request.remote_addr

+ minute_buckets = [now - datetime.timedelta(minutes=m) for m in

+ range(N_MINUTES)]

+ user = users.get_current_user()

+ user_email = user.email() if user else ANON_USER

+ # <IP, country, user_email> to be rendered into each key prefix.

+ prefixes = []

+ # All logged-in users get a per-user rate limit, regardless of IP and country.

+ if user:

+ prefixes.append(['ALL', 'ALL', user.email()])

+ else:

+ # All anon requests get a per-IP ratelimit.

+ prefixes.append([ip, 'ALL', 'ALL'])

+ # All requests from a problematic country get a per-country rate limit,

+ # regardless of the user (even a non-logged-in one) or IP.

+ if country in COUNTRY_LIMITS:

+ prefixes.append(['ALL', country, 'ALL'])

+ keysets = []

+ for prefix in prefixes:

+ keysets.append(['ratelimit-%s-%s' % ('-'.join(prefix),

+ str(minute_bucket.replace(second=0, microsecond=0)))

+ for minute_bucket in minute_buckets])

+ return keysets, country, ip, user_email

+class RateLimiter:

+ blocked_requests = ts_mon.CounterMetric(

+ 'monorail/ratelimiter/blocked_request')

+ limit_exceeded = ts_mon.CounterMetric(

+ 'monorail/ratelimiter/rate_exceeded')

+ cost_thresh_exceeded = ts_mon.CounterMetric(

+ 'monorail/ratelimiter/cost_thresh_exceeded')

+ checks = ts_mon.CounterMetric(

+ 'monorail/ratelimiter/check')

+ def __init__(self, _cache=memcache, fail_open=True, **_kwargs):

+ self.fail_open = fail_open

+ def CheckStart(self, request, now=None):

+ if (modules.get_current_module_name() not in MODULE_WHITELIST or

+ users.is_current_user_admin()):

+ return

+ logging.info('X-AppEngine-Country: %s' %

+ request.headers.get(COUNTRY_HEADER, 'ZZ'))

+ if now is None:

+ now = time.time()

+ keysets, country, ip, user_email = _CacheKeys(request, now)

+ # There are either two or three sets of keys in keysets.

+ # Three if the user's country is in COUNTRY_LIMITS, otherwise two.

+ for keys in keysets:

+ count = 0

+ try:

+ counters = memcache.get_multi(keys)

+ count = sum(counters.values())

+ self.checks.increment({'type': 'success'})

+ except Exception as e:

+ logging.error(e)

+ if not self.fail_open:

+ self.checks.increment({'type': 'fail_open'})

+ raise RateLimitExceeded(country=country, ip=ip, user_email=user_email)

+ self.checks.increment({'type': 'fail_closed'})

+ limit = COUNTRY_LIMITS.get(country, DEFAULT_LIMIT)

+ if count > limit:

+ # Since webapp2 won't let us return a 429 error code

+ # <http://tools.ietf.org/html/rfc6585#section-4>, we can't

+ # monitor rate limit exceeded events with our standard tools.

+ # We return a 400 with a custom error message to the client,

+ # and this logging is so we can monitor it internally.

+ logging.info('Rate Limit Exceeded: %s, %s, %s, %d' % (

+ country, ip, user_email, count))

+ self.limit_exceeded.increment()

+ if settings.ratelimiting_enabled:

+ self.blocked_requests.increment()

+ raise RateLimitExceeded(country=country, ip=ip, user_email=user_email)

+ k = keys[0]

+ # Only update the latest *time* bucket for each prefix (reverse chron).

+ memcache.add(k, 0, time=EXPIRE_AFTER_SECS)

+ memcache.incr(k, initial_value=0)

+ def CheckEnd(self, request, now, start_time):

+ """If a request was expensive to process, charge some extra points

+ against this set of buckets.

+ We pass in both now and start_time so we can update the buckets

+ based on keys created from start_time instead of now.

+ now and start_time are float seconds.

+ """

+ if (modules.get_current_module_name() not in MODULE_WHITELIST or

+ not settings.ratelimiting_cost_enabled):

+ return

+ elapsed_ms = (now - start_time) * 1000

+ # Would it kill the python lib maintainers to have timedelta.total_ms()?

+ if elapsed_ms < settings.ratelimiting_cost_thresh_ms:

+ return

+ # TODO: Look into caching the keys instead of generating them twice

+ # for every request. Say, return them from CheckStart so they can

+ # be bassed back in here later.

+ keysets, country, ip, user_email = _CacheKeys(request, start_time)

+ for keys in keysets:

+ logging.info('Rate Limit Cost Threshold Exceeded: %s, %s, %s' % (

+ country, ip, user_email))

+ self.cost_thresh_exceeded.increment_by(settings.ratelimiting_cost_penalty)

+ # Only update the latest *time* bucket for each prefix (reverse chron).

+ k = keys[0]

+ memcache.add(k, 0, time=EXPIRE_AFTER_SECS)

+ memcache.incr(k, initial_value=0)

+class RateLimitExceeded(Exception):

+ def __init__(self, country=None, ip=None, user_email=None, **_kwargs):

+ self.country = country

+ self.ip = ip

+ self.user_email = user_email

+ def __str__(self):

+ return 'RateLimitExceeded: %s, %s, %s' % (

+ self.country, self.ip, self.user_email)

« no previous file with comments | « appengine/monorail/framework/profiler.py ('k') | appengine/monorail/framework/reap.py » ('j') | no next file with comments »