Rate Limiting Bypass Attack
The Interview Question
"Attackers figured out how to bypass our rate limiting. We limit 100 requests per minute per IP, but they're making thousands of requests. We've already blocked known VPN IPs. What else can we do?"
Asked at: Cloudflare, Google, Meta, any security-focused company
Time to solve: 30-35 minutes
Difficulty: ⭐⭐⭐⭐ (Senior Security)
Clarifying Questions to Ask
- "What's the attack pattern?" → Credential stuffing? Scraping? DDoS?
- "What data do we have per request?" → Headers, cookies, fingerprints?
- "Is this API or web traffic?" → Different fingerprinting options
- "What's the cost of false positives?" → Blocking real users = bad
- "Are attackers rotating IPs?" → Cloud providers? Residential proxies?
How Attackers Bypass Rate Limiting
Bypass 1: IP Rotation
# Attacker using rotating proxies
import requests
from itertools import cycle
# Outbound proxies the illustrative attacker routes traffic through.
proxies = [
    'http://proxy1.example:8080',
    'http://proxy2.example:8080',
    # ... thousands of proxies from cloud providers or residential pools
]

# Endless round-robin iterator: each next() yields the following proxy and
# wraps back to the first one after the last.
proxy_pool = cycle(proxies)
def attack_with_rotation():
    """Illustrate why a per-IP limit alone fails against rotating proxies.

    For every entry in ``usernames_to_try`` the next proxy is taken from
    ``proxy_pool`` and one login POST is sent through it, so consecutive
    requests leave from different proxies.
    """
    login_url = 'https://target.com/login'
    for username in usernames_to_try:
        exit_proxy = next(proxy_pool)
        form = {'username': username, 'password': 'password123'}
        routing = {'http': exit_proxy, 'https': exit_proxy}
        # Each request from different IP - bypasses per-IP rate limit!
        requests.post(login_url, data=form, proxies=routing)
Bypass 2: User-Agent Rotation
# Attacker rotating User-Agent strings
# Sample of browser User-Agent strings the illustrative attacker cycles
# through so that requests do not share a single client identity.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X)',
    # ... hundreds of real browser user agents
]
# If you rate limit by IP + User-Agent, they just rotate both
Bypass 3: Distributed Attack
# Botnet: thousands of compromised devices
# Each device makes only 1-2 requests - looks like normal traffic
# Together: 100,000 requests/minute
Defense: Layered Rate Limiting
Layer 1: Multi-Dimensional Limits
from redis import Redis
import hashlib
class MultiDimensionalRateLimiter:
    """Rate limiter that applies several independent counters to each request.

    A request is allowed only if every applicable counter (IP, user,
    IP+endpoint, target account, ASN, global endpoint) is within its limit.
    Counters live in Redis so they are shared by all application instances.
    """

    def __init__(self, redis_client=None):
        """Create the limiter.

        Args:
            redis_client: Optional Redis-compatible client to use. When
                omitted, a default ``Redis()`` connection is created, which
                matches the previous behaviour.
        """
        self.redis = redis_client if redis_client is not None else Redis()

    def check_rate_limit(self, request) -> bool:
        """
        Check multiple dimensions - ALL must pass.

        Every applicable counter is incremented on every call, including
        calls that end up rejected, because the list below is built eagerly
        before ``all()`` is evaluated.

        Args:
            request: Object exposing ``ip``, ``user_id``, ``endpoint``,
                ``target_account`` and ``asn``.

        Returns:
            True when the request is within every limit, False otherwise.
        """
        checks = [
            # Per IP (baseline)
            self.check_limit(f"ip:{request.ip}", limit=100, window=60),
            # Per User (if authenticated)
            self.check_limit(f"user:{request.user_id}", limit=50, window=60) if request.user_id else True,
            # Per IP + Endpoint (prevent targeting one endpoint)
            self.check_limit(f"ip_endpoint:{request.ip}:{request.endpoint}", limit=20, window=60),
            # Per Account (across all IPs - catches distributed attacks)
            self.check_limit(f"account:{request.target_account}", limit=10, window=60) if request.target_account else True,
            # Per ASN (rate limit entire cloud provider)
            self.check_limit(f"asn:{request.asn}", limit=1000, window=60),
            # Global endpoint limit (total load protection)
            self.check_limit(f"global:{request.endpoint}", limit=10000, window=60),
        ]
        return all(checks)

    def check_limit(self, key: str, limit: int, window: int) -> bool:
        """Fixed-window rate limit backed by a Redis counter.

        The counter for ``key`` is incremented and the window starts when the
        key is first created. This is a fixed window, not a sliding one: the
        count resets when the key expires.

        Args:
            key: Redis key identifying the dimension being limited.
            limit: Maximum number of hits allowed inside one window.
            window: Window length in seconds.

        Returns:
            True if the hit count, including this call, is within ``limit``.
        """
        # Fetch the new count and the key's TTL in one round trip.
        pipe = self.redis.pipeline()
        pipe.incr(key)
        pipe.ttl(key)
        current, ttl = pipe.execute()
        # TTL == -1 means the key exists without an expiry: either it was
        # just created, or an earlier call failed between INCR and EXPIRE.
        # Re-arming here stops such a counter from blocking the key forever.
        if ttl == -1:
            self.redis.expire(key, window)
        return current <= limit
Layer 2: Device Fingerprinting
import hashlib
import json
def compute_device_fingerprint(request) -> str:
    """
    Derive a client fingerprint from request headers, the TLS (JA3) hash and
    cookie-carried browser characteristics.

    Args:
        request: Object exposing ``headers`` and ``cookies`` (both with a
            dict-style ``get``) and a ``ja3_fingerprint`` attribute.

    Returns:
        Hex SHA-256 digest of the collected signals serialised as JSON with
        sorted keys. Missing headers or cookies contribute ``null``.
    """
    # (fingerprint field, HTTP header) - easy to fake, but adds cost
    header_fields = (
        ('accept_language', 'Accept-Language'),
        ('accept_encoding', 'Accept-Encoding'),
        ('accept', 'Accept'),
    )
    # (fingerprint field, cookie name) - values collected by client-side JS
    cookie_fields = (
        ('screen_resolution', 'screen_res'),
        ('timezone', 'tz'),
        ('canvas_hash', 'canvas'),
        ('webgl_hash', 'webgl'),
    )

    signals = {}
    for field, header_name in header_fields:
        signals[field] = request.headers.get(header_name)

    # TLS fingerprint (JA3) - hard to fake without real browser
    signals['ja3_hash'] = request.ja3_fingerprint

    for field, cookie_name in cookie_fields:
        signals[field] = request.cookies.get(cookie_name)

    # NOTE(review): Referer usually changes with every navigation, so it may
    # make this fingerprint differ between requests from one device - confirm
    # that is intended.
    signals['referer'] = request.headers.get('Referer')

    canonical = json.dumps(signals, sort_keys=True)
    return hashlib.sha256(canonical.encode()).hexdigest()
JA3 Fingerprinting (TLS fingerprint):
# JA3 identifies the TLS client implementation
# Bots using Python requests have different JA3 than Chrome
def get_ja3_fingerprint(tls_client_hello):
    """
    JA3 = MD5(SSLVersion,Cipher,Extensions,EllipticCurves,EllipticCurvePointFormats)

    The five fields are separated by commas, and the values inside a field
    are separated by dashes, e.g. ``771,4865-4866,0-11-10,29-23,0``. An empty
    list yields an empty field.

    Args:
        tls_client_hello: Object exposing ``version`` plus the iterables
            ``ciphers``, ``extensions``, ``curves`` and ``point_formats``.
            Values may be ints or strings; each is converted with ``str``.

    Returns:
        Hex MD5 digest of the JA3 string.
    """
    # NOTE(review): the JA3 specification ignores GREASE values; this
    # function hashes whatever it is given, so the caller is presumably
    # expected to strip them first - confirm.
    list_fields = (
        tls_client_hello.ciphers,
        tls_client_hello.extensions,
        tls_client_hello.curves,
        tls_client_hello.point_formats,
    )
    parts = [str(tls_client_hello.version)]
    # Dash inside a field keeps the comma-separated field boundaries clear.
    parts.extend("-".join(str(value) for value in field) for field in list_fields)
    ja3_string = ",".join(parts)
    return hashlib.md5(ja3_string.encode()).hexdigest()
# Real Chrome (JA3 string before hashing): "769,47-53-5-10-49161-49162..."
# Python requests (JA3 string before hashing): "771,4866-4867-4865-49196..." (different!)
Layer 3: Behavioral Analysis
from collections import defaultdict
import statistics
class BehavioralAnalyzer:
    """Score how bot-like a client is from the timing of its requests.

    Request timestamps are kept in memory per fingerprint. Only the most
    recent ``MAX_HISTORY`` timestamps are retained for each fingerprint.
    """

    # Number of most recent timestamps retained and scored per fingerprint.
    MAX_HISTORY = 100
    # Below this many samples the analyzer reports no suspicion.
    MIN_SAMPLES = 10

    def __init__(self):
        # fingerprint -> list of request timestamps (seconds), oldest first
        self.request_times = defaultdict(list)

    def analyze_request(self, request) -> float:
        """
        Return suspicion score 0-1 based on behavior.

        Records the current time for ``request.fingerprint`` and averages the
        individual signal scores (timing regularity, request speed, missing
        mouse events, sequential access).

        Args:
            request: Object exposing ``fingerprint`` and, optionally,
                ``mouse_events``.

        Returns:
            Mean of the signal scores, or 0.0 when fewer than
            ``MIN_SAMPLES`` requests have been seen for the fingerprint.
        """
        # Local import: the surrounding snippet's imports do not include time.
        import time

        fingerprint = request.fingerprint
        times = self.request_times[fingerprint]
        times.append(time.time())
        # Trim in place so the stored history cannot grow without bound.
        if len(times) > self.MAX_HISTORY:
            del times[:-self.MAX_HISTORY]

        if len(times) < self.MIN_SAMPLES:
            return 0.0  # Not enough data

        # Gaps between consecutive requests; at least MIN_SAMPLES - 1 values.
        intervals = [later - earlier for earlier, later in zip(times, times[1:])]
        scores = []

        # 1. Too regular intervals (bots have consistent timing)
        interval_std = statistics.stdev(intervals)
        mean_interval = statistics.mean(intervals)
        # Coefficient of variation; treated as 0 when the mean gap is not positive.
        cv = interval_std / mean_interval if mean_interval > 0 else 0
        if cv < 0.1:  # Very regular
            scores.append(0.8)
        elif cv < 0.2:
            scores.append(0.4)
        else:
            scores.append(0)

        # 2. Inhuman speed (requests faster than human can click)
        fast_requests = sum(1 for gap in intervals if gap < 0.5)  # Less than 500ms apart
        fast_ratio = fast_requests / len(intervals)
        scores.append(min(fast_ratio, 1.0))

        # 3. No mouse movements / scroll events (for web)
        if hasattr(request, 'mouse_events') and request.mouse_events == 0:
            scores.append(0.6)

        # 4. Sequential resource access (bots often follow predictable patterns)
        if self.is_sequential_access(fingerprint):
            scores.append(0.7)

        return statistics.mean(scores) if scores else 0.0

    def is_sequential_access(self, fingerprint) -> bool:
        """Check if user IDs or pages are being accessed sequentially"""
        # E.g., /user/1001, /user/1002, /user/1003 → bot scraping
        # TODO: not implemented yet; no access history is recorded, so this
        # signal never fires.
        return False
Layer 4: CAPTCHA / Proof of Work
from functools import wraps
def require_challenge(suspicion_threshold=0.5):
    """
    Decorator factory: demand a CAPTCHA or proof-of-work from suspicious
    clients before the wrapped handler runs.

    The wrapped handler receives ``request`` as its first argument. When the
    behavioral suspicion score is above ``suspicion_threshold`` and the
    ``X-Challenge-Token`` header does not verify, the handler is skipped and
    a ``(body, 429)`` tuple describing the required challenge is returned.

    Args:
        suspicion_threshold: Score above which a solved challenge is required.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            score = behavioral_analyzer.analyze_request(request)
            is_suspicious = score > suspicion_threshold

            # The token is only looked up and verified for suspicious requests.
            if is_suspicious and not verify_challenge(request.headers.get('X-Challenge-Token')):
                # Highest scores get a CAPTCHA; the rest get proof-of-work.
                if score > 0.8:
                    challenge_kind = 'captcha'
                else:
                    challenge_kind = 'pow'
                body = {
                    'status': 'challenge_required',
                    'challenge_url': '/api/challenge',
                    'challenge_type': challenge_kind,
                }
                return body, 429

            return func(request, *args, **kwargs)
        return wrapper
    return decorator
# Proof of Work challenge
def generate_pow_challenge(difficulty=4):
    """
    Create a proof-of-work challenge for the client to solve.

    Client must find a nonce such that SHA256(nonce + challenge), written as
    hex, starts with ``difficulty`` zeros. Each extra zero multiplies the
    expected number of hashes by 16 (about 16 ** difficulty attempts on
    average), so the cost per request can be tuned.

    Args:
        difficulty: Required number of leading hex zeros. Defaults to 4.

    Returns:
        Dict with a random 32-hex-character ``challenge`` and ``difficulty``.
    """
    # Local import: the surrounding snippet's imports do not include secrets.
    import secrets

    challenge = secrets.token_hex(16)
    return {'challenge': challenge, 'difficulty': difficulty}
def verify_pow(challenge, nonce, difficulty):
    """Check a proof-of-work solution.

    Args:
        challenge: Challenge string issued to the client.
        nonce: Value submitted by the client; it is formatted into text
            directly in front of the challenge before hashing.
        difficulty: Number of leading ``'0'`` hex characters required.

    Returns:
        True if the hex SHA-256 digest of ``nonce`` followed by ``challenge``
        begins with ``difficulty`` zeros.
    """
    required_prefix = '0' * difficulty
    payload = f"{nonce}{challenge}".encode()
    digest = hashlib.sha256(payload).hexdigest()
    return digest[:len(required_prefix)] == required_prefix
Layer 5: ASN and IP Reputation
import ipaddress
class IPReputationChecker:
    """Combine several reputation signals for an IP into a 0-1 risk score.

    ``get_risk_score`` calls ``self.lookup_ip(ip)`` and
    ``self.get_ip_history(ip)``, which are not defined in this class and
    must be supplied elsewhere (for example by a subclass).
    """

    def __init__(self):
        # ASNs treated as higher risk (data centers, VPN providers)
        self.suspicious_asns = {
            'AS14061': 'DigitalOcean',
            'AS16276': 'OVH',
            'AS14618': 'AWS',
            'AS15169': 'Google Cloud',
            # ... more data center ASNs
        }
        # IPs recently seen in attacks. Initialised empty so get_risk_score
        # can always test membership; populate from threat intel feeds.
        self.known_malicious_ips = set()
        # Residential proxy detection (based on IP history)
        self.known_residential_proxies = set()  # From threat intel feeds

    def get_risk_score(self, ip: str) -> float:
        """
        Return risk score 0-1 for an IP address.

        Each matching signal adds a fixed weight and the total is capped at 1.0.

        Args:
            ip: IP address as a string.

        Returns:
            Sum of the weights of all matching signals, capped at 1.0.
        """
        ip_info = self.lookup_ip(ip)
        score = 0.0

        # Data center IP (not residential)
        if ip_info.get('asn') in self.suspicious_asns:
            score += 0.3

        # Known VPN/Proxy; a flag missing from the lookup counts as not set
        if ip_info.get('is_vpn') or ip_info.get('is_proxy'):
            score += 0.4

        # Recently seen in attacks (threat intel)
        if ip in self.known_malicious_ips:
            score += 0.5

        # Residential proxy (sophisticated attacker)
        if ip in self.known_residential_proxies:
            score += 0.3

        # New IP (no history with our service)
        if self.get_ip_history(ip) is None:
            score += 0.1

        return min(score, 1.0)
Rate Limiting Architecture
Response Strategies
| Suspicion Level | Response |
|---|---|
| Low (0-0.3) | Normal request processing |
| Medium (0.3-0.5) | Soft rate limit (tighter limits, throttled responses) |
| High (0.5-0.8) | Require proof-of-work |
| Very High (0.8-0.9) | Require CAPTCHA |
| Critical (0.9+) | Block + alert security team |
Key Takeaways
- Multi-dimensional limits - IP alone is not enough
- Fingerprint the client - JA3, device fingerprint harder to rotate
- Analyze behavior - Timing patterns reveal bots
- Graduated response - Don't show your hand immediately
- Rate limit the target - Limit attempts per account, not just per IP
- Use proof-of-work - Makes attacks economically unfeasible
Remember: Security is about raising the cost of attack above the value of the target. Perfect security doesn't exist.