0% read
Skip to main content
API Rate Limiting and Throttling - Implementation Strategies for Production Systems

API Rate Limiting and Throttling - Implementation Strategies for Production Systems

Master API rate limiting with token bucket, leaky bucket, and sliding window algorithms. Learn Redis implementation, distributed rate limiting, and production strategies for scalable APIs.

S
StaticBlock Editorial
21 min read

Introduction

API rate limiting and throttling are essential mechanisms for protecting backend services from abuse, ensuring fair resource distribution, and maintaining system stability under load. As APIs become the backbone of modern applications, implementing effective rate limiting strategies becomes critical for production systems serving millions of requests.

This comprehensive guide explores rate limiting algorithms, implementation patterns with Redis, distributed rate limiting strategies, and production best practices from companies like Stripe, GitHub, and Twitter.

Understanding Rate Limiting vs Throttling

Rate Limiting

Rate limiting restricts the number of requests a client can make within a specific time window:

// Simple in-memory rate limiter
// Simple in-memory rate limiter using a per-client timestamp log.
class RateLimiter {
  /**
   * @param {number} maxRequests - Requests permitted per window.
   * @param {number} windowMs - Window length in milliseconds.
   */
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = new Map(); // clientId -> array of request timestamps
  }

  /**
   * Record a request for `clientId` if it fits within the current window.
   * @param {string} clientId - Identity the limit is tracked against.
   * @returns {boolean} true when the request is allowed.
   */
  isAllowed(clientId) {
    const now = Date.now();
    const cutoff = now - this.windowMs;
    const history = this.requests.get(clientId) || [];

    // Keep only timestamps that are still inside the window.
    const recent = history.filter((ts) => ts > cutoff);

    if (recent.length >= this.maxRequests) {
      return false;
    }

    recent.push(now);
    this.requests.set(clientId, recent);
    return true;
  }
}

// Usage: 100 requests per minute
// (Fix: the extraction fused this statement into the preceding comment,
// leaving the limiter undefined.)
const limiter = new RateLimiter(100, 60000);

// Reject over-limit requests per client IP with 429 and a retry hint.
app.use((req, res, next) => {
  const clientId = req.ip;

  if (!limiter.isAllowed(clientId)) {
    return res.status(429).json({
      error: 'Too Many Requests',
      retryAfter: 60
    });
  }

  next();
});

Throttling

Throttling controls the rate of request processing, smoothing traffic spikes:

// Request queue with throttling
// Request queue with throttling: spaces executions at least `interval` ms apart.
class RequestThrottler {
  /**
   * @param {number} requestsPerSecond - Maximum execution rate.
   */
  constructor(requestsPerSecond) {
    this.requestsPerSecond = requestsPerSecond;
    this.interval = 1000 / requestsPerSecond; // ms between executions
    this.queue = [];                          // pending { fn, resolve, reject }
    this.lastProcessed = 0;                   // timestamp of the last execution
  }

  /**
   * Queue `fn` (an async function) for rate-limited execution.
   * Resolves or rejects with fn's own outcome.
   * (Bug fix: the original captured only `resolve`, so a rejecting fn left
   * the caller's promise pending forever and raised an unhandled rejection.)
   * @param {() => Promise<*>} fn
   * @returns {Promise<*>}
   */
  async throttle(fn) {
    return new Promise((resolve, reject) => {
      this.queue.push({ fn, resolve, reject });
      this.processQueue();
    });
  }

  // Execute the head of the queue when the interval has elapsed,
  // otherwise re-schedule for the remaining wait.
  processQueue() {
    if (this.queue.length === 0) return;

    const now = Date.now();
    const timeSinceLastRequest = now - this.lastProcessed;

    if (timeSinceLastRequest >= this.interval) {
      const { fn, resolve, reject } = this.queue.shift();
      this.lastProcessed = now;

      // Propagate both fulfillment and rejection to the caller;
      // Promise.resolve() also guards against fn throwing synchronously.
      Promise.resolve().then(fn).then(resolve, reject);

      if (this.queue.length > 0) {
        setTimeout(() => this.processQueue(), this.interval);
      }
    } else {
      setTimeout(
        () => this.processQueue(),
        this.interval - timeSinceLastRequest
      );
    }
  }
}

// Usage: 10 requests per second
// (Fix: the extraction fused this statement into the preceding comment.)
const throttler = new RequestThrottler(10);

// Every call is funneled through the throttler, so outbound traffic to the
// API never exceeds the configured rate.
async function makeAPICall(data) {
  return throttler.throttle(async () => {
    const response = await fetch('/api/endpoint', {
      method: 'POST',
      body: JSON.stringify(data)
    });
    return response.json();
  });
}

Rate Limiting Algorithms

1. Token Bucket Algorithm

The token bucket algorithm provides smooth rate limiting with burst capacity:

// Token bucket: permits bursts up to `capacity`, refilled continuously.
class TokenBucket {
  /**
   * @param {number} capacity - Maximum number of tokens the bucket holds.
   * @param {number} refillRate - Tokens added per second.
   */
  constructor(capacity, refillRate) {
    this.capacity = capacity;
    this.tokens = capacity;
    this.refillRate = refillRate;
    this.lastRefill = Date.now();
  }

  // Top up the bucket proportionally to the time since the last refill,
  // capped at capacity.
  refill() {
    const now = Date.now();
    const elapsedSeconds = (now - this.lastRefill) / 1000;
    const replenished = this.tokens + elapsedSeconds * this.refillRate;
    this.tokens = Math.min(this.capacity, replenished);
    this.lastRefill = now;
  }

  /**
   * Try to take `tokens` from the bucket.
   * @returns {boolean} true when enough tokens were available.
   */
  consume(tokens = 1) {
    this.refill();
    if (this.tokens < tokens) {
      return false;
    }
    this.tokens -= tokens;
    return true;
  }

  /**
   * Milliseconds until `tokens` will be available (0 if available now).
   */
  getWaitTime(tokens = 1) {
    this.refill();
    if (this.tokens >= tokens) {
      return 0;
    }
    const deficit = tokens - this.tokens;
    return (deficit / this.refillRate) * 1000;
  }
}

// Express middleware with token bucket: one bucket per client IP.
// (Fix: the function header and the app.use call were fused into comments
// by extraction, leaving the body dangling.)
// NOTE(review): `buckets` grows with every distinct IP and is never pruned;
// consider TTL/LRU eviction in production.
function createTokenBucketMiddleware(capacity, refillRate) {
  const buckets = new Map();

  return (req, res, next) => {
    const clientId = req.ip;

    if (!buckets.has(clientId)) {
      buckets.set(clientId, new TokenBucket(capacity, refillRate));
    }

    const bucket = buckets.get(clientId);

    if (bucket.consume()) {
      res.setHeader('X-RateLimit-Limit', capacity);
      res.setHeader('X-RateLimit-Remaining', Math.floor(bucket.tokens));
      next();
    } else {
      const waitTime = Math.ceil(bucket.getWaitTime() / 1000);
      res.setHeader('Retry-After', waitTime);
      res.status(429).json({
        error: 'Rate limit exceeded',
        retryAfter: waitTime
      });
    }
  };
}

// Usage: 100 token capacity, refill 10 tokens/second
app.use(createTokenBucketMiddleware(100, 10));

2. Leaky Bucket Algorithm

The leaky bucket algorithm enforces a constant output rate:

// Leaky bucket: queues requests and drains them at a fixed rate.
class LeakyBucket {
  constructor(capacity, leakRate) {
    this.capacity = capacity;   // Maximum queued requests
    this.queue = [];            // Pending { request, resolve, reject } entries
    this.leakRate = leakRate;   // Requests drained per second
    this.leaking = false;       // Whether a drain cycle is running
  }

  /**
   * Enqueue `request` (an async function). Rejects immediately when the
   * queue is full; otherwise resolves with the request's result once it
   * has leaked out of the bucket.
   */
  async add(request) {
    if (this.queue.length >= this.capacity) {
      throw new Error('Bucket overflow - request rejected');
    }

    return new Promise((resolve, reject) => {
      this.queue.push({ request, resolve, reject });
      this.startLeaking();
    });
  }

  // Kick off a drain cycle unless one is already in progress.
  startLeaking() {
    if (!this.leaking) {
      this.leaking = true;
      this.leak();
    }
  }

  // Process one queued request, then schedule the next drain tick.
  async leak() {
    const entry = this.queue.shift();
    if (entry === undefined) {
      this.leaking = false;
      return;
    }

    try {
      entry.resolve(await entry.request());
    } catch (err) {
      entry.reject(err);
    }

    setTimeout(() => this.leak(), 1000 / this.leakRate);
  }
}

// Usage with API client: every request flows through the leaky bucket.
// (Fix: the class header and the example calls were fused into comments
// by extraction.)
class RateLimitedAPIClient {
  constructor(baseURL, requestsPerSecond) {
    this.baseURL = baseURL;
    this.bucket = new LeakyBucket(100, requestsPerSecond);
  }

  /**
   * Issue a rate-limited request and parse the JSON body.
   * (Fix: the URL was built without template-literal backticks, which is
   * a syntax error.)
   */
  async request(endpoint, options) {
    return this.bucket.add(async () => {
      const response = await fetch(`${this.baseURL}${endpoint}`, options);
      return response.json();
    });
  }
}

const client = new RateLimitedAPIClient('https://api.example.com', 10);

// All requests automatically rate limited
const data1 = await client.request('/users', { method: 'GET' });
const data2 = await client.request('/posts', { method: 'GET' });

3. Sliding Window Algorithm

The sliding window algorithm provides accurate rate limiting without boundary issues:

// Sliding-window log limiter: exact counting with no fixed-window
// boundary bursts.
class SlidingWindowRateLimiter {
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = new Map(); // clientId -> timestamps inside the window
  }

  /**
   * Check and (if allowed) record a request for `clientId`.
   * @returns {{allowed: boolean, remaining: number, resetTime: number}}
   *   resetTime is expressed in seconds.
   */
  isAllowed(clientId) {
    const now = Date.now();
    const windowStart = now - this.windowMs;

    if (!this.requests.has(clientId)) {
      this.requests.set(clientId, []);
    }

    // Drop timestamps that have slid out of the window.
    const live = this.requests
      .get(clientId)
      .filter((ts) => ts > windowStart);

    if (live.length >= this.maxRequests) {
      // Window is full: report when the oldest entry will expire.
      const resetMs = live[0] + this.windowMs - now;
      return {
        allowed: false,
        resetTime: Math.ceil(resetMs / 1000),
        remaining: 0
      };
    }

    live.push(now);
    this.requests.set(clientId, live);

    return {
      allowed: true,
      remaining: this.maxRequests - live.length,
      resetTime: Math.ceil(this.windowMs / 1000)
    };
  }
}

// Express middleware
// (Fix: the function header was fused into this comment by extraction.)
function slidingWindowMiddleware(maxRequests, windowMs) {
  const limiter = new SlidingWindowRateLimiter(maxRequests, windowMs);

  return (req, res, next) => {
    // Prefer the API key as the identity; fall back to the client IP.
    const clientId = req.headers['x-api-key'] || req.ip;
    const result = limiter.isAllowed(clientId);

    res.setHeader('X-RateLimit-Limit', maxRequests);
    res.setHeader('X-RateLimit-Remaining', result.remaining);
    res.setHeader('X-RateLimit-Reset', result.resetTime);

    if (!result.allowed) {
      res.setHeader('Retry-After', result.resetTime);
      return res.status(429).json({
        error: 'Rate limit exceeded',
        retryAfter: result.resetTime
      });
    }

    next();
  };
}

app.use(slidingWindowMiddleware(100, 60000)); // 100 req/min

Redis-Based Rate Limiting

Distributed Rate Limiting with Redis

Redis provides atomic operations ideal for distributed rate limiting:

const Redis = require('ioredis');
const redis = new Redis();

// Sorted-set sliding-window limiter backed by Redis; safe across app
// instances because all window operations run inside one MULTI transaction.
class RedisRateLimiter {
  /**
   * (Fix: the original constructor had no body at all, so redis,
   * maxRequests and windowSeconds were never stored and every call failed.)
   * @param {object} redis - ioredis client.
   * @param {number} maxRequests - Allowed requests per window.
   * @param {number} windowSeconds - Window length in seconds.
   */
  constructor(redis, maxRequests, windowSeconds) {
    this.redis = redis;
    this.maxRequests = maxRequests;
    this.windowSeconds = windowSeconds;
  }

  /**
   * Count and record a request under `key`.
   * @returns {Promise<{allowed: boolean, remaining: number, resetTime: number}>}
   */
  async isAllowed(key) {
    const now = Date.now();
    const windowStart = now - (this.windowSeconds * 1000);

    const multi = this.redis.multi();

    // Remove old entries
    multi.zremrangebyscore(key, 0, windowStart);

    // Count requests in current window
    multi.zcard(key);

    // Add current request (random suffix keeps same-millisecond members unique)
    multi.zadd(key, now, `${now}-${Math.random()}`);

    // Set expiry
    multi.expire(key, this.windowSeconds);

    const results = await multi.exec();
    // ioredis exec() yields [err, value] pairs; index 1 is the ZCARD reply.
    const count = results[1][1];

    return {
      allowed: count < this.maxRequests,
      remaining: Math.max(0, this.maxRequests - count - 1),
      resetTime: this.windowSeconds
    };
  }
}

// Express middleware
// (Fixes: the function header was fused into this comment by extraction,
// and the Redis key was built without template-literal backticks.)
async function redisRateLimitMiddleware(req, res, next) {
  // NOTE(review): constructing a limiter per request is wasteful —
  // hoist it to module scope in production.
  const limiter = new RedisRateLimiter(redis, 100, 60);
  const clientId = req.headers['x-api-key'] || req.ip;
  const key = `rate_limit:${clientId}`;

  const result = await limiter.isAllowed(key);

  res.setHeader('X-RateLimit-Limit', 100);
  res.setHeader('X-RateLimit-Remaining', result.remaining);
  res.setHeader('X-RateLimit-Reset', result.resetTime);

  if (!result.allowed) {
    return res.status(429).json({
      error: 'Rate limit exceeded',
      retryAfter: result.resetTime
    });
  }

  next();
}

Token Bucket with Redis

Implement distributed token bucket using Redis:

// Distributed token bucket: all state lives in a Redis hash, and the
// refill+consume step runs atomically inside a Lua script via EVAL, so
// concurrent app instances cannot race each other.
class RedisTokenBucket {
  // redis: ioredis client (must support eval()).
  // key: Redis key holding this bucket's {tokens, last_refill} hash.
  // capacity: maximum tokens; refillRate: tokens added per second.
  constructor(redis, key, capacity, refillRate) {
    this.redis = redis;
    this.key = key;
    this.capacity = capacity;
    this.refillRate = refillRate;
  }

// Atomically refill the bucket based on elapsed time, then try to take
// `tokens`. The Lua script returns {allowed_flag, remaining_tokens}; bucket
// state expires after 1h of inactivity (EXPIRE 3600).
// NOTE(review): HMSET is deprecated in Redis 4+ (HSET takes multiple
// field/value pairs) but still works — confirm target Redis version.
async consume(tokens = 1) { const script = ` local key = KEYS[1] local capacity = tonumber(ARGV[1]) local refill_rate = tonumber(ARGV[2]) local tokens_requested = tonumber(ARGV[3]) local now = tonumber(ARGV[4])

  local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
  local tokens = tonumber(bucket[1]) or capacity
  local last_refill = tonumber(bucket[2]) or now

  -- Calculate refill
  local time_passed = (now - last_refill) / 1000
  local tokens_to_add = time_passed * refill_rate
  tokens = math.min(capacity, tokens + tokens_to_add)

  -- Try to consume
  if tokens >= tokens_requested then
    tokens = tokens - tokens_requested
    redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)
    redis.call('EXPIRE', key, 3600)
    return {1, math.floor(tokens)}
  else
    redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)
    redis.call('EXPIRE', key, 3600)
    return {0, math.floor(tokens)}
  end
`;

// eval(script, numKeys, ...keys, ...args): 1 key, then ARGV[1..4].
const result = await this.redis.eval(
  script,
  1,
  this.key,
  this.capacity,
  this.refillRate,
  tokens,
  Date.now()
);

// result[0] is the allowed flag (1/0); result[1] the remaining tokens.
return {
  allowed: result[0] === 1,
  remaining: result[1]
};

} }

// Usage
// (Fixes: the function header was fused into this comment by extraction,
// and the bucket key was missing its template-literal backticks.)
async function handleRequest(req, res) {
  const bucket = new RedisTokenBucket(
    redis,
    `token_bucket:${req.ip}`,
    100, // capacity
    10   // refill 10 tokens/second
  );

  const result = await bucket.consume(1);

  if (!result.allowed) {
    return res.status(429).json({
      error: 'Rate limit exceeded',
      remaining: result.remaining
    });
  }

  res.setHeader('X-RateLimit-Remaining', result.remaining);
  // Process request
}

Advanced Rate Limiting Patterns

Tiered Rate Limiting

Different limits for different user tiers:

// Fixed-window limiter whose quota depends on the user's subscription tier.
class TieredRateLimiter {
  constructor(redis) {
    this.redis = redis;
    // Per-tier quota and window (seconds).
    this.tiers = {
      free: { requests: 100, window: 3600 },
      pro: { requests: 1000, window: 3600 },
      enterprise: { requests: 10000, window: 3600 }
    };
  }

  /**
   * Look up the user's tier from Redis, defaulting to 'free'.
   * (Fix: the Redis key was built without template-literal backticks,
   * which is a syntax error.)
   */
  async getUserTier(userId) {
    const tier = await this.redis.get(`user:${userId}:tier`);
    return tier || 'free';
  }

  /**
   * Count this request against the user's tier quota.
   * NOTE(review): the key embeds the tier, so a mid-window tier change
   * starts a fresh counter — confirm that is acceptable.
   */
  async isAllowed(userId) {
    const tier = await this.getUserTier(userId);
    const limits = this.tiers[tier];

    const key = `rate_limit:${tier}:${userId}`;
    const count = await this.redis.incr(key);

    // First increment opens the window.
    if (count === 1) {
      await this.redis.expire(key, limits.window);
    }

    return {
      allowed: count <= limits.requests,
      remaining: Math.max(0, limits.requests - count),
      limit: limits.requests,
      tier
    };
  }
}

// Middleware
// (Fix: the function header was fused into this comment by extraction.)
async function tieredRateLimitMiddleware(req, res, next) {
  const limiter = new TieredRateLimiter(redis);
  // Authenticated users are limited per account; anonymous per IP.
  const userId = req.user?.id || req.ip;

  const result = await limiter.isAllowed(userId);

  res.setHeader('X-RateLimit-Limit', result.limit);
  res.setHeader('X-RateLimit-Remaining', result.remaining);
  res.setHeader('X-RateLimit-Tier', result.tier);

  if (!result.allowed) {
    return res.status(429).json({
      error: 'Rate limit exceeded',
      tier: result.tier,
      upgradeUrl: '/pricing'
    });
  }

  next();
}

Cost-Based Rate Limiting

Different endpoints consume different amounts of quota:

// Quota limiter in which endpoints charge different costs against a
// shared hourly budget.
class CostBasedRateLimiter {
  constructor(redis, quotaPerHour) {
    this.redis = redis;
    this.quotaPerHour = quotaPerHour;
    // Cost table keyed by "METHOD /path"; unlisted endpoints cost 1.
    this.costs = {
      'GET /users': 1,
      'POST /users': 5,
      'GET /reports': 10,
      'POST /reports': 50
    };
  }

  /**
   * Price of one call to `method path`.
   * (Fix: the lookup key was built without template-literal backticks,
   * which is a syntax error.)
   */
  getCost(method, path) {
    const key = `${method} ${path}`;
    return this.costs[key] || 1;
  }

  /**
   * Charge `cost` units against the user's current hourly bucket.
   * (Fix: both Redis keys were missing template-literal backticks.)
   */
  async consumeQuota(userId, cost) {
    const key = `quota:${userId}`;
    // Bucket the counter by wall-clock hour so usage resets naturally.
    const hour = Math.floor(Date.now() / 3600000);
    const hourKey = `${key}:${hour}`;

    const used = await this.redis.incrby(hourKey, cost);
    await this.redis.expire(hourKey, 3600);

    return {
      allowed: used <= this.quotaPerHour,
      used,
      remaining: Math.max(0, this.quotaPerHour - used),
      quota: this.quotaPerHour
    };
  }
}

// Middleware
// (Fix: the function header was fused into this comment by extraction.)
async function costBasedRateLimitMiddleware(req, res, next) {
  const limiter = new CostBasedRateLimiter(redis, 1000);
  const cost = limiter.getCost(req.method, req.path);
  const userId = req.user?.id || req.ip;

  const result = await limiter.consumeQuota(userId, cost);

  res.setHeader('X-RateLimit-Quota', result.quota);
  res.setHeader('X-RateLimit-Used', result.used);
  res.setHeader('X-RateLimit-Remaining', result.remaining);
  res.setHeader('X-RateLimit-Cost', cost);

  if (!result.allowed) {
    return res.status(429).json({
      error: 'Quota exceeded',
      cost,
      remaining: result.remaining
    });
  }

  next();
}

Dynamic Rate Limiting

Adjust limits based on system load:

// Adjusts per-user limits downward as overall system load rises.
class DynamicRateLimiter {
  constructor(redis) {
    this.redis = redis;
    this.baseLimit = 1000; // limit under healthy load
    this.minLimit = 100;   // floor regardless of load
  }

  /**
   * Read current system metrics from Redis.
   * @returns {Promise<{cpu: number, memory: number, responseTime: number}>}
   */
  async getSystemLoad() {
    // Get current system metrics
    const [cpu, memory, responseTime] = await Promise.all([
      this.redis.get('metrics:cpu'),
      this.redis.get('metrics:memory'),
      this.redis.get('metrics:response_time')
    ]);

    return {
      cpu: parseFloat(cpu) || 0,
      memory: parseFloat(memory) || 0,
      responseTime: parseFloat(responseTime) || 0
    };
  }

  /**
   * Derive the effective limit for a load snapshot: heavy CPU/memory or
   * slow responses shrink the limit, never below minLimit.
   */
  calculateLimit(load) {
    let factor = 1.0;

    if (load.cpu > 80 || load.memory > 80) {
      factor = 0.5;
    } else if (load.cpu > 60 || load.memory > 60) {
      factor = 0.75;
    }

    if (load.responseTime > 1000) {
      factor *= 0.5;
    }

    const scaled = Math.floor(this.baseLimit * factor);
    return Math.max(this.minLimit, scaled);
  }

  /**
   * Fixed-window (60s) check against the dynamically computed limit.
   */
  async isAllowed(userId) {
    const load = await this.getSystemLoad();
    const currentLimit = this.calculateLimit(load);

    const key = `rate_limit:dynamic:${userId}`;
    const count = await this.redis.incr(key);

    if (count === 1) {
      await this.redis.expire(key, 60);
    }

    return {
      allowed: count <= currentLimit,
      remaining: Math.max(0, currentLimit - count),
      limit: currentLimit,
      load
    };
  }
}

Production Best Practices

Comprehensive Rate Limiting Strategy

// Layered limiter: burst, minute, hour and day windows checked together.
class ProductionRateLimiter {
  constructor(redis) {
    this.redis = redis;
  }

  /**
   * Evaluate every window for the requester; the first exceeded window
   * wins, otherwise the day window's result is returned.
   */
  async checkLimits(req) {
    const userId = req.user?.id || req.ip;

    const windows = [
      { prefix: 'burst', limit: 10, seconds: 1 },      // 1. per-second burst protection
      { prefix: 'minute', limit: 100, seconds: 60 },   // 2. per-minute sustained rate
      { prefix: 'hour', limit: 1000, seconds: 3600 },  // 3. per-hour quota
      { prefix: 'day', limit: 10000, seconds: 86400 }  // 4. per-day quota
    ];

    const results = await Promise.all(
      windows.map(({ prefix, limit, seconds }) =>
        this.checkLimit(`${prefix}:${userId}`, limit, seconds)
      )
    );

    const blocked = results.find((r) => !r.allowed);
    return blocked || results[results.length - 1];
  }

  /**
   * Fixed-window counter check for one key.
   */
  async checkLimit(key, limit, windowSeconds) {
    const count = await this.redis.incr(key);

    // The first hit in a window starts its expiry clock.
    if (count === 1) {
      await this.redis.expire(key, windowSeconds);
    }

    const ttl = await this.redis.ttl(key);

    return {
      allowed: count <= limit,
      remaining: Math.max(0, limit - count),
      resetTime: ttl,
      limitType: key.split(':')[0]
    };
  }
}

// Middleware with detailed headers
// (Fix: the function header was fused into this comment by extraction.)
async function comprehensiveRateLimitMiddleware(req, res, next) {
  const limiter = new ProductionRateLimiter(redis);
  const result = await limiter.checkLimits(req);

  res.setHeader('X-RateLimit-Remaining', result.remaining);
  res.setHeader('X-RateLimit-Reset', result.resetTime);
  res.setHeader('X-RateLimit-Type', result.limitType);

  if (!result.allowed) {
    res.setHeader('Retry-After', result.resetTime);
    return res.status(429).json({
      error: 'Rate limit exceeded',
      type: result.limitType,
      retryAfter: result.resetTime,
      documentation: 'https://api.example.com/docs/rate-limits'
    });
  }

  next();
}

Graceful Degradation

// Redis-backed limiter with a circuit breaker: when Redis keeps failing,
// degrade to an in-memory fallback limiter instead of blocking all traffic.
class GracefulRateLimiter {
  constructor(redis, fallbackLimiter) {
    this.redis = redis;
    this.fallbackLimiter = fallbackLimiter;
    this.circuitBreaker = {
      failures: 0,       // consecutive Redis failures seen
      threshold: 5,      // failures before opening the circuit
      timeout: 30000,    // ms to stay open before probing Redis again
      state: 'closed',   // 'closed' | 'open' | 'half-open'
      nextAttempt: 0     // earliest time to probe Redis again
    };
  }

  /**
   * Check the limit, preferring Redis but degrading to the fallback.
   * (Fix: the `&gt;=` HTML entity left by extraction is restored to `>=`.)
   */
  async isAllowed(clientId) {
    // Use fallback while the circuit is open
    if (this.circuitBreaker.state === 'open') {
      if (Date.now() < this.circuitBreaker.nextAttempt) {
        return this.fallbackLimiter.isAllowed(clientId);
      }
      // Timeout elapsed: allow a single probe of Redis.
      this.circuitBreaker.state = 'half-open';
    }

    try {
      const result = await this.checkRedisLimit(clientId);

      // Success - reset circuit breaker
      if (this.circuitBreaker.state === 'half-open') {
        this.circuitBreaker.state = 'closed';
        this.circuitBreaker.failures = 0;
      }

      return result;
    } catch (error) {
      console.error('Redis rate limiter error:', error);

      this.circuitBreaker.failures++;

      if (this.circuitBreaker.failures >= this.circuitBreaker.threshold) {
        this.circuitBreaker.state = 'open';
        this.circuitBreaker.nextAttempt =
          Date.now() + this.circuitBreaker.timeout;
      }

      // Fallback to in-memory limiter
      return this.fallbackLimiter.isAllowed(clientId);
    }
  }

  /**
   * Fixed-window check in Redis: 100 requests per 60 seconds.
   * (Fixes: the key was missing template-literal backticks, and the
   * `&lt;=` entity is restored to `<=`.)
   */
  async checkRedisLimit(clientId) {
    const key = `rate_limit:${clientId}`;
    const count = await this.redis.incr(key);

    if (count === 1) {
      await this.redis.expire(key, 60);
    }

    return {
      allowed: count <= 100,
      remaining: Math.max(0, 100 - count)
    };
  }
}

Real-World Examples

Stripe's Rate Limiting Strategy

Stripe uses multiple rate limiting layers:

// Simulated Stripe-style rate limiter
// Simulated Stripe-style rate limiter: several windows are checked in
// order, from burst (per-second) to extended (per-hour).
class StripeStyleRateLimiter {
  constructor(redis) {
    this.redis = redis;
  }

  /**
   * Check every window for `apiKey`; the first exceeded window blocks.
   */
  async checkLimits(apiKey) {
    const limits = [
      // Burst protection
      { window: 1, limit: 25, key: 'second' },
      // Standard rate
      { window: 60, limit: 100, key: 'minute' },
      // Extended window
      { window: 3600, limit: 1000, key: 'hour' }
    ];

    for (const { window, limit, key } of limits) {
      const result = await this.checkWindow(
        apiKey,
        key,
        limit,
        window
      );

      if (!result.allowed) {
        return result;
      }
    }

    return { allowed: true };
  }

  /**
   * Sliding-window check via a Redis sorted set.
   * Note the parameter binding: `window` receives the label ('second',
   * 'minute', ...) while `seconds` is the window length.
   * (Fixes: the key was built without template-literal backticks, and the
   * `&gt;=` HTML entity is restored to `>=`.)
   */
  async checkWindow(apiKey, window, limit, seconds) {
    const key = `stripe_limit:${apiKey}:${window}`;
    const now = Date.now();
    const windowStart = now - (seconds * 1000);

    await this.redis.zremrangebyscore(key, 0, windowStart);
    const count = await this.redis.zcard(key);

    if (count >= limit) {
      const ttl = await this.redis.ttl(key);
      return {
        allowed: false,
        window,
        retryAfter: ttl
      };
    }

    await this.redis.zadd(key, now, `${now}-${Math.random()}`);
    await this.redis.expire(key, seconds);

    return {
      allowed: true,
      remaining: limit - count - 1
    };
  }
}

GitHub's Abuse Detection

GitHub combines rate limiting with abuse detection:

// Rate limiting combined with simple abuse scoring (GitHub-style).
class GitHubStyleRateLimiter {
  constructor(redis) {
    this.redis = redis;
  }

  /**
   * Standard rate limit plus an abuse score; a score over 100 blocks the
   * user for an hour and flags the account for review.
   */
  async checkRequest(userId, endpoint) {
    // Standard rate limit
    const rateLimit = await this.checkRateLimit(userId);

    // Abuse detection
    const abuseScore = await this.calculateAbuseScore(userId, endpoint);

    if (abuseScore > 100) {
      await this.flagForReview(userId);
      return {
        allowed: false,
        reason: 'abuse_detected',
        retryAfter: 3600
      };
    }

    return rateLimit;
  }

  /**
   * Fixed-window counter: 5000 requests/hour per user.
   * (Fix: this method was called by checkRequest but never defined in the
   * original, so every request crashed.)
   */
  async checkRateLimit(userId) {
    const key = `github_limit:${userId}`;
    const count = await this.redis.incr(key);

    if (count === 1) {
      await this.redis.expire(key, 3600);
    }

    return {
      allowed: count <= 5000,
      remaining: Math.max(0, 5000 - count)
    };
  }

  /**
   * Sum the individual abuse signals.
   * (Fix: the `=&gt;` HTML entity is restored to `=>`.)
   */
  async calculateAbuseScore(userId, endpoint) {
    const patterns = [
      // Rapid endpoint scanning
      this.checkEndpointDiversity(userId),
      // Failed authentication attempts
      this.checkFailedAuth(userId),
      // Suspicious patterns
      this.checkAccessPatterns(userId)
    ];

    const scores = await Promise.all(patterns);
    return scores.reduce((sum, score) => sum + score, 0);
  }

  /**
   * Accessing many different endpoints quickly is suspicious.
   * (Fix: the Redis key was missing template-literal backticks.)
   */
  async checkEndpointDiversity(userId) {
    const key = `endpoints:${userId}`;
    const endpoints = await this.redis.smembers(key);

    return endpoints.length > 50 ? 50 : 0;
  }

  /**
   * Each recorded failed auth attempt adds 10 to the score.
   */
  async checkFailedAuth(userId) {
    const key = `failed_auth:${userId}`;
    const failures = await this.redis.get(key);

    return parseInt(failures || 0, 10) * 10;
  }

  /**
   * Placeholder for behavioral analysis (timing, ordering, geography).
   * (Fix: called by calculateAbuseScore but never defined in the original;
   * returns 0 until a real signal is implemented — TODO.)
   */
  async checkAccessPatterns(userId) {
    return 0;
  }

  /**
   * Mark the user for manual security review for 24h.
   * (Fixes: the Redis key and the log message were missing backticks.)
   */
  async flagForReview(userId) {
    await this.redis.setex(`flagged:${userId}`, 86400, '1');
    // Alert security team
    console.log(`User ${userId} flagged for abuse review`);
  }
}

Monitoring and Observability

Rate Limit Metrics

// Emits metrics for every rate-limit decision and keeps a 24h event log
// in a Redis sorted set for offline analysis.
class RateLimitMonitoring {
  constructor(redis, metrics) {
    this.redis = redis;
    this.metrics = metrics; // expected to expose increment(name, tags)
  }

  /**
   * Record one allow/block decision: counters plus a durable event entry.
   */
  async recordRateLimitEvent(userId, endpoint, result) {
    // Record metrics
    this.metrics.increment('rate_limit.requests', {
      user: userId,
      endpoint,
      result: result.allowed ? 'allowed' : 'blocked'
    });

    if (!result.allowed) {
      this.metrics.increment('rate_limit.blocked', {
        user: userId,
        endpoint,
        reason: result.limitType
      });
    }

    // Store for analysis (score = event timestamp)
    await this.redis.zadd(
      'rate_limit:events',
      Date.now(),
      JSON.stringify({
        userId,
        endpoint,
        result,
        timestamp: Date.now()
      })
    );

    // Trim old events
    const oneDayAgo = Date.now() - 86400000;
    await this.redis.zremrangebyscore(
      'rate_limit:events',
      0,
      oneDayAgo
    );
  }

  /**
   * Rank users by number of blocked requests in the stored event log.
   * (Fix: the `=&gt;` HTML entities left by extraction are restored to `=>`,
   * which were syntax errors.)
   * @returns {Promise<Array<[string, number]>>} [userId, blockedCount] pairs.
   */
  async getTopBlockedUsers(limit = 10) {
    const events = await this.redis.zrange(
      'rate_limit:events',
      0,
      -1
    );

    const userCounts = {};

    events.forEach(event => {
      const { userId, result } = JSON.parse(event);
      if (!result.allowed) {
        userCounts[userId] = (userCounts[userId] || 0) + 1;
      }
    });

    return Object.entries(userCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit);
  }
}

Conclusion

Effective API rate limiting and throttling protect your services from abuse, ensure fair resource distribution, and maintain system stability. Choose the right algorithm for your use case, implement distributed rate limiting with Redis for scalability, and monitor rate limit metrics to optimize limits over time.

Key takeaways:

  • Use token bucket for burst capacity, leaky bucket for constant rate, sliding window for accuracy
  • Implement distributed rate limiting with Redis for multi-instance deployments
  • Apply tiered and cost-based rate limiting for different user levels
  • Monitor rate limit metrics and adjust limits based on system load
  • Implement graceful degradation with fallback limiters
  • Provide clear error messages with Retry-After headers

Production systems like Stripe process 100+ million rate limit checks per day with sub-millisecond latency using Redis-based distributed rate limiting, while GitHub prevents abuse by combining rate limits with behavioral analysis and anomaly detection.

Found this helpful? Share it!

Related Articles

S

Written by StaticBlock Editorial

StaticBlock Editorial is a technical writer and software engineer specializing in web development, performance optimization, and developer tooling.