from limitry import Limitry
from openai import OpenAI
# Instantiate both API clients once at module scope; every call below
# shares these two instances.
limitry, openai = Limitry(), OpenAI()
# 1. Register the customer in Limitry. The metadata links this customer
# to its Stripe record so the two billing systems can be reconciled.
acme_metadata = {
    "stripe_customer_id": "cus_xxx",
    "plan": "pro",
}
limitry.customers.create(
    external_id="cust_123",
    name="Acme Corp",
    metadata=acme_metadata,
    billing_cycle_start="2024-01-15",
    timezone="America/New_York",
)
# 2. Create one token meter per model.
# Each meter sums `values.tokens` over `llm.completion` events, filtered
# to a single model via the event dimensions. Data-driven loop replaces
# two copy-pasted calls so adding a model is a one-line change.
for meter_name, model_name in [
    ("GPT-4 Tokens", "gpt-4"),
    ("Claude Tokens", "claude-3"),
]:
    limitry.meters.create(
        name=meter_name,
        aggregation="sum",
        field="values.tokens",
        event_filter={
            "event_type": "llm.completion",
            "dimensions": {"model": model_name},
        },
    )
# 3. Cap this customer's GPT-4 usage at one million tokens per calendar
# month, with alerts fired at 80% and 100% of the cap.
# NOTE(review): "mtr_gpt4" is hard-coded — presumably the id of the
# "GPT-4 Tokens" meter created above; confirm it matches the actual id.
gpt4_monthly_token_cap = 1_000_000
limitry.limits.create(
    customer_id="cust_123",
    meter_id="mtr_gpt4",
    limit_value=gpt4_monthly_token_cap,
    period="month",
    alert_thresholds=[80, 100],
)
# 4. In your API: check → call → record
async def completion(customer_id: str, prompt: str, model: str = "gpt-4") -> str:
    """Run one chat completion for *customer_id*, enforcing usage limits.

    Flow: check the customer's limits, make the LLM call, then record the
    consumed tokens (and their cost) back to Limitry.

    Args:
        customer_id: Limitry external customer id (e.g. "cust_123").
        prompt: User prompt, sent as a single chat message.
        model: Model name; also recorded as an event dimension so
            per-model meters pick it up.

    Returns:
        The assistant's reply text.

    Raises:
        Exception: If the customer has exceeded a configured limit.
    """
    # 1. Enforce limits before spending money on the LLM call.
    check = limitry.limits.check(customer_id=customer_id)
    if not check.allowed:
        # Guard against an empty `limits` list so the error path itself
        # cannot raise IndexError and mask the real failure.
        which = check.limits[0].name if check.limits else "unknown"
        raise Exception(f"Limit exceeded: {which}")
    # 2. Make the LLM call.
    # NOTE(review): this is the synchronous OpenAI client, so the call
    # blocks the event loop inside this async function — consider the
    # async client or asyncio.to_thread; confirm with the surrounding app.
    response = openai.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    # 3. Record usage with cost so meters and limits stay current.
    tokens = response.usage.total_tokens
    cost_cents = calculate_cost(model, tokens)
    limitry.events.record(
        customer_id=customer_id,
        event_type="llm.completion",
        values={"tokens": tokens, "cost_cents": cost_cents},
        dimensions={"model": model}
    )
    return response.choices[0].message.content