add balance/mean-reversion signal and Cloudflare visitor logging
Balance signal (15% weight) favors under-represented chairs over last 50 games. Visitor middleware captures real IPs from CF headers, batched into ClickHouse with 90-day TTL.
This commit is contained in:
65
app/db.py
65
app/db.py
@@ -61,6 +61,15 @@ def run_migrations():
|
||||
)
|
||||
client.insert("_migrations", [["swap_ac_chairs"]], column_names=["name"])
|
||||
log.info("Migration swap_ac_chairs applied")
|
||||
# Ensure visitors table exists (for existing deployments)
|
||||
client.command(
|
||||
"CREATE TABLE IF NOT EXISTS visitors ("
|
||||
" ip String, country String, path String, method String,"
|
||||
" user_agent String, referer String, accept_lang String,"
|
||||
" created_at DateTime DEFAULT now()"
|
||||
") ENGINE = MergeTree() ORDER BY (created_at, ip)"
|
||||
" TTL created_at + INTERVAL 90 DAY"
|
||||
)
|
||||
_migrations_applied = True
|
||||
|
||||
|
||||
@@ -143,6 +152,31 @@ def upsert_user(user: dict):
|
||||
)
|
||||
|
||||
|
||||
@_with_lock
def insert_visitors(batch: list[dict]):
    """Bulk insert visitor records into the ``visitors`` table.

    Args:
        batch: Visitor dicts. Each may carry the keys ``ip``, ``country``,
            ``path``, ``method``, ``user_agent``, ``referer`` and
            ``accept_lang``; any key that is absent is written as an
            empty string.

    An empty batch is a no-op and does not acquire a client.
    """
    if not batch:
        return

    # Column order here defines the order of values in every row below.
    # ``created_at`` is intentionally not supplied by this function.
    columns = ["ip", "country", "path", "method", "user_agent", "referer", "accept_lang"]
    rows = [[visitor.get(column, "") for column in columns] for visitor in batch]

    get_client().insert("visitors", rows, column_names=columns)
|
||||
|
||||
|
||||
@_with_lock
|
||||
def get_recent_games(n: int = 50) -> list[dict]:
|
||||
"""Get last N completed games."""
|
||||
@@ -1000,8 +1034,17 @@ def _bayesian_prediction(winners, markov1, markov2):
|
||||
else:
|
||||
streak = {c: 1 / 3 for c in CHAIR_LABELS}
|
||||
|
||||
weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10}
|
||||
signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak}
|
||||
# Signal 6: Balance / Mean Reversion — 15%
|
||||
# Look at last 50 games, invert frequencies to favor under-represented chairs
|
||||
window = min(50, len(winners))
|
||||
recent_50 = winners[-window:]
|
||||
freq = {c: recent_50.count(c) / window for c in CHAIR_LABELS}
|
||||
balance = {c: max(0.01, 2 / 3 - freq[c]) for c in CHAIR_LABELS}
|
||||
bal_total = sum(balance.values())
|
||||
balance = {c: balance[c] / bal_total for c in CHAIR_LABELS}
|
||||
|
||||
weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15}
|
||||
signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak, "balance": balance}
|
||||
|
||||
combined = {c: 0 for c in CHAIR_LABELS}
|
||||
for sig_name, weight in weights.items():
|
||||
@@ -1122,7 +1165,7 @@ def _backtest_theories(winners):
|
||||
if len(winners) <= warmup:
|
||||
return {"error": "Not enough data for backtesting"}
|
||||
|
||||
theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "combined"]
|
||||
theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "balance", "combined"]
|
||||
full_hits = {t: 0 for t in theories}
|
||||
semi_hits = {t: 0 for t in theories}
|
||||
total_tested = 0
|
||||
@@ -1176,10 +1219,19 @@ def _backtest_theories(winners):
|
||||
streak_probs = {c: 1 / 3 for c in CHAIR_LABELS}
|
||||
streak_ranked = sorted(CHAIR_LABELS, key=lambda c: streak_probs[c], reverse=True)
|
||||
|
||||
# Balance / Mean Reversion
|
||||
bal_window = min(50, len(history))
|
||||
bal_recent = history[-bal_window:]
|
||||
bal_freq = {c: bal_recent.count(c) / bal_window for c in CHAIR_LABELS}
|
||||
bal_probs = {c: max(0.01, 2 / 3 - bal_freq[c]) for c in CHAIR_LABELS}
|
||||
bal_t = sum(bal_probs.values())
|
||||
bal_probs = {c: bal_probs[c] / bal_t for c in CHAIR_LABELS}
|
||||
bal_ranked = sorted(CHAIR_LABELS, key=lambda c: bal_probs[c], reverse=True)
|
||||
|
||||
# Combined Bayesian
|
||||
combined = {c: 0 for c in CHAIR_LABELS}
|
||||
weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10}
|
||||
signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs}
|
||||
weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15}
|
||||
signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs, "balance": bal_probs}
|
||||
for sig_name, weight in weights.items():
|
||||
for c in CHAIR_LABELS:
|
||||
combined[c] += weight * signals[sig_name].get(c, 1 / 3)
|
||||
@@ -1187,7 +1239,8 @@ def _backtest_theories(winners):
|
||||
|
||||
ranked = {
|
||||
"base_rate": base_ranked, "markov_1": m1_ranked, "markov_2": m2_ranked,
|
||||
"recent_20": rec_ranked, "streak": streak_ranked, "combined": combined_ranked,
|
||||
"recent_20": rec_ranked, "streak": streak_ranked, "balance": bal_ranked,
|
||||
"combined": combined_ranked,
|
||||
}
|
||||
for t in theories:
|
||||
pick = ranked[t][0]
|
||||
|
||||
@@ -30,7 +30,65 @@ class WebServer:
|
||||
def __init__(self):
    """Create the aiohttp application, visitor-logging state and routes."""
    self.app = web.Application()
    # Currently connected WebSocket clients.
    self.clients: set[web.WebSocketResponse] = set()

    # Visitor records waiting to be written; guarded by ``_visitor_lock``.
    self._visitor_buffer: list[dict] = list()
    self._visitor_lock = asyncio.Lock()

    self._setup_routes()

    # Register the visitor middleware once the routes are in place.
    visitor_middleware = self._make_visitor_middleware()
    self.app.middlewares.append(visitor_middleware)
|
||||
|
||||
def _make_visitor_middleware(self):
    """Build the aiohttp middleware that buffers one visitor record per request.

    The returned middleware runs the downstream handler first, then records
    the request unless its path starts with ``/static/`` or it carries an
    ``Upgrade: websocket`` header. Records accumulate in
    ``self._visitor_buffer``; once 20 are buffered they are handed to
    ``db.insert_visitors`` in one batch. Insert failures are logged as
    warnings and never affect the response.

    NOTE: because the handler is awaited before anything is recorded, a
    request whose handler raises is not buffered.
    """
    flush_threshold = 20  # buffered records that trigger an immediate insert

    @web.middleware
    async def visitor_middleware(request: web.Request, handler):
        response = await handler(request)

        headers = request.headers
        path = request.path

        # Skip static files and WebSocket upgrades
        is_static = path.startswith("/static/")
        is_websocket = headers.get("Upgrade", "").lower() == "websocket"
        if is_static or is_websocket:
            return response

        # Client address precedence: Cloudflare header, then the first
        # X-Forwarded-For entry, then the socket peer, else empty string.
        forwarded_first = headers.get("X-Forwarded-For", "").split(",")[0].strip()
        ip = headers.get("CF-Connecting-IP") or forwarded_first or request.remote or ""

        record = {
            "ip": ip,
            "country": headers.get("CF-IPCountry", ""),
            "path": path,
            "method": request.method,
            "user_agent": headers.get("User-Agent", ""),
            "referer": headers.get("Referer", ""),
            "accept_lang": headers.get("Accept-Language", ""),
        }

        # Append under the lock; take a snapshot only when the buffer is full
        # so the insert itself happens outside the lock.
        full_batch = None
        async with self._visitor_lock:
            buffer = self._visitor_buffer
            buffer.append(record)
            if len(buffer) >= flush_threshold:
                full_batch = list(buffer)
                buffer.clear()

        if not full_batch:
            return response

        try:
            # presumably _run_sync runs the blocking DB call off the event
            # loop -- confirm against its definition
            await _run_sync(db.insert_visitors, full_batch)
        except Exception as exc:
            log.warning("Visitor insert failed: %s", exc)
        return response

    return visitor_middleware
|
||||
|
||||
async def _flush_visitors(self):
    """Periodically flush visitor buffer so low-traffic visits aren't lost.

    Loops forever: every 30 seconds it drains ``self._visitor_buffer`` under
    ``self._visitor_lock`` and, if anything was buffered, passes the drained
    records to ``db.insert_visitors``. A failed insert is logged as a
    warning and the loop carries on; the drained records are not re-queued.
    """
    flush_interval = 30  # seconds between flush attempts

    while True:
        await asyncio.sleep(flush_interval)

        # Snapshot and empty the buffer while holding the lock; the insert
        # itself runs after the lock is released.
        async with self._visitor_lock:
            pending = list(self._visitor_buffer)
            self._visitor_buffer.clear()

        if not pending:
            continue

        try:
            await _run_sync(db.insert_visitors, pending)
        except Exception as exc:
            log.warning("Visitor flush failed: %s", exc)
|
||||
|
||||
def _setup_routes(self):
|
||||
self.app.router.add_get("/", self._handle_index)
|
||||
@@ -219,6 +277,7 @@ class WebServer:
|
||||
site = web.TCPSite(runner, "0.0.0.0", config.WEB_PORT)
|
||||
await site.start()
|
||||
log.info("Web server listening on http://0.0.0.0:%s", config.WEB_PORT)
|
||||
flush_task = asyncio.create_task(self._flush_visitors())
|
||||
# Keep running until cancelled
|
||||
try:
|
||||
while True:
|
||||
@@ -226,4 +285,5 @@ class WebServer:
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
finally:
|
||||
flush_task.cancel()
|
||||
await runner.cleanup()
|
||||
|
||||
Reference in New Issue
Block a user