From 86865166ef554f2b96e1a68fcfafe48162d146f0 Mon Sep 17 00:00:00 2001 From: Junaid Saeed Uppal Date: Thu, 26 Feb 2026 09:59:27 +0500 Subject: [PATCH] add balance/mean-reversion signal and Cloudflare visitor logging Balance signal (15% weight) favors under-represented chairs over last 50 games. Visitor middleware captures real IPs from CF headers, batched into ClickHouse with 90-day TTL. --- app/db.py | 65 +++++++++++++++++++++++++++++++++++++---- app/server.py | 60 +++++++++++++++++++++++++++++++++++++ clickhouse/init.sql | 13 +++++++++ static/predictions.html | 6 ++-- 4 files changed, 135 insertions(+), 9 deletions(-) diff --git a/app/db.py b/app/db.py index 49871418a..2cb3349bc 100644 --- a/app/db.py +++ b/app/db.py @@ -61,6 +61,15 @@ def run_migrations(): ) client.insert("_migrations", [["swap_ac_chairs"]], column_names=["name"]) log.info("Migration swap_ac_chairs applied") + # Ensure visitors table exists (for existing deployments) + client.command( + "CREATE TABLE IF NOT EXISTS visitors (" + " ip String, country String, path String, method String," + " user_agent String, referer String, accept_lang String," + " created_at DateTime DEFAULT now()" + ") ENGINE = MergeTree() ORDER BY (created_at, ip)" + " TTL created_at + INTERVAL 90 DAY" + ) _migrations_applied = True @@ -143,6 +152,31 @@ def upsert_user(user: dict): ) +@_with_lock +def insert_visitors(batch: list[dict]): + """Bulk insert visitor records.""" + if not batch: + return + client = get_client() + rows = [ + [ + v.get("ip", ""), + v.get("country", ""), + v.get("path", ""), + v.get("method", ""), + v.get("user_agent", ""), + v.get("referer", ""), + v.get("accept_lang", ""), + ] + for v in batch + ] + client.insert( + "visitors", + rows, + column_names=["ip", "country", "path", "method", "user_agent", "referer", "accept_lang"], + ) + + @_with_lock def get_recent_games(n: int = 50) -> list[dict]: """Get last N completed games.""" @@ -1000,8 +1034,17 @@ def _bayesian_prediction(winners, markov1, markov2): else: streak = {c: 1 / 3 for c in CHAIR_LABELS} - weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10} - signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak} + # Signal 6: Balance / Mean Reversion — 15% + # Look at last 50 games, invert frequencies to favor under-represented chairs + window = min(50, len(winners)) + recent_50 = winners[-window:] + freq = {c: recent_50.count(c) / window for c in CHAIR_LABELS} + balance = {c: max(0.01, 2 / 3 - freq[c]) for c in CHAIR_LABELS} + bal_total = sum(balance.values()) + balance = {c: balance[c] / bal_total for c in CHAIR_LABELS} + + weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15} + signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak, "balance": balance} combined = {c: 0 for c in CHAIR_LABELS} for sig_name, weight in weights.items(): @@ -1122,7 +1165,7 @@ def _backtest_theories(winners): if len(winners) <= warmup: return {"error": "Not enough data for backtesting"} - theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "combined"] + theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "balance", "combined"] full_hits = {t: 0 for t in theories} semi_hits = {t: 0 for t in theories} total_tested = 0 @@ -1176,10 +1219,19 @@ def _backtest_theories(winners): streak_probs = {c: 1 / 3 for c in CHAIR_LABELS} streak_ranked = sorted(CHAIR_LABELS, key=lambda c: streak_probs[c], reverse=True) + # Balance / Mean Reversion + bal_window = min(50, len(history)) + bal_recent = history[-bal_window:] + bal_freq = {c: bal_recent.count(c) / bal_window for c in CHAIR_LABELS} + bal_probs = {c: max(0.01, 2 / 3 - bal_freq[c]) for c in CHAIR_LABELS} + bal_t = sum(bal_probs.values()) + bal_probs = {c: bal_probs[c] / bal_t for c in CHAIR_LABELS} + bal_ranked = sorted(CHAIR_LABELS, key=lambda c: bal_probs[c], reverse=True) + # Combined Bayesian combined = {c: 0 for c in CHAIR_LABELS} - weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10} - signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs} + weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15} + signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs, "balance": bal_probs} for sig_name, weight in weights.items(): for c in CHAIR_LABELS: combined[c] += weight * signals[sig_name].get(c, 1 / 3) @@ -1187,7 +1239,8 @@ def _backtest_theories(winners): ranked = { "base_rate": base_ranked, "markov_1": m1_ranked, "markov_2": m2_ranked, - "recent_20": rec_ranked, "streak": streak_ranked, "combined": combined_ranked, + "recent_20": rec_ranked, "streak": streak_ranked, "balance": bal_ranked, + "combined": combined_ranked, } for t in theories: pick = ranked[t][0] diff --git a/app/server.py b/app/server.py index d3187305e..98c3df611 100644 --- a/app/server.py +++ b/app/server.py @@ -30,7 +30,65 @@ class WebServer: def __init__(self): self.app = web.Application() self.clients: set[web.WebSocketResponse] = set() + self._visitor_buffer: list[dict] = [] + self._visitor_lock = asyncio.Lock() self._setup_routes() + self.app.middlewares.append(self._make_visitor_middleware()) + + def _make_visitor_middleware(self): + server = self + + @web.middleware + async def visitor_middleware(request: web.Request, handler): + response = await handler(request) + path = request.path + # Skip static files and WebSocket upgrades + if path.startswith("/static/") or request.headers.get("Upgrade", "").lower() == "websocket": + return response + ip = ( + request.headers.get("CF-Connecting-IP") + or (request.headers.get("X-Forwarded-For", "").split(",")[0].strip()) + or request.remote + or "" + ) + visitor = { + "ip": ip, + "country": request.headers.get("CF-IPCountry", ""), + "path": path, + "method": request.method, + "user_agent": request.headers.get("User-Agent", ""), + "referer": request.headers.get("Referer", ""), + "accept_lang": request.headers.get("Accept-Language", ""), + } + batch = None + async with server._visitor_lock: + server._visitor_buffer.append(visitor) + if len(server._visitor_buffer) >= 20: + batch = server._visitor_buffer[:] + server._visitor_buffer.clear() + if batch: + try: + await _run_sync(db.insert_visitors, batch) + except Exception as e: + log.warning("Visitor insert failed: %s", e) + return response + + return visitor_middleware + + async def _flush_visitors(self): + """Periodically flush visitor buffer so low-traffic visits aren't lost.""" + while True: + await asyncio.sleep(30) + batch = None + async with self._visitor_lock: + if self._visitor_buffer: + batch = self._visitor_buffer[:] + self._visitor_buffer.clear() + if batch: + try: + await _run_sync(db.insert_visitors, batch) + except Exception as e: + log.warning("Visitor flush failed: %s", e) def _setup_routes(self): self.app.router.add_get("/", self._handle_index) @@ -219,6 +277,7 @@ class WebServer: site = web.TCPSite(runner, "0.0.0.0", config.WEB_PORT) await site.start() log.info("Web server listening on http://0.0.0.0:%s", config.WEB_PORT) + flush_task = asyncio.create_task(self._flush_visitors()) # Keep running until cancelled try: while True: @@ -226,4 +285,5 @@ class WebServer: except asyncio.CancelledError: pass finally: + flush_task.cancel() await runner.cleanup() diff --git a/clickhouse/init.sql b/clickhouse/init.sql index 1930ab4ec..41a2aed93 100644 --- a/clickhouse/init.sql +++ b/clickhouse/init.sql @@ -40,3 +40,16 @@ CREATE TABLE IF NOT EXISTS users ( updated_at DateTime DEFAULT now() ) ENGINE = ReplacingMergeTree(updated_at) ORDER BY user_id; + +CREATE TABLE IF NOT EXISTS visitors ( + ip String, + country String, + path String, + method String, + user_agent String, + referer String, + accept_lang String, + created_at DateTime DEFAULT now() +) ENGINE = MergeTree() +ORDER BY (created_at, ip) +TTL created_at + INTERVAL 90 DAY; diff --git a/static/predictions.html b/static/predictions.html index c8292b806..e4bca5249 100644 --- a/static/predictions.html +++ b/static/predictions.html @@ -456,7 +456,7 @@ function renderPrediction(data) { // Signal table const tbody = $('signal-table').querySelector('tbody'); - const sigNames = {'base_rate':'Base Rate','markov_1':'Markov-1','markov_2':'Markov-2','recent_20':'Recent 20','streak':'Streak'}; + const sigNames = {'base_rate':'Base Rate','markov_1':'Markov-1','markov_2':'Markov-2','recent_20':'Recent 20','streak':'Streak','balance':'Balance'}; tbody.innerHTML = Object.entries(data.signals).map(([key, sig]) => `${sigNames[key]||key}${(sig.weight*100).toFixed(0)}%` + CHAIRS.map(c => `${pct(sig.probs[c])}`).join('') + '' @@ -914,7 +914,7 @@ function renderRunsTest(runs) { function renderBacktest(bt) { if (bt.error) { $('backtest-cards').innerHTML = `
${bt.error}
`; return; } - const names = {base_rate:'Base Rate',markov_1:'Markov-1',markov_2:'Markov-2',recent_20:'Recent 20',streak:'Streak',combined:'Combined'}; + const names = {base_rate:'Base Rate',markov_1:'Markov-1',markov_2:'Markov-2',recent_20:'Recent 20',streak:'Streak',balance:'Balance',combined:'Combined'}; $('backtest-cards').innerHTML = Object.entries(bt.accuracy).map(([key, acc]) => { const fh = bt.full_hits?.[key] ?? '?'; const sh = bt.semi_hits?.[key] ?? '?'; @@ -925,7 +925,7 @@ function renderBacktest(bt) { }).join(''); if (bt.rolling_accuracy) { const ctx = $('backtest-chart').getContext('2d'); - const colors = {base_rate:'#8b8fa3',markov_1:'#3b82f6',markov_2:'#ec4899',recent_20:'#f59e0b',streak:'#10b981',combined:'#6c5ce7'}; + const colors = {base_rate:'#8b8fa3',markov_1:'#3b82f6',markov_2:'#ec4899',recent_20:'#f59e0b',streak:'#10b981',balance:'#f472b6',combined:'#6c5ce7'}; const datasets = Object.entries(bt.rolling_accuracy).map(([key, data]) => ({ label: names[key]||key, data, borderColor: colors[key]||'#fff', backgroundColor: 'transparent', borderWidth: key === 'combined' ? 3 : 1.5, pointRadius: 0, tension: 0.3,