add balance/mean-reversion signal and Cloudflare visitor logging

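The balance signal (15% weight) favors chairs that are under-represented
over the last 50 games. The visitor middleware reads the client IP from
Cloudflare's CF-Connecting-IP header (falling back to X-Forwarded-For, then
the peer address) and writes visits to ClickHouse in batches; the visitors
table has a 90-day TTL.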
This commit is contained in:
2026-02-26 09:59:27 +05:00
parent 5fd4894599
commit 86865166ef
4 changed files with 135 additions and 9 deletions

View File

@@ -61,6 +61,15 @@ def run_migrations():
) )
client.insert("_migrations", [["swap_ac_chairs"]], column_names=["name"]) client.insert("_migrations", [["swap_ac_chairs"]], column_names=["name"])
log.info("Migration swap_ac_chairs applied") log.info("Migration swap_ac_chairs applied")
# Ensure visitors table exists (for existing deployments)
client.command(
"CREATE TABLE IF NOT EXISTS visitors ("
" ip String, country String, path String, method String,"
" user_agent String, referer String, accept_lang String,"
" created_at DateTime DEFAULT now()"
") ENGINE = MergeTree() ORDER BY (created_at, ip)"
" TTL created_at + INTERVAL 90 DAY"
)
_migrations_applied = True _migrations_applied = True
@@ -143,6 +152,31 @@ def upsert_user(user: dict):
) )
@_with_lock
def insert_visitors(batch: list[dict]):
    """Write a batch of visitor records to the ``visitors`` table in one insert.

    Args:
        batch: Visitor dicts. The keys read are ``ip``, ``country``, ``path``,
            ``method``, ``user_agent``, ``referer`` and ``accept_lang``; any
            key that is absent is stored as an empty string.

    An empty batch returns immediately without obtaining a client.
    ``created_at`` is not supplied here, so the table's column default applies.
    """
    if not batch:
        return

    client = get_client()
    columns = ["ip", "country", "path", "method", "user_agent", "referer", "accept_lang"]
    # One row per visitor, with values laid out in the same order as ``columns``.
    records = [[visitor.get(column, "") for column in columns] for visitor in batch]
    client.insert("visitors", records, column_names=columns)
@_with_lock @_with_lock
def get_recent_games(n: int = 50) -> list[dict]: def get_recent_games(n: int = 50) -> list[dict]:
"""Get last N completed games.""" """Get last N completed games."""
@@ -1000,8 +1034,17 @@ def _bayesian_prediction(winners, markov1, markov2):
else: else:
streak = {c: 1 / 3 for c in CHAIR_LABELS} streak = {c: 1 / 3 for c in CHAIR_LABELS}
weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10} # Signal 6: Balance / Mean Reversion — 15%
signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak} # Look at last 50 games, invert frequencies to favor under-represented chairs
window = min(50, len(winners))
recent_50 = winners[-window:]
freq = {c: recent_50.count(c) / window for c in CHAIR_LABELS}
balance = {c: max(0.01, 2 / 3 - freq[c]) for c in CHAIR_LABELS}
bal_total = sum(balance.values())
balance = {c: balance[c] / bal_total for c in CHAIR_LABELS}
weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15}
signals = {"base_rate": base, "markov_1": m1, "markov_2": m2, "recent_20": rec, "streak": streak, "balance": balance}
combined = {c: 0 for c in CHAIR_LABELS} combined = {c: 0 for c in CHAIR_LABELS}
for sig_name, weight in weights.items(): for sig_name, weight in weights.items():
@@ -1122,7 +1165,7 @@ def _backtest_theories(winners):
if len(winners) <= warmup: if len(winners) <= warmup:
return {"error": "Not enough data for backtesting"} return {"error": "Not enough data for backtesting"}
theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "combined"] theories = ["base_rate", "markov_1", "markov_2", "recent_20", "streak", "balance", "combined"]
full_hits = {t: 0 for t in theories} full_hits = {t: 0 for t in theories}
semi_hits = {t: 0 for t in theories} semi_hits = {t: 0 for t in theories}
total_tested = 0 total_tested = 0
@@ -1176,10 +1219,19 @@ def _backtest_theories(winners):
streak_probs = {c: 1 / 3 for c in CHAIR_LABELS} streak_probs = {c: 1 / 3 for c in CHAIR_LABELS}
streak_ranked = sorted(CHAIR_LABELS, key=lambda c: streak_probs[c], reverse=True) streak_ranked = sorted(CHAIR_LABELS, key=lambda c: streak_probs[c], reverse=True)
# Balance / Mean Reversion
bal_window = min(50, len(history))
bal_recent = history[-bal_window:]
bal_freq = {c: bal_recent.count(c) / bal_window for c in CHAIR_LABELS}
bal_probs = {c: max(0.01, 2 / 3 - bal_freq[c]) for c in CHAIR_LABELS}
bal_t = sum(bal_probs.values())
bal_probs = {c: bal_probs[c] / bal_t for c in CHAIR_LABELS}
bal_ranked = sorted(CHAIR_LABELS, key=lambda c: bal_probs[c], reverse=True)
# Combined Bayesian # Combined Bayesian
combined = {c: 0 for c in CHAIR_LABELS} combined = {c: 0 for c in CHAIR_LABELS}
weights = {"base_rate": 0.20, "markov_1": 0.30, "markov_2": 0.25, "recent_20": 0.15, "streak": 0.10} weights = {"base_rate": 0.15, "markov_1": 0.25, "markov_2": 0.25, "recent_20": 0.10, "streak": 0.10, "balance": 0.15}
signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs} signals = {"base_rate": base, "markov_1": m1_probs, "markov_2": m2_probs, "recent_20": rec, "streak": streak_probs, "balance": bal_probs}
for sig_name, weight in weights.items(): for sig_name, weight in weights.items():
for c in CHAIR_LABELS: for c in CHAIR_LABELS:
combined[c] += weight * signals[sig_name].get(c, 1 / 3) combined[c] += weight * signals[sig_name].get(c, 1 / 3)
@@ -1187,7 +1239,8 @@ def _backtest_theories(winners):
ranked = { ranked = {
"base_rate": base_ranked, "markov_1": m1_ranked, "markov_2": m2_ranked, "base_rate": base_ranked, "markov_1": m1_ranked, "markov_2": m2_ranked,
"recent_20": rec_ranked, "streak": streak_ranked, "combined": combined_ranked, "recent_20": rec_ranked, "streak": streak_ranked, "balance": bal_ranked,
"combined": combined_ranked,
} }
for t in theories: for t in theories:
pick = ranked[t][0] pick = ranked[t][0]

View File

@@ -30,7 +30,65 @@ class WebServer:
def __init__(self): def __init__(self):
self.app = web.Application() self.app = web.Application()
self.clients: set[web.WebSocketResponse] = set() self.clients: set[web.WebSocketResponse] = set()
self._visitor_buffer: list[dict] = []
self._visitor_lock = asyncio.Lock()
self._setup_routes() self._setup_routes()
self.app.middlewares.append(self._make_visitor_middleware())
def _make_visitor_middleware(self):
    """Build the aiohttp middleware that buffers one visitor record per request.

    The returned middleware runs the downstream handler first and then
    records the request, so a request whose handler raises is never
    recorded. Requests under ``/static/`` and WebSocket upgrade requests
    are skipped.

    Records accumulate in ``self._visitor_buffer``. When the buffer reaches
    20 entries it is emptied and the batch is passed to
    ``db.insert_visitors`` through ``_run_sync``; that call is awaited
    before the response is returned. A failed insert is logged as a
    warning and the batch is not re-queued.
    """

    @web.middleware
    async def visitor_middleware(request: web.Request, handler):
        response = await handler(request)

        headers = request.headers
        path = request.path
        if path.startswith("/static/"):
            return response
        if headers.get("Upgrade", "").lower() == "websocket":
            return response

        # Client address preference: Cloudflare header, then the first hop
        # listed in X-Forwarded-For, then the transport peer address.
        forwarded_first = headers.get("X-Forwarded-For", "").split(",")[0].strip()
        ip = headers.get("CF-Connecting-IP") or forwarded_first or request.remote or ""

        entry = {
            "ip": ip,
            "country": headers.get("CF-IPCountry", ""),
            "path": path,
            "method": request.method,
            "user_agent": headers.get("User-Agent", ""),
            "referer": headers.get("Referer", ""),
            "accept_lang": headers.get("Accept-Language", ""),
        }

        # Hold the lock only while touching the buffer; the insert itself
        # happens after the lock is released.
        pending = None
        async with self._visitor_lock:
            buffer = self._visitor_buffer
            buffer.append(entry)
            if len(buffer) >= 20:
                pending = list(buffer)
                buffer.clear()

        if pending:
            try:
                await _run_sync(db.insert_visitors, pending)
            except Exception as e:
                log.warning("Visitor insert failed: %s", e)
        return response

    return visitor_middleware
async def _flush_visitors(self):
    """Flush buffered visitor records every 30 seconds, forever.

    Complements the size-triggered flush in the visitor middleware so that
    records do not sit in the buffer indefinitely under low traffic. Each
    cycle empties ``self._visitor_buffer`` under ``self._visitor_lock`` and
    passes the batch to ``db.insert_visitors`` through ``_run_sync``. A
    failed insert is logged as a warning and the batch is not re-queued.
    The loop only ends when the task is cancelled.
    """
    while True:
        await asyncio.sleep(30)

        # Snapshot and empty the buffer under the lock; insert outside it.
        async with self._visitor_lock:
            pending = list(self._visitor_buffer)
            self._visitor_buffer.clear()

        if not pending:
            continue
        try:
            await _run_sync(db.insert_visitors, pending)
        except Exception as e:
            log.warning("Visitor flush failed: %s", e)
def _setup_routes(self): def _setup_routes(self):
self.app.router.add_get("/", self._handle_index) self.app.router.add_get("/", self._handle_index)
@@ -219,6 +277,7 @@ class WebServer:
site = web.TCPSite(runner, "0.0.0.0", config.WEB_PORT) site = web.TCPSite(runner, "0.0.0.0", config.WEB_PORT)
await site.start() await site.start()
log.info("Web server listening on http://0.0.0.0:%s", config.WEB_PORT) log.info("Web server listening on http://0.0.0.0:%s", config.WEB_PORT)
flush_task = asyncio.create_task(self._flush_visitors())
# Keep running until cancelled # Keep running until cancelled
try: try:
while True: while True:
@@ -226,4 +285,5 @@ class WebServer:
except asyncio.CancelledError: except asyncio.CancelledError:
pass pass
finally: finally:
flush_task.cancel()
await runner.cleanup() await runner.cleanup()

View File

@@ -40,3 +40,16 @@ CREATE TABLE IF NOT EXISTS users (
updated_at DateTime DEFAULT now() updated_at DateTime DEFAULT now()
) ENGINE = ReplacingMergeTree(updated_at) ) ENGINE = ReplacingMergeTree(updated_at)
ORDER BY user_id; ORDER BY user_id;
-- Per-request visitor log. Rows are written in batches by insert_visitors,
-- which supplies every column except created_at.
CREATE TABLE IF NOT EXISTS visitors (
-- Client address and country as reported by the request headers.
ip String,
country String,
-- Request path and HTTP method.
path String,
method String,
-- Raw User-Agent, Referer and Accept-Language header values.
user_agent String,
referer String,
accept_lang String,
-- Filled in by the server at insert time because the writer omits it.
created_at DateTime DEFAULT now()
) ENGINE = MergeTree()
ORDER BY (created_at, ip)
-- Rows become eligible for removal 90 days after created_at.
TTL created_at + INTERVAL 90 DAY;

View File

@@ -456,7 +456,7 @@ function renderPrediction(data) {
// Signal table // Signal table
const tbody = $('signal-table').querySelector('tbody'); const tbody = $('signal-table').querySelector('tbody');
const sigNames = {'base_rate':'Base Rate','markov_1':'Markov-1','markov_2':'Markov-2','recent_20':'Recent 20','streak':'Streak'}; const sigNames = {'base_rate':'Base Rate','markov_1':'Markov-1','markov_2':'Markov-2','recent_20':'Recent 20','streak':'Streak','balance':'Balance'};
tbody.innerHTML = Object.entries(data.signals).map(([key, sig]) => tbody.innerHTML = Object.entries(data.signals).map(([key, sig]) =>
`<tr><td style="text-align:left">${sigNames[key]||key}</td><td>${(sig.weight*100).toFixed(0)}%</td>` + `<tr><td style="text-align:left">${sigNames[key]||key}</td><td>${(sig.weight*100).toFixed(0)}%</td>` +
CHAIRS.map(c => `<td style="color:${CHAIR_COLORS[c]}">${pct(sig.probs[c])}</td>`).join('') + '</tr>' CHAIRS.map(c => `<td style="color:${CHAIR_COLORS[c]}">${pct(sig.probs[c])}</td>`).join('') + '</tr>'
@@ -914,7 +914,7 @@ function renderRunsTest(runs) {
function renderBacktest(bt) { function renderBacktest(bt) {
if (bt.error) { $('backtest-cards').innerHTML = `<div style="color:var(--text2)">${bt.error}</div>`; return; } if (bt.error) { $('backtest-cards').innerHTML = `<div style="color:var(--text2)">${bt.error}</div>`; return; }
const names = {base_rate:'Base Rate',markov_1:'Markov-1',markov_2:'Markov-2',recent_20:'Recent 20',streak:'Streak',combined:'Combined'}; const names = {base_rate:'Base Rate',markov_1:'Markov-1',markov_2:'Markov-2',recent_20:'Recent 20',streak:'Streak',balance:'Balance',combined:'Combined'};
$('backtest-cards').innerHTML = Object.entries(bt.accuracy).map(([key, acc]) => { $('backtest-cards').innerHTML = Object.entries(bt.accuracy).map(([key, acc]) => {
const fh = bt.full_hits?.[key] ?? '?'; const fh = bt.full_hits?.[key] ?? '?';
const sh = bt.semi_hits?.[key] ?? '?'; const sh = bt.semi_hits?.[key] ?? '?';
@@ -925,7 +925,7 @@ function renderBacktest(bt) {
}).join(''); }).join('');
if (bt.rolling_accuracy) { if (bt.rolling_accuracy) {
const ctx = $('backtest-chart').getContext('2d'); const ctx = $('backtest-chart').getContext('2d');
const colors = {base_rate:'#8b8fa3',markov_1:'#3b82f6',markov_2:'#ec4899',recent_20:'#f59e0b',streak:'#10b981',combined:'#6c5ce7'}; const colors = {base_rate:'#8b8fa3',markov_1:'#3b82f6',markov_2:'#ec4899',recent_20:'#f59e0b',streak:'#10b981',balance:'#f472b6',combined:'#6c5ce7'};
const datasets = Object.entries(bt.rolling_accuracy).map(([key, data]) => ({ const datasets = Object.entries(bt.rolling_accuracy).map(([key, data]) => ({
label: names[key]||key, data, borderColor: colors[key]||'#fff', backgroundColor: 'transparent', label: names[key]||key, data, borderColor: colors[key]||'#fff', backgroundColor: 'transparent',
borderWidth: key === 'combined' ? 3 : 1.5, pointRadius: 0, tension: 0.3, borderWidth: key === 'combined' ? 3 : 1.5, pointRadius: 0, tension: 0.3,