refactor migration system: single source of truth via replay

Eliminated dual-maintenance of schema.sql + versioned migrations.
All databases (fresh and existing) now replay migrations in order
starting from 0000_initial_schema.py. Removed _is_fresh_db() and
the fresh-DB fast-path that skipped migration execution.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-19 00:23:28 +01:00
parent 0b8350c770
commit 07c7e61049
6 changed files with 658 additions and 655 deletions

View File

@@ -3,6 +3,8 @@ Shared test fixtures for the Padelnomics test suite.
"""
import hashlib
import hmac
import sqlite3
import tempfile
import time
from datetime import datetime
from pathlib import Path
@@ -13,21 +15,44 @@ import pytest
from padelnomics import core
from padelnomics.app import create_app
from padelnomics.migrations.migrate import migrate
SCHEMA_PATH = Path(__file__).parent.parent / "src" / "padelnomics" / "migrations" / "schema.sql"
_SCHEMA_CACHE = None
def _get_schema_ddl():
    """Replay all migrations against a scratch DB once; return the DDL.

    Runs migrate() on a throwaway on-disk SQLite file, then dumps every
    CREATE statement from sqlite_master — excluding SQLite internals,
    FTS5 shadow tables, and the _migrations bookkeeping table — into a
    single executescript-ready string. The result is memoised in
    _SCHEMA_CACHE so the (comparatively slow) migration replay happens
    at most once per test session.
    """
    global _SCHEMA_CACHE
    if _SCHEMA_CACHE is not None:
        return _SCHEMA_CACHE
    # TemporaryDirectory cleans up after itself; the original mkdtemp()
    # leaked one directory (and DB file) per test session.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_db = str(Path(tmp_dir) / "schema.db")
        migrate(tmp_db)
        tmp_conn = sqlite3.connect(tmp_db)
        try:
            rows = tmp_conn.execute(
                "SELECT sql FROM sqlite_master"
                " WHERE sql IS NOT NULL"
                " AND name NOT LIKE 'sqlite_%'"
                " AND name NOT LIKE '%_fts_%'"  # FTS5 shadow tables (created by VIRTUAL TABLE)
                " AND name != '_migrations'"
                " ORDER BY rowid"
            ).fetchall()
        finally:
            # Close even if the query raises, so the temp dir can be removed
            # cleanly on Windows (open handles block directory deletion).
            tmp_conn.close()
    _SCHEMA_CACHE = ";\n".join(r[0] for r in rows) + ";"
    return _SCHEMA_CACHE
# ── Database ─────────────────────────────────────────────────
@pytest.fixture
async def db():
"""In-memory SQLite with full schema, patches core._db."""
"""In-memory SQLite with full schema from replaying migrations."""
schema_ddl = _get_schema_ddl()
conn = await aiosqlite.connect(":memory:")
conn.row_factory = aiosqlite.Row
await conn.execute("PRAGMA foreign_keys=ON")
schema = SCHEMA_PATH.read_text()
await conn.executescript(schema)
await conn.executescript(schema_ddl)
await conn.commit()
original_db = core._db
@@ -91,17 +116,24 @@ def create_subscription(db):
user_id: int,
plan: str = "pro",
status: str = "active",
paddle_customer_id: str = "ctm_test123",
paddle_subscription_id: str = "sub_test456",
provider_customer_id: str = "ctm_test123",
provider_subscription_id: str = "sub_test456",
current_period_end: str = "2025-03-01T00:00:00Z",
) -> int:
now = datetime.utcnow().isoformat()
# Create billing_customers record if provider_customer_id given
if provider_customer_id:
await db.execute(
"""INSERT OR IGNORE INTO billing_customers
(user_id, provider_customer_id, created_at) VALUES (?, ?, ?)""",
(user_id, provider_customer_id, now),
)
async with db.execute(
"""INSERT INTO subscriptions
(user_id, plan, status, paddle_customer_id,
paddle_subscription_id, current_period_end, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
(user_id, plan, status, paddle_customer_id, paddle_subscription_id,
(user_id, plan, status,
provider_subscription_id, current_period_end, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?)""",
(user_id, plan, status, provider_subscription_id,
current_period_end, now, now),
) as cursor:
sub_id = cursor.lastrowid

View File

@@ -13,11 +13,8 @@ from unittest.mock import patch
import pytest
from padelnomics.migrations.migrate import _discover_versions, _is_fresh_db, migrate
from padelnomics.migrations.migrate import _discover_versions, migrate
SCHEMA_PATH = (
Path(__file__).parent.parent / "src" / "padelnomics" / "migrations" / "schema.sql"
)
VERSIONS_DIR = (
Path(__file__).parent.parent / "src" / "padelnomics" / "migrations" / "versions"
)
@@ -25,14 +22,6 @@ VERSIONS_DIR = (
# ── Helpers ───────────────────────────────────────────────────
def _old_schema_sql():
"""Return schema.sql with paddle columns swapped back to lemonsqueezy."""
schema = SCHEMA_PATH.read_text()
schema = schema.replace("paddle_customer_id", "lemonsqueezy_customer_id")
schema = schema.replace("paddle_subscription_id", "lemonsqueezy_subscription_id")
return schema
def _table_names(conn):
"""Return sorted list of user-visible table names."""
rows = conn.execute(
@@ -49,11 +38,6 @@ def _column_names(conn, table):
# ── Fixtures ──────────────────────────────────────────────────
@pytest.fixture
def schema_sql():
return SCHEMA_PATH.read_text()
@pytest.fixture
def fresh_db_path(tmp_path):
"""Path to a non-existent DB file."""
@@ -62,53 +46,37 @@ def fresh_db_path(tmp_path):
@pytest.fixture
def existing_db(tmp_path):
"""DB with old lemonsqueezy column names and no _migrations table."""
"""DB with 0000 baseline applied (simulates an existing production DB)."""
db_path = str(tmp_path / "existing.db")
schema = _old_schema_sql()
# Remove the _migrations table DDL so this DB has no tracking
schema = re.sub(
r"CREATE TABLE IF NOT EXISTS _migrations\s*\([^)]+\);",
"",
schema,
)
conn = sqlite3.connect(db_path)
conn.executescript(schema)
conn.execute("PRAGMA foreign_keys=ON")
# Create _migrations table and apply only 0000
conn.execute("""
CREATE TABLE IF NOT EXISTS _migrations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
""")
mod_0000 = importlib.import_module(
"padelnomics.migrations.versions.0000_initial_schema"
)
mod_0000.up(conn)
conn.execute(
"INSERT INTO _migrations (name) VALUES (?)",
("0000_initial_schema",),
)
conn.commit()
conn.close()
return db_path
@pytest.fixture
def production_db(tmp_path, schema_sql):
"""DB with current paddle columns but no _migrations records."""
db_path = str(tmp_path / "production.db")
# Remove the _migrations DDL so it simulates manual migration
schema = re.sub(
r"CREATE TABLE IF NOT EXISTS _migrations\s*\([^)]+\);",
"",
schema_sql,
)
conn = sqlite3.connect(db_path)
conn.executescript(schema)
conn.commit()
conn.close()
return db_path
@pytest.fixture
def up_to_date_db(tmp_path, schema_sql):
"""DB with final schema and all migrations recorded."""
def up_to_date_db(tmp_path):
"""DB with all migrations applied via migrate()."""
db_path = str(tmp_path / "uptodate.db")
conn = sqlite3.connect(db_path)
conn.executescript(schema_sql)
# Record all discovered versions as applied
for f in sorted(VERSIONS_DIR.iterdir()):
if re.match(r"^\d{4}_.+\.py$", f.name):
conn.execute(
"INSERT INTO _migrations (name) VALUES (?)", (f.stem,)
)
conn.commit()
conn.close()
migrate(db_path)
return db_path
@@ -124,9 +92,8 @@ def mock_versions_dir(tmp_path):
class TestFreshDatabase:
def test_creates_all_tables(self, fresh_db_path, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", fresh_db_path)
migrate()
def test_creates_all_tables(self, fresh_db_path):
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
tables = _table_names(conn)
conn.close()
@@ -135,9 +102,8 @@ class TestFreshDatabase:
assert "subscriptions" in tables
assert "scenarios" in tables
def test_records_all_versions_as_applied(self, fresh_db_path, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", fresh_db_path)
migrate()
def test_records_all_versions_as_applied(self, fresh_db_path):
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
applied = {
r[0] for r in conn.execute("SELECT name FROM _migrations").fetchall()
@@ -146,40 +112,40 @@ class TestFreshDatabase:
versions = _discover_versions()
assert applied == set(versions)
def test_does_not_call_import_module(self, fresh_db_path, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", fresh_db_path)
with patch("padelnomics.migrations.migrate.importlib.import_module") as mock_imp:
migrate()
mock_imp.assert_not_called()
def test_uses_paddle_column_names(self, fresh_db_path, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", fresh_db_path)
migrate()
def test_uses_provider_column_names(self, fresh_db_path):
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
cols = _column_names(conn, "subscriptions")
conn.close()
assert "paddle_customer_id" in cols
assert "paddle_subscription_id" in cols
assert "provider_subscription_id" in cols
assert "paddle_customer_id" not in cols
assert "lemonsqueezy_customer_id" not in cols
def test_creates_rbac_tables(self, fresh_db_path):
    """Replaying migrations on a fresh DB must create the RBAC/billing tables."""
    migrate(fresh_db_path)
    connection = sqlite3.connect(fresh_db_path)
    table_names = _table_names(connection)
    connection.close()
    for expected in ("user_roles", "billing_customers"):
        assert expected in table_names
# ── TestExistingDatabase ──────────────────────────────────────
class TestExistingDatabase:
def test_applies_pending_migration(self, existing_db, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", existing_db)
migrate()
def test_applies_pending_migrations(self, existing_db):
migrate(existing_db)
conn = sqlite3.connect(existing_db)
cols = _column_names(conn, "subscriptions")
applied = {
r[0] for r in conn.execute("SELECT name FROM _migrations").fetchall()
}
conn.close()
assert "paddle_customer_id" in cols
assert "paddle_subscription_id" in cols
assert "lemonsqueezy_customer_id" not in cols
versions = _discover_versions()
assert applied == set(versions)
def test_records_migration_with_timestamp(self, existing_db, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", existing_db)
migrate()
def test_records_migration_with_timestamp(self, existing_db):
migrate(existing_db)
conn = sqlite3.connect(existing_db)
row = conn.execute(
"SELECT name, applied_at FROM _migrations WHERE name LIKE '0001%'"
@@ -194,16 +160,14 @@ class TestExistingDatabase:
class TestUpToDateDatabase:
def test_noop_when_all_applied(self, up_to_date_db, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", up_to_date_db)
def test_noop_when_all_applied(self, up_to_date_db):
with patch("padelnomics.migrations.migrate.importlib.import_module") as mock_imp:
migrate()
migrate(up_to_date_db)
mock_imp.assert_not_called()
def test_no_duplicate_entries_on_rerun(self, up_to_date_db, monkeypatch):
monkeypatch.setenv("DATABASE_PATH", up_to_date_db)
migrate()
migrate()
def test_no_duplicate_entries_on_rerun(self, up_to_date_db):
migrate(up_to_date_db)
migrate(up_to_date_db)
conn = sqlite3.connect(up_to_date_db)
count = conn.execute("SELECT COUNT(*) FROM _migrations").fetchone()[0]
conn.close()
@@ -214,21 +178,22 @@ class TestUpToDateDatabase:
class TestIdempotentMigration:
def test_production_db_paddle_cols_already_exist(
self, production_db, monkeypatch
):
"""Production scenario: paddle columns exist, no _migrations table.
0001 runs without error and gets recorded."""
monkeypatch.setenv("DATABASE_PATH", production_db)
migrate()
conn = sqlite3.connect(production_db)
cols = _column_names(conn, "subscriptions")
applied = {
r[0] for r in conn.execute("SELECT name FROM _migrations").fetchall()
}
def test_migrate_twice_is_idempotent(self, fresh_db_path):
"""Running migrate() twice produces the same result."""
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
tables_first = _table_names(conn)
count_first = conn.execute("SELECT COUNT(*) FROM _migrations").fetchone()[0]
conn.close()
assert "paddle_customer_id" in cols
assert "0001_rename_ls_to_paddle" in applied
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
tables_second = _table_names(conn)
count_second = conn.execute("SELECT COUNT(*) FROM _migrations").fetchone()[0]
conn.close()
assert tables_first == tables_second
assert count_first == count_second
# ── TestDiscoverVersions ─────────────────────────────────────
@@ -237,8 +202,9 @@ class TestIdempotentMigration:
class TestDiscoverVersions:
def test_finds_and_sorts_version_files(self):
versions = _discover_versions()
assert len(versions) >= 1
assert versions[0] == "0001_rename_ls_to_paddle"
assert len(versions) >= 2
assert versions[0] == "0000_initial_schema"
assert versions[1] == "0001_rename_ls_to_paddle"
def test_ignores_non_matching_files(self, mock_versions_dir, monkeypatch):
(mock_versions_dir / "__init__.py").write_text("")
@@ -258,69 +224,6 @@ class TestDiscoverVersions:
assert _discover_versions() == []
# ── TestIsFreshDb ─────────────────────────────────────────────
class TestIsFreshDb:
def test_empty_db_is_fresh(self, tmp_path):
conn = sqlite3.connect(str(tmp_path / "empty.db"))
assert _is_fresh_db(conn) is True
conn.close()
def test_db_with_schema_is_not_fresh(self, tmp_path, schema_sql):
conn = sqlite3.connect(str(tmp_path / "full.db"))
conn.executescript(schema_sql)
assert _is_fresh_db(conn) is False
conn.close()
def test_db_with_single_table_is_not_fresh(self, tmp_path):
conn = sqlite3.connect(str(tmp_path / "one.db"))
conn.execute("CREATE TABLE foo (id INTEGER PRIMARY KEY)")
assert _is_fresh_db(conn) is False
conn.close()
# ── TestMigration0001 ─────────────────────────────────────────
class TestMigration0001:
@pytest.fixture
def mod_0001(self):
return importlib.import_module(
"padelnomics.migrations.versions.0001_rename_ls_to_paddle"
)
def test_renames_columns(self, tmp_path, mod_0001):
conn = sqlite3.connect(str(tmp_path / "rename.db"))
conn.executescript(_old_schema_sql())
mod_0001.up(conn)
cols = _column_names(conn, "subscriptions")
conn.close()
assert "paddle_customer_id" in cols
assert "paddle_subscription_id" in cols
assert "lemonsqueezy_customer_id" not in cols
def test_idempotent_when_already_renamed(self, tmp_path, schema_sql, mod_0001):
conn = sqlite3.connect(str(tmp_path / "idem.db"))
conn.executescript(schema_sql)
# Should not raise even though columns are already paddle_*
mod_0001.up(conn)
cols = _column_names(conn, "subscriptions")
conn.close()
assert "paddle_customer_id" in cols
def test_recreates_index(self, tmp_path, mod_0001):
conn = sqlite3.connect(str(tmp_path / "idx.db"))
conn.executescript(_old_schema_sql())
mod_0001.up(conn)
indexes = conn.execute(
"SELECT name FROM sqlite_master WHERE type='index'"
" AND name='idx_subscriptions_provider'"
).fetchall()
conn.close()
assert len(indexes) == 1
# ── TestMigrationOrdering ─────────────────────────────────────
@@ -328,11 +231,6 @@ class TestMigrationOrdering:
def test_multiple_pending_run_in_order(self, tmp_path, monkeypatch):
"""Mock two version files and verify they run in sorted order."""
db_path = str(tmp_path / "order.db")
# Create a DB with one arbitrary table so it's not "fresh"
conn = sqlite3.connect(db_path)
conn.execute("CREATE TABLE dummy (id INTEGER PRIMARY KEY)")
conn.close()
monkeypatch.setenv("DATABASE_PATH", db_path)
# Create fake version files in a temp versions dir
@@ -364,13 +262,10 @@ class TestMigrationOrdering:
"padelnomics.migrations.versions.0002_second",
]
def test_migrations_table_created_on_existing_db(
self, existing_db, monkeypatch
):
"""An existing DB without _migrations gets the table after migrate()."""
monkeypatch.setenv("DATABASE_PATH", existing_db)
migrate()
conn = sqlite3.connect(existing_db)
def test_migrations_table_created_automatically(self, fresh_db_path):
"""A fresh DB gets the _migrations table from migrate()."""
migrate(fresh_db_path)
conn = sqlite3.connect(fresh_db_path)
tables = _table_names(conn)
conn.close()
assert "_migrations" in tables