Compare commits
32 Commits
41a598df53
...
v10
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6fb1e990e3 | ||
|
|
6edf8ba65e | ||
|
|
ed0a578050 | ||
|
|
c1cdeec6be | ||
|
|
710624f417 | ||
|
|
6cf98f44d4 | ||
|
|
60659a5ec5 | ||
|
|
beb4195f16 | ||
|
|
88cc857f3a | ||
|
|
9116625884 | ||
|
|
1af65bb46f | ||
|
|
9b0bfc478d | ||
|
|
adf22924f6 | ||
|
|
09665b7786 | ||
|
|
93349923bd | ||
|
|
642041b32b | ||
|
|
bb70a5372b | ||
|
|
bc28d93662 | ||
|
|
81ce1d277a | ||
|
|
2012894eeb | ||
|
|
143ad28854 | ||
|
|
415d28afa9 | ||
|
|
66d7cdea21 | ||
|
|
9c2bf51c73 | ||
|
|
7e0b06a2ad | ||
|
|
dca198c17d | ||
|
|
49820391ab | ||
|
|
f048e8276f | ||
|
|
bcacc7aae6 | ||
|
|
00393933ca | ||
|
|
89ff931212 | ||
|
|
4e82907a70 |
@@ -56,9 +56,10 @@ WORKFLOWS_PATH=ENC[AES256_GCM,data:PehxEUMb1K3F1557BY3IqKD7sbJcoaIjnQvboBRJ1g==,
|
||||
ALERT_WEBHOOK_URL=
|
||||
NTFY_TOKEN=
|
||||
#ENC[AES256_GCM,data:BCyQYjRnTx8yW9A=,iv:4OPCP+xzRLUJrpoFewVnbZRKnZH4sAbV76SM//2k5wU=,tag:HxwEp7VFVZUN/VjPiL/+Vw==,type:comment]
|
||||
PROXY_URLS=ENC[AES256_GCM,data:CzRaK0piUQfvuYYsdz0i2MEQIphKi0BhNvHw9alo46aTH+kqEKvoS7dKEKzyU9VJ4TyNweInlVMxB962DsvRoBtnHwo/pUmYtVeEr2881clNgEiZVYRDFRdEbpULcLPDJa3ey1leqAAHlmiL0RQ6Qa57gPCOVBzVG6npGLKO+K8XVIb+BZMs9kEUOlw7iuqTJW5xPN/t4X/jHidEqfTSAl9b4vU4bsYVuY3yQrL+/V5QpTbyXlf+cMq3flpA3zE2Fxhalzg+c/wHMTrCksFwrCkrInW0kY9yPkA7usUWr1xwwaV3wIDoNQsLXpMd/3RztipNvKtOMRhRJOmjzP7BKhCJvvvKTV5p+mBCulFijbMQgArg3BqcFanfw3YZ4wPd4hp8q/vOhE/U9Wu0yrMmyWYFHYGQnFVARlBH7pwn/ez8W4KqRFveEAuev9CE7K7s5RqzPLelSkoa9UuiiULJ+t0LFgKlgxuLtQ8GdFdgsmBCxY/4U/xzvNdC82hD549z5nMWWlaUJm4onPWirT/RYm7j3v6z4mmNImI2W6rCNbvEvsXwWsciquVaBIgReA47p6/GTzZ9VZMyGr4PdzB87BJGAgX1W57WNdPAsRIF49XP2BU72RtRFxsUG8Ha2dc=,iv:a10Vpk7Zv8QqORuEcMlpcvtHO/zjBLaFphWPYBXwysc=,tag:8N66/R+CLqEZ45wj+tCt6w==,type:str]
|
||||
RECHECK_WINDOW_MINUTES=ENC[AES256_GCM,data:YWM=,iv:iY5+uMazLAFdwyLT7Gr7MaF1QHBIgHuoi6nF2VbSsOA=,tag:dc6AmuJdTQ55gVe16uzs6A==,type:str]
|
||||
PROXY_URLS_FALLBACK=ENC[AES256_GCM,data:95rwI7kKUj1YxLpjChtrM4f2EFUDzQdAg1e1MOHnLwQ9ZY54UNH7v4JcqTsvDk9D+0N/BIdwFSDi7pnCSd6BWFV+cQ==,iv:rm9HdBsibSne7JR6vWl+ao/GHb1rbuVdZZDUWhVbTnE=,tag:NJ2STxmFZPvFayfTrEEYbg==,type:str]
|
||||
PROXY_URLS_RESIDENTIAL=ENC[AES256_GCM,data:lfmlsjXFtL+zo40SNFLiFKaZiYvE7CNH+zRwjMK5pqPfCs0TlMX+Y9e1KmzAS+y/cI69TP5sgMPRBzER0Jn7RvH0KA==,iv:jBN/4/K5L5886G4rSzxt8V8u/57tAuj3R76haltzqeU=,tag:Xe6o9eg2PodfktDqmLgVNA==,type:str]
|
||||
PROXY_URLS_DATACENTER=ENC[AES256_GCM,data:X6xpxz5u8Xh3OXjkIz3UwqH847qLvY9cVWVktW5B+lqhmXAKTzoTzHds8vlRGJf5Up9Yx44XcigbvuK33ZJDSq9ovkAIbY55OK4=,iv:3hHyFD+H9HMzQ/27bPjGr59+7yWmEneUdN9XPQasCig=,tag:oBXsSuV5idB7HqNrNOruwg==,type:str]
|
||||
WEBSHARE_DOWNLOAD_URL=ENC[AES256_GCM,data:1D9VRZ3MCXPQWfiMH8+CLcrxeYnVVcQgZDvt5kltvbSTuSHQ2hHDmZpBkTOMIBJnw4JLZ2JQKHgG4OaYDtsM2VltFPnfwaRgVI9G5PSenR3o4PeQmYO1AqWOmjn19jPxNXRhEXdupP9UT+xQNXoBJsl6RR20XOpMA5AipUHmSjD0UIKXoZLU,iv:uWUkAydac//qrOTPUThuOLKAKXK4xcZmK9qBVFwpqt4=,tag:1vYhukBW9kEuSXCLAiZZmQ==,type:str]
|
||||
CIRCUIT_BREAKER_THRESHOLD=
|
||||
#ENC[AES256_GCM,data:ZcX/OEbrMfKizIQYq3CYGnvzeTEX7KsmQaz2+Jj1rG5tbTy2aljQBIEkjtiwuo8NsNAD+FhIGRGVfBmKe1CAKME1MuiCbgSG,iv:4BSkeD3jZFawP09qECcqyuiWcDnCNSgbIjBATYhazq4=,tag:Ep1d2Uk700MOlWcLWaQ/ig==,type:comment]
|
||||
GSC_SERVICE_ACCOUNT_PATH=
|
||||
@@ -70,7 +71,7 @@ GEONAMES_USERNAME=ENC[AES256_GCM,data:aSkVdLNrhiF6tlg=,iv:eemFGwDIv3EG/P3lVHGZj9
|
||||
CENSUS_API_KEY=ENC[AES256_GCM,data:qqG971573aGq9MiHI2xLlanKKFwjfcNNoMXtm8LNbyh0rMbQN2XukQ==,iv:az2i0ldH75nHGah4DeOxaXmDbVYqmC1c77ptZqFA9BI=,tag:zoDdKj9bR7fgIDo1/dEU2g==,type:str]
|
||||
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBxNWNmUzVNUGdWRnE0ZFpF\nM0JQZWZ3UDdEVzlwTmIxakxOZXBkT2x2ZlNrClRtV2M3S2daSGxUZmFDSWQ2Nmh4\neU51QndFcUxlSE00RFovOVJTcDZmUUUKLS0tIDcvL3hRMDRoMWZZSXljNzA3WG5o\nMWFic21MV0krMzlIaldBTVU0ZDdlTE0K7euGQtA+9lHNws+x7TMCArZamm9att96\nL8cXoUDWe5fNI5+M1bXReqVfNwPTwZsV6j/+ZtYKybklIzWz02Ex4A==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_0__map_recipient=age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a
|
||||
sops_lastmodified=2026-02-26T14:31:14Z
|
||||
sops_mac=ENC[AES256_GCM,data:iqFuTexTS9U/Nv8xoTpHljTNQTGX9ITcJ3AjhDEtxrh0Z9/lngfBvGtjiKmpwFGlobQw/x+/YLM+u3MhciwXF7qNwFfJ/StN2Y66uF71SxWotbL70Dxl4oWSVL3sU+2NYbw5yP0p+xCbE+rEd5SqAe6K5yyq5X25hz8fIapxlYA=,iv:foqoWQVMipuOAQ0Kp799PaIhCIrxV8T5cC811wIzxR8=,tag:yNfxSV3R21XEXksjmdsKBw==,type:str]
|
||||
sops_lastmodified=2026-02-28T15:50:46Z
|
||||
sops_mac=ENC[AES256_GCM,data:HiLZTLa+p3mqa4hw+tKOK27F/bsJOy4jmDi8MHToi6S7tRfBA/TzcEzXvXUIkkwAixN73NQHvBVeRnbcEsApVpkaxH1OqnjvvyT+B3YFkTEtxczaKGWlCvbqFZNmXYsFvGR9njaWYWsTQPkRIjrroXrSrhr7uxC8F40v7ByxJKo=,iv:qj2IpzWRIh/mM1HtjjkNbyFuhtORKXslVnf/vdEC9Uw=,tag:fr9CZsL74HxRJLXn9eS0xQ==,type:str]
|
||||
sops_unencrypted_suffix=_unencrypted
|
||||
sops_version=3.12.1
|
||||
|
||||
@@ -43,7 +43,10 @@ ALERT_WEBHOOK_URL=ENC[AES256_GCM,data:4sXQk8zklruC525J279TUUatdDJQ43qweuoPhtpI82
|
||||
NTFY_TOKEN=ENC[AES256_GCM,data:YlOxhsRJ8P1y4kk6ugWm41iyRCsM6oAWjvbU9lGcD0A=,iv:JZXOvi3wTOPV9A46c7fMiqbszNCvXkOgh9i/H1hob24=,tag:8xnPimgy7sesOAnxhaXmpg==,type:str]
|
||||
SUPERVISOR_GIT_PULL=ENC[AES256_GCM,data:mg==,iv:KgqMVYj12FjOzWxtA1T0r0pqCDJ6MtHzMjE+4W/W+s4=,tag:czFaOqhHG8nqrQ8AZ8QiGw==,type:str]
|
||||
#ENC[AES256_GCM,data:hzAZvCWc4RTk290=,iv:RsSI4OpAOQGcFVpfXDZ6t705yWmlO0JEWwWF5uQu9As=,tag:UPqFtA2tXiSa0vzJAv8qXg==,type:comment]
|
||||
PROXY_URLS=ENC[AES256_GCM,data:nm4B++SkZZgN3p2xru3WrpVA0X6O8yvb45tH/ovF4006zBy28xqVxbsd44Mz6b5FMinjOXRmGwoI/GDWmdJLzBYdpryQ/FhpbzSUpr1ZOjOz+7P0vn2jfBGAB8ksU3i5kuYglud3EyQGFL+v+uooxwrIUCjfzmmB4vCmf7phssKDsK1CqzmdZ1c54ehSu4bRRdmGp9d0+r+j1SpXb/JbZ8LTqUIhLlZXrHFqkCfN1czhFK9IwMVgR00Q4v2YkjaRBME4lVqwk1NwwatbS9Fq8LlzwuT1uKk+T6ZDkFKC8ZoPW1YRqF13X7hFGFXCNRqABRDZ45lqxYQbBoRrWmH2tfMiAmTrIuRsdPM8bZ/Ol5mXSDhs0HyWX2urX+LD65rIOO0zN/lwjXSwh5mwwBdB61akdzsWRyLZsdafuQUmgGul8y0eGMEbFWaty3bdrtAmqtsvHwxD/Dp/gQWScESXvPd1arn55zaXmefOy+ZLwcmx+FAJPpTMXRaq6Y/Z+D1PZZ+Uhu2D6tsAR4VvqqwlUgpsrAFXk6chJzOry8rmmxoMuIj9mXfjG+BqPFhV2oQsKSuIqFQqd/ZidJLO8ZSxA7L+h1eH4cQjcUd2nfzroG8nnKZ+cA8hQMfLuFiMY1I=,iv:nTaNQlC3px/lnodLphnILWbPVnelaUKKOZAFAaHi8MU=,tag:TYkIX1nrc+PKbvvnWYcvbg==,type:str]
|
||||
PROXY_URLS_RESIDENTIAL=ENC[AES256_GCM,data:x/F0toXDc8stsUNxaepCmxq1+WuacqqPtdc+R5mxTwcAzsKxCdwt8KpBZWMvz7ku4tHDGsKD949QAX2ANXP9oCMTgW0=,iv:6G9gE9/v7GaYj8aqVTmMrpw6AcQK9yMSCAohNdAD1Ws=,tag:2Jimr1ldVSfkh8LPEwdN3w==,type:str]
|
||||
PROXY_URLS_DATACENTER=ENC[AES256_GCM,data:6BfXBYmyHpgZU/kJWpZLf8eH5VowVK1n0r6GzFTNAx/OmyaaS1RZVPC1JPkPBnTwEmo0WHYRW8uiUdkABmH9F5ZqqlsAesyfW7zvU9r7yD+D7w==,iv:3CBn2qCoTueQy8xVcQqZS4E3F0qoFYnNbzTZTpJ1veo=,tag:wC3Ecl4uNTwPiT23ATvRZg==,type:str]
|
||||
WEBSHARE_DOWNLOAD_URL=ENC[AES256_GCM,data:/N77CFf6tJWCk7HrnBOm2Q1ynx7XoblzfbzJySeCjrxqiu4r+CB90aDkaPahlQKI00DUZih3pcy7WhnjdAwI30G5kJZ3P8H8/R0tP7OBK1wPVbsJq8prQJPFOAWewsS4KWNtSURZPYSCxslcBb7DHLX6ZAjv6A5KFOjRK2N8usR9sIabrCWh,iv:G3Ropu/JGytZK/zKsNGFjjSu3Wt6fvHaAqI9RpUHvlI=,tag:fv6xuS94OR+4xfiyKrYELA==,type:str]
|
||||
PROXY_CONCURRENCY=ENC[AES256_GCM,data:vdEZ,iv:+eTNQO+s/SsVDBLg1/+fneMzEEsFkuEFxo/FcVV+mWc=,tag:i/EPwi/jOoWl3xW8H0XMdw==,type:str]
|
||||
RECHECK_WINDOW_MINUTES=ENC[AES256_GCM,data:L2s=,iv:fV3mCKmK5fxUmIWRePELBDAPTb8JZqasVIhnAl55kYw=,tag:XL+PO6sblz/7WqHC3dtk1w==,type:str]
|
||||
#ENC[AES256_GCM,data:RC+t2vqLwLjapdAUql8rQls=,iv:Kkiz3ND0g0MRAgcPJysIYMzSQS96Rq+3YP5yO7yWfIY=,tag:Y6TbZd81ihIwn+U515qd1g==,type:comment]
|
||||
GSC_SERVICE_ACCOUNT_PATH=ENC[AES256_GCM,data:Vki6yHk+gd4n,iv:rxzKvwrGnAkLcpS41EZ097E87NrIpNZGFfl4iXFvr40=,tag:EZkBJpCq5rSpKYVC4H3JHQ==,type:str]
|
||||
@@ -53,9 +56,13 @@ BING_SITE_URL=ENC[AES256_GCM,data:M33VI97DyxH8gRR3ZUXoXg4QrEv5og==,iv:GxZtwfbBVi
|
||||
#ENC[AES256_GCM,data:OTUMKNkRW0zrupNppXthwE1oieILhNjM+cjx5hFn69g=,iv:48ID2qtSe9ggD2X+G/iUqp3v2uwEc7fZw8lxHIvVXmk=,tag:okBn0Npk1K9dDOFWA/AB1A==,type:comment]
|
||||
GEONAMES_USERNAME=ENC[AES256_GCM,data:UXd/S2TzXPiGmLY=,iv:OMURM5E6SFEsaqroUlH76DEnr7C/ujNk9UQnbWT0hK4=,tag:VsjjS12QDbudiEhdAQ/OCQ==,type:str]
|
||||
CENSUS_API_KEY=ENC[AES256_GCM,data:9RbKlxSD17LqIuuNXaOKSgZ8LnFh9Wbze3XHgpctfV/1TqBMZTIedQ==,iv:WwsmR3HLUEcgUpLliGRaUPhGM9vFNPMGXSAQQ6+9UVc=,tag:R4EMNy5MxxvK0UTaCL0umA==,type:str]
|
||||
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBqck9GdHVkUmIzNnlvMW5k\nVkNtazZ0ZytzZ25vMU5SckdFLzcrTFNYOVZZCmNjbU9yV0lTRlB5cEpMVC81QTdu\nS2ZDc0ZkNnRBNFhFMEN1bjY3YVhwZEEKLS0tIGE5TEdYenVOV1IwcE0wYnlKNElF\ncXV1K0xuczZzZ3JnL1lrSC9QWHIwNGsKfW4ARke6Cj83BpQc8weayL3v8SVgQ+Fp\n99aVWp103O1fumksR1w4u0X7fSNRrgAmpY/yyZuEvsoIY8ELFVcqgQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBaUVk0UEVqdmtsM3VzQnpZ\nZjJDZ1lsM0VqWFpVVXUvNzdQcCtHbVJLNjFnCmhna01vTkVBaFQ5ZVlXeGhYNXdH\ncWJ5Qi9PdkxLaHBhQnR3cmtoblkxdEUKLS0tIDhHamY4NXhxOG9YN1NpbTN1aVRh\nOHVKcEN1d0QwQldVTDlBWUU4SDVDWlUKRJU+CTfTzIx6LLKin9sTXAHPVAfiUerZ\nCqYVFncsCJE3TbMI424urQj7kragPoGl1z4++yqAXNTRxfZIY4KTkg==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_0__map_recipient=age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a
|
||||
sops_lastmodified=2026-02-26T14:32:28Z
|
||||
sops_mac=ENC[AES256_GCM,data:pyHQHwTtjh7OLiMqbqhUjfrmetEtYS7yB342C/TWfDCwEotWLVwnGWlC4+HIl53pw9+3AgoBVRnW0t86e4kG9O8KyHnk68S9qBcpUsybW3lyGPNXmBydv1W9gQHuK8f/4WGIbkhNxyIToKg9ZAmYWFxNhRKSoYKm5P9Uh7B7CF4=,iv:syrX8VdL3JsDsawvFWbX04Ygcr18hjSSHfEwHkyKETk=,tag:qrhWkh/e+21OKGU2+rCeyg==,type:str]
|
||||
sops_age__list_1__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBmVEticFRVemlzZnlzek4x\nbWJ0d0h5ejJVUk5remo1VkdxNjVpdllqbFhFClc1UXlNd09xVVA5MnltMlN5MWRy\nYUlNRmNybHh1RGdPVC9yWlYrVmRTdkkKLS0tIHBUbU9qSDMrVGVHZDZGSFdpWlBh\nT3NXTGl0SmszaU9hRmU5bXI0cDRoRW8KLvbNYsBEwz+ITKvn7Yn+iNHiRzyyjtQt\no9/HupykJ3WjSdleGz7ZN6UiPGelHp0D/rzSASTYaI1+0i0xZ4PUoQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_1__map_recipient=age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
|
||||
sops_age__list_2__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFeHhaOURNZnRVMEwxNThu\nUjF4Q0kwUXhTUE1QSzZJbmpubnh3RnpQTmdvCjRmWWxpNkxFUmVGb3NRbnlydW5O\nWEg3ZXJQTU4vcndzS2pUQXY3Q0ttYjAKLS0tIE9IRFJ1c2ZxbGVHa2xTL0swbGN1\nTzgwMThPUDRFTWhuZHJjZUYxOTZrU00KY62qrNBCUQYxwcLMXFEnLkwncxq3BPJB\nKm4NzeHBU87XmPWVrgrKuf+PH1mxJlBsl7Hev8xBTy7l6feiZjLIvQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_2__map_recipient=age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
|
||||
sops_lastmodified=2026-02-28T17:03:44Z
|
||||
sops_mac=ENC[AES256_GCM,data:IQ9jpRxVUssaMK+qFcM3nPdzXHkiqp6E+DhEey1TfqUu5GCBNsWeVy9m9A6p9RWhu2NtJV7aKdUeqneuMtD1q5Tnm6L96zuyot2ESnx2N2ssD9ilrDauQxoBJcrJVnGV61CgaCz9458w8BuVUZydn3MoHeRaU7bOBBzQlTI6vZk=,iv:qHqdt3av/KZRQHr/OS/9KdAJUgKlKEDgan7qI3Zzkck=,tag:fOvdO9iRTTF1Siobu2mLqg==,type:str]
|
||||
sops_unencrypted_suffix=_unencrypted
|
||||
sops_version=3.12.1
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -61,3 +61,4 @@ web/src/padelnomics/static/css/output.css
|
||||
|
||||
# Generated report PDFs (built locally via make report-pdf, not committed)
|
||||
data/content/reports/_build/
|
||||
_serving_meta.json
|
||||
|
||||
@@ -17,7 +17,9 @@ test:
|
||||
|
||||
tag:
|
||||
stage: tag
|
||||
image: alpine/git
|
||||
image:
|
||||
name: alpine/git
|
||||
entrypoint: [""]
|
||||
script:
|
||||
- git tag "v${CI_PIPELINE_IID}"
|
||||
- git push "https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git" "v${CI_PIPELINE_IID}"
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
creation_rules:
|
||||
- path_regex: \.env\..+\.sops$
|
||||
age: age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a,age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
|
||||
age: age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a,age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw,age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
|
||||
|
||||
@@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- **Three-tier proxy system** for extraction pipeline: free (Webshare auto-fetched) → datacenter (`PROXY_URLS_DATACENTER`) → residential (`PROXY_URLS_RESIDENTIAL`). Webshare free proxies are now auto-fetched from their download API on each run — no more manually copying stale proxy lists.
|
||||
- `proxy.py`: added `fetch_webshare_proxies()` (stdlib urllib, bounded read + timeout), `load_proxy_tiers()` (assembles N tiers from env), generalised `make_tiered_cycler()` to accept `list[list[str]]` with N-level escalation. Exposes `is_exhausted()`, `active_tier_index()`, `tier_count()`.
|
||||
- `playtomic_availability.py`: both `extract()` and `extract_recheck()` now use `load_proxy_tiers()` + N-tier cycler. `_fetch_venues_parallel` `fallback_urls` param removed. `is_fallback_active()` replaced by `is_exhausted()`.
|
||||
- `playtomic_tenants.py`: uses `load_proxy_tiers()` flattened for simple round-robin.
|
||||
|
||||
### Changed
|
||||
- **Env vars renamed** (breaking): `PROXY_URLS` → removed, `PROXY_URLS_FALLBACK` → removed. New vars: `WEBSHARE_DOWNLOAD_URL`, `PROXY_URLS_DATACENTER`, `PROXY_URLS_RESIDENTIAL`.
|
||||
|
||||
### Added
|
||||
- **Phase 2a — NUTS-1 regional income differentiation** (`opportunity_score`): Munich and Berlin no longer share the same income figure as Chemnitz.
|
||||
- `eurostat.py`: added `nama_10r_2hhinc` dataset config (NUTS-2 cube with NUTS-1 entries); filter params now appended to API URL so the server pre-filters the large cube before download (also makes `ilc_di03` requests smaller).
|
||||
|
||||
27
README.md
27
README.md
@@ -396,18 +396,19 @@ docker compose logs -f app # tail logs
|
||||
|
||||
## CI/CD
|
||||
|
||||
Go to GitLab → padelnomics → Settings → CI/CD → Variables and add:
|
||||
Pull-based deployment via Gitea Actions — no SSH keys or deploy credentials in CI.
|
||||
|
||||
| Variable | Value | Notes |
|
||||
|----------|-------|-------|
|
||||
| SSH_PRIVATE_KEY | Your ed25519 private key | Mask it, type "Variable" |
|
||||
| DEPLOY_HOST | Your Hetzner server IP | e.g. 1.2.3.4 |
|
||||
| DEPLOY_USER | SSH username on the server | e.g. deploy or root |
|
||||
| SSH_KNOWN_HOSTS | Server host key | Run `ssh-keyscan $YOUR_SERVER_IP` |
|
||||
1. Push to master → Gitea Actions runs tests (`.gitea/workflows/ci.yaml`)
|
||||
2. On success, CI creates tag `v<run_number>` using the built-in `github.token`
|
||||
3. On-server supervisor polls for new tags every 60s and deploys automatically
|
||||
|
||||
Server-side one-time setup:
|
||||
1. Add the matching public key to `~/.ssh/authorized_keys` for the deploy user
|
||||
2. Clone the repo to `/opt/padelnomics`
|
||||
3. Create `.env` from `padelnomics/.env.example` with production values
|
||||
4. `chmod +x deploy.sh && ./deploy.sh` for the first deploy
|
||||
5. Point reverse proxy to port 5000
|
||||
**Server-side one-time setup:**
|
||||
```bash
|
||||
bash infra/setup_server.sh # creates padelnomics_service user, keys, dirs
|
||||
ssh root@<server> 'bash -s' < infra/bootstrap_supervisor.sh
|
||||
```
|
||||
|
||||
1. `setup_server.sh` generates an ed25519 SSH deploy key — add the printed public key to Gitea:
|
||||
`git.padelnomics.io → padelnomics → Settings → Deploy Keys → Add key (read-only)`
|
||||
2. Add the printed age public key to `.sops.yaml`, re-encrypt, commit + push
|
||||
3. Run `bootstrap_supervisor.sh` — clones from `git.padelnomics.io:2222`, decrypts secrets, starts systemd supervisor
|
||||
|
||||
@@ -33,7 +33,7 @@ from pathlib import Path
|
||||
import niquests
|
||||
|
||||
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging, ua_for_proxy
|
||||
from .proxy import load_fallback_proxy_urls, load_proxy_urls, make_tiered_cycler
|
||||
from .proxy import load_proxy_tiers, make_tiered_cycler
|
||||
from .utils import (
|
||||
compress_jsonl_atomic,
|
||||
flush_partial_batch,
|
||||
@@ -52,6 +52,9 @@ MAX_VENUES_PER_RUN = 20_000
|
||||
MAX_RETRIES_PER_VENUE = 2
|
||||
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
|
||||
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
|
||||
# Worker count: defaults to MAX_PROXY_CONCURRENCY (200). Override via PROXY_CONCURRENCY env var.
|
||||
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
|
||||
MAX_PROXY_CONCURRENCY = 200
|
||||
|
||||
# Parallel mode submits futures in batches so the circuit breaker can stop
|
||||
# new submissions after it opens. Already-inflight futures in the current
|
||||
@@ -76,8 +79,10 @@ def _load_tenant_ids(landing_dir: Path) -> list[str]:
|
||||
if not playtomic_dir.exists():
|
||||
return []
|
||||
|
||||
# Prefer JSONL (new format), fall back to blob (old format)
|
||||
tenant_files = sorted(playtomic_dir.glob("*/*/tenants.jsonl.gz"), reverse=True)
|
||||
# Prefer daily partition (YYYY/MM/DD), fall back to older monthly/weekly partitions
|
||||
tenant_files = sorted(playtomic_dir.glob("*/*/*/tenants.jsonl.gz"), reverse=True)
|
||||
if not tenant_files:
|
||||
tenant_files = sorted(playtomic_dir.glob("*/*/tenants.jsonl.gz"), reverse=True)
|
||||
if not tenant_files:
|
||||
tenant_files = sorted(playtomic_dir.glob("*/*/tenants.json.gz"), reverse=True)
|
||||
if not tenant_files:
|
||||
@@ -190,14 +195,13 @@ def _fetch_venues_parallel(
|
||||
start_max_str: str,
|
||||
worker_count: int,
|
||||
cycler: dict,
|
||||
fallback_urls: list[str],
|
||||
on_result=None,
|
||||
) -> tuple[list[dict], int]:
|
||||
"""Fetch availability for multiple venues in parallel.
|
||||
|
||||
Submits futures in batches of PARALLEL_BATCH_SIZE. After each batch
|
||||
completes, checks the circuit breaker: if it opened and there is no
|
||||
fallback configured, stops submitting further batches.
|
||||
completes, checks the circuit breaker: if all proxy tiers are exhausted,
|
||||
stops submitting further batches.
|
||||
|
||||
on_result: optional callable(result: dict) invoked inside the lock for
|
||||
each successful result — used for incremental partial-file flushing.
|
||||
@@ -215,10 +219,10 @@ def _fetch_venues_parallel(
|
||||
|
||||
with ThreadPoolExecutor(max_workers=worker_count) as pool:
|
||||
for batch_start in range(0, len(tenant_ids), PARALLEL_BATCH_SIZE):
|
||||
# Stop submitting new work if circuit is open with no fallback
|
||||
if cycler["is_fallback_active"]() and not fallback_urls:
|
||||
# Stop submitting new work if all proxy tiers are exhausted
|
||||
if cycler["is_exhausted"]():
|
||||
logger.error(
|
||||
"Circuit open with no fallback — stopping after %d/%d venues",
|
||||
"All proxy tiers exhausted — stopping after %d/%d venues",
|
||||
completed_count, len(tenant_ids),
|
||||
)
|
||||
break
|
||||
@@ -294,10 +298,9 @@ def extract(
|
||||
venues_to_process = [tid for tid in all_venues_to_process if tid not in already_done]
|
||||
|
||||
# Set up tiered proxy cycler with circuit breaker
|
||||
proxy_urls = load_proxy_urls()
|
||||
fallback_urls = load_fallback_proxy_urls()
|
||||
worker_count = len(proxy_urls) if proxy_urls else 1
|
||||
cycler = make_tiered_cycler(proxy_urls, fallback_urls, CIRCUIT_BREAKER_THRESHOLD)
|
||||
tiers = load_proxy_tiers()
|
||||
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
||||
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
||||
|
||||
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
|
||||
start_max_str = start_max.strftime("%Y-%m-%dT%H:%M:%S")
|
||||
@@ -325,9 +328,9 @@ def extract(
|
||||
venues_errored = 0
|
||||
|
||||
if worker_count > 1:
|
||||
logger.info("Parallel mode: %d workers, %d proxies", worker_count, len(proxy_urls))
|
||||
logger.info("Parallel mode: %d workers, %d tier(s)", worker_count, len(tiers))
|
||||
new_venues_data, venues_errored = _fetch_venues_parallel(
|
||||
venues_to_process, start_min_str, start_max_str, worker_count, cycler, fallback_urls,
|
||||
venues_to_process, start_min_str, start_max_str, worker_count, cycler,
|
||||
on_result=_on_result,
|
||||
)
|
||||
else:
|
||||
@@ -342,9 +345,9 @@ def extract(
|
||||
_on_result(result)
|
||||
else:
|
||||
venues_errored += 1
|
||||
circuit_opened = cycler["record_failure"]()
|
||||
if circuit_opened and not fallback_urls:
|
||||
logger.error("Circuit open with no fallback — writing partial results")
|
||||
cycler["record_failure"]()
|
||||
if cycler["is_exhausted"]():
|
||||
logger.error("All proxy tiers exhausted — writing partial results")
|
||||
break
|
||||
|
||||
if (i + 1) % 100 == 0:
|
||||
@@ -485,14 +488,13 @@ def extract_recheck(
|
||||
start_max_str = window_end.strftime("%Y-%m-%dT%H:%M:%S")
|
||||
|
||||
# Set up tiered proxy cycler with circuit breaker
|
||||
proxy_urls = load_proxy_urls()
|
||||
fallback_urls = load_fallback_proxy_urls()
|
||||
worker_count = len(proxy_urls) if proxy_urls else 1
|
||||
cycler = make_tiered_cycler(proxy_urls, fallback_urls, CIRCUIT_BREAKER_THRESHOLD)
|
||||
tiers = load_proxy_tiers()
|
||||
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
||||
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
||||
|
||||
if worker_count > 1 and len(venues_to_recheck) > 10:
|
||||
venues_data, venues_errored = _fetch_venues_parallel(
|
||||
venues_to_recheck, start_min_str, start_max_str, worker_count, cycler, fallback_urls,
|
||||
venues_to_recheck, start_min_str, start_max_str, worker_count, cycler,
|
||||
)
|
||||
else:
|
||||
venues_data = []
|
||||
@@ -504,9 +506,9 @@ def extract_recheck(
|
||||
cycler["record_success"]()
|
||||
else:
|
||||
venues_errored += 1
|
||||
circuit_opened = cycler["record_failure"]()
|
||||
if circuit_opened and not fallback_urls:
|
||||
logger.error("Circuit open with no fallback — writing partial recheck results")
|
||||
cycler["record_failure"]()
|
||||
if cycler["is_exhausted"]():
|
||||
logger.error("All proxy tiers exhausted — writing partial recheck results")
|
||||
break
|
||||
|
||||
# Write recheck file as JSONL — one venue per line with metadata injected
|
||||
|
||||
@@ -25,12 +25,13 @@ import json
|
||||
import sqlite3
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import niquests
|
||||
|
||||
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging, ua_for_proxy
|
||||
from .proxy import load_proxy_urls, make_round_robin_cycler
|
||||
from .proxy import load_proxy_tiers, make_round_robin_cycler
|
||||
from .utils import compress_jsonl_atomic, landing_path
|
||||
|
||||
logger = setup_logging("padelnomics.extract.playtomic_tenants")
|
||||
@@ -69,25 +70,31 @@ def _fetch_pages_parallel(pages: list[int], next_proxy) -> list[tuple[int, list[
|
||||
|
||||
def extract(
|
||||
landing_dir: Path,
|
||||
year_month: str,
|
||||
year_month: str, # noqa: ARG001 — unused; tenants uses ISO week partition instead
|
||||
conn: sqlite3.Connection,
|
||||
session: niquests.Session,
|
||||
) -> dict:
|
||||
"""Fetch all Playtomic venues via global pagination. Returns run metrics."""
|
||||
year, month = year_month.split("/")
|
||||
dest_dir = landing_path(landing_dir, "playtomic", year, month)
|
||||
"""Fetch all Playtomic venues via global pagination. Returns run metrics.
|
||||
|
||||
Partitioned by ISO week (e.g. 2026/W09) so each weekly run produces a
|
||||
fresh file. _load_tenant_ids() in playtomic_availability globs across all
|
||||
partitions and picks the most recent one.
|
||||
"""
|
||||
today = datetime.now(UTC)
|
||||
year, month, day = today.strftime("%Y"), today.strftime("%m"), today.strftime("%d")
|
||||
dest_dir = landing_path(landing_dir, "playtomic", year, month, day)
|
||||
dest = dest_dir / "tenants.jsonl.gz"
|
||||
old_blob = dest_dir / "tenants.json.gz"
|
||||
if dest.exists() or old_blob.exists():
|
||||
logger.info("Already have tenants for %s/%s — skipping", year, month)
|
||||
if dest.exists():
|
||||
logger.info("Already have tenants for %s/%s/%s — skipping", year, month, day)
|
||||
return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}
|
||||
|
||||
proxy_urls = load_proxy_urls()
|
||||
next_proxy = make_round_robin_cycler(proxy_urls) if proxy_urls else None
|
||||
batch_size = len(proxy_urls) if proxy_urls else 1
|
||||
tiers = load_proxy_tiers()
|
||||
all_proxies = [url for tier in tiers for url in tier]
|
||||
next_proxy = make_round_robin_cycler(all_proxies) if all_proxies else None
|
||||
batch_size = len(all_proxies) if all_proxies else 1
|
||||
|
||||
if next_proxy:
|
||||
logger.info("Parallel mode: %d pages per batch (%d proxies)", batch_size, len(proxy_urls))
|
||||
logger.info("Parallel mode: %d pages per batch (%d proxies across %d tier(s))", batch_size, len(all_proxies), len(tiers))
|
||||
else:
|
||||
logger.info("Serial mode: 1 page at a time (no proxies)")
|
||||
|
||||
@@ -154,7 +161,7 @@ def extract(
|
||||
"files_written": 1,
|
||||
"files_skipped": 0,
|
||||
"bytes_written": bytes_written,
|
||||
"cursor_value": year_month,
|
||||
"cursor_value": f"{year}/{month}/{day}",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,41 +1,97 @@
|
||||
"""Optional proxy rotation for parallel HTTP fetching.
|
||||
|
||||
Proxies are configured via the PROXY_URLS environment variable (comma-separated).
|
||||
When unset, all functions return None/no-op — extractors fall back to direct requests.
|
||||
Proxies are configured via environment variables. When unset, all functions
|
||||
return None/no-op — extractors fall back to direct requests.
|
||||
|
||||
Tiered proxy with circuit breaker:
|
||||
Primary tier (PROXY_URLS) is used by default — typically cheap datacenter proxies.
|
||||
Fallback tier (PROXY_URLS_FALLBACK) activates once consecutive failures >= threshold.
|
||||
Once the circuit opens it stays open for the duration of the run (no auto-recovery).
|
||||
Three-tier escalation: free → datacenter → residential.
|
||||
Tier 1 (free): WEBSHARE_DOWNLOAD_URL — auto-fetched from Webshare API
|
||||
Tier 2 (datacenter): PROXY_URLS_DATACENTER — comma-separated paid DC proxies
|
||||
Tier 3 (residential): PROXY_URLS_RESIDENTIAL — comma-separated paid residential proxies
|
||||
|
||||
Tiered circuit breaker:
|
||||
Active tier is used until consecutive failures >= threshold, then escalates
|
||||
to the next tier. Once all tiers are exhausted, is_exhausted() returns True.
|
||||
Escalation is permanent for the duration of the run — no auto-recovery.
|
||||
"""
|
||||
|
||||
import itertools
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_WEBSHARE_PROXIES = 20
|
||||
WEBSHARE_FETCH_TIMEOUT_SECONDS = 10
|
||||
WEBSHARE_MAX_RESPONSE_BYTES = 1024 * 1024 # 1MB
|
||||
|
||||
def load_proxy_urls() -> list[str]:
|
||||
"""Read PROXY_URLS env var (comma-separated). Returns [] if unset.
|
||||
|
||||
Format: http://user:pass@host:port or socks5://host:port
|
||||
def fetch_webshare_proxies(download_url: str, max_proxies: int = MAX_WEBSHARE_PROXIES) -> list[str]:
|
||||
"""Fetch proxy list from the Webshare download API. Returns [] on any error.
|
||||
|
||||
Expected line format: ip:port:username:password
|
||||
Converts to: http://username:password@ip:port
|
||||
|
||||
Bounded: reads at most WEBSHARE_MAX_RESPONSE_BYTES, returns at most max_proxies.
|
||||
"""
|
||||
raw = os.environ.get("PROXY_URLS", "")
|
||||
urls = [u.strip() for u in raw.split(",") if u.strip()]
|
||||
assert max_proxies > 0, f"max_proxies must be positive, got {max_proxies}"
|
||||
assert download_url, "download_url must not be empty"
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
download_url,
|
||||
headers={"User-Agent": "padelnomics-extract/1.0"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=WEBSHARE_FETCH_TIMEOUT_SECONDS) as resp:
|
||||
raw = resp.read(WEBSHARE_MAX_RESPONSE_BYTES).decode("utf-8")
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch Webshare proxies: %s", e)
|
||||
return []
|
||||
|
||||
urls = []
|
||||
for line in raw.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(":")
|
||||
if len(parts) != 4:
|
||||
logger.debug("Skipping malformed proxy line: %r", line)
|
||||
continue
|
||||
ip, port, username, password = parts
|
||||
urls.append(f"http://{username}:{password}@{ip}:{port}")
|
||||
if len(urls) >= max_proxies:
|
||||
break
|
||||
|
||||
logger.info("Fetched %d proxies from Webshare", len(urls))
|
||||
return urls
|
||||
|
||||
|
||||
def load_fallback_proxy_urls() -> list[str]:
|
||||
"""Read PROXY_URLS_FALLBACK env var (comma-separated). Returns [] if unset.
|
||||
def load_proxy_tiers() -> list[list[str]]:
|
||||
"""Assemble proxy tiers in escalation order: free → datacenter → residential.
|
||||
|
||||
Used as the residential/reliable fallback tier when the primary tier fails.
|
||||
Format: http://user:pass@host:port or socks5://host:port
|
||||
Tier 1 (free): fetched from WEBSHARE_DOWNLOAD_URL if set.
|
||||
Tier 2 (datacenter): PROXY_URLS_DATACENTER (comma-separated).
|
||||
Tier 3 (residential): PROXY_URLS_RESIDENTIAL (comma-separated).
|
||||
|
||||
Empty tiers are omitted. Returns [] if no proxies configured anywhere.
|
||||
"""
|
||||
raw = os.environ.get("PROXY_URLS_FALLBACK", "")
|
||||
urls = [u.strip() for u in raw.split(",") if u.strip()]
|
||||
return urls
|
||||
tiers: list[list[str]] = []
|
||||
|
||||
webshare_url = os.environ.get("WEBSHARE_DOWNLOAD_URL", "").strip()
|
||||
if webshare_url:
|
||||
free_proxies = fetch_webshare_proxies(webshare_url)
|
||||
if free_proxies:
|
||||
tiers.append(free_proxies)
|
||||
|
||||
for var in ("PROXY_URLS_DATACENTER", "PROXY_URLS_RESIDENTIAL"):
|
||||
raw = os.environ.get(var, "")
|
||||
urls = [u.strip() for u in raw.split(",") if u.strip()]
|
||||
if urls:
|
||||
tiers.append(urls)
|
||||
|
||||
return tiers
|
||||
|
||||
|
||||
def make_round_robin_cycler(proxy_urls: list[str]):
|
||||
@@ -78,83 +134,96 @@ def make_sticky_selector(proxy_urls: list[str]):
|
||||
return select_proxy
|
||||
|
||||
|
||||
def make_tiered_cycler(
|
||||
primary_urls: list[str],
|
||||
fallback_urls: list[str],
|
||||
threshold: int,
|
||||
) -> dict:
|
||||
"""Thread-safe tiered proxy cycler with circuit breaker.
|
||||
def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
|
||||
"""Thread-safe N-tier proxy cycler with circuit breaker.
|
||||
|
||||
Uses primary_urls until consecutive failures >= threshold, then switches
|
||||
permanently to fallback_urls for the rest of the run. No auto-recovery —
|
||||
once the circuit opens it stays open to avoid flapping.
|
||||
Uses tiers[0] until consecutive failures >= threshold, then escalates
|
||||
to tiers[1], then tiers[2], etc. Once all tiers are exhausted,
|
||||
is_exhausted() returns True and next_proxy() returns None.
|
||||
|
||||
Failure counter resets on each escalation — the new tier gets a fresh start.
|
||||
Once exhausted, further record_failure() calls are no-ops.
|
||||
|
||||
Returns a dict of callables:
|
||||
next_proxy() -> str | None — returns URL from the active tier
|
||||
record_success() — resets consecutive failure counter
|
||||
record_failure() -> bool — increments counter; True if circuit just opened
|
||||
is_fallback_active() -> bool — whether fallback tier is currently active
|
||||
next_proxy() -> str | None — URL from the active tier, or None
|
||||
record_success() -> None — resets consecutive failure counter
|
||||
record_failure() -> bool — True if just escalated to next tier
|
||||
is_exhausted() -> bool — True if all tiers exhausted
|
||||
active_tier_index() -> int — 0-based index of current tier
|
||||
tier_count() -> int — total number of tiers
|
||||
|
||||
If primary_urls is empty: always returns from fallback_urls (no circuit breaker needed).
|
||||
If both are empty: next_proxy() always returns None.
|
||||
Edge cases:
|
||||
Empty tiers list: next_proxy() always returns None, is_exhausted() True.
|
||||
Single tier: behaves like the primary-only case, is_exhausted() after threshold.
|
||||
"""
|
||||
assert threshold > 0, f"threshold must be positive, got {threshold}"
|
||||
assert isinstance(tiers, list), f"tiers must be a list, got {type(tiers)}"
|
||||
|
||||
lock = threading.Lock()
|
||||
cycles = [itertools.cycle(t) for t in tiers]
|
||||
state = {
|
||||
"active_tier": 0,
|
||||
"consecutive_failures": 0,
|
||||
"fallback_active": False,
|
||||
}
|
||||
|
||||
primary_cycle = itertools.cycle(primary_urls) if primary_urls else None
|
||||
fallback_cycle = itertools.cycle(fallback_urls) if fallback_urls else None
|
||||
|
||||
# No primary proxies — skip circuit breaker, use fallback directly
|
||||
if not primary_urls:
|
||||
state["fallback_active"] = True
|
||||
|
||||
def next_proxy() -> str | None:
|
||||
with lock:
|
||||
if state["fallback_active"]:
|
||||
return next(fallback_cycle) if fallback_cycle else None
|
||||
return next(primary_cycle) if primary_cycle else None
|
||||
idx = state["active_tier"]
|
||||
if idx >= len(cycles):
|
||||
return None
|
||||
return next(cycles[idx])
|
||||
|
||||
def record_success() -> None:
|
||||
with lock:
|
||||
state["consecutive_failures"] = 0
|
||||
|
||||
def record_failure() -> bool:
|
||||
"""Increment failure counter. Returns True if circuit just opened."""
|
||||
"""Increment failure counter. Returns True if just escalated to next tier."""
|
||||
with lock:
|
||||
if state["fallback_active"]:
|
||||
# Already on fallback — don't trip the circuit again
|
||||
idx = state["active_tier"]
|
||||
if idx >= len(tiers):
|
||||
# Already exhausted — no-op
|
||||
return False
|
||||
state["consecutive_failures"] += 1
|
||||
if state["consecutive_failures"] >= threshold:
|
||||
state["fallback_active"] = True
|
||||
if fallback_urls:
|
||||
logger.warning(
|
||||
"Circuit open after %d consecutive failures — "
|
||||
"switching to fallback residential proxies",
|
||||
state["consecutive_failures"],
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"Circuit open after %d consecutive failures — "
|
||||
"no fallback configured, aborting run",
|
||||
state["consecutive_failures"],
|
||||
)
|
||||
return True
|
||||
return False
|
||||
if state["consecutive_failures"] < threshold:
|
||||
return False
|
||||
# Threshold reached — escalate
|
||||
state["consecutive_failures"] = 0
|
||||
state["active_tier"] += 1
|
||||
new_idx = state["active_tier"]
|
||||
if new_idx < len(tiers):
|
||||
logger.warning(
|
||||
"Circuit open after %d consecutive failures — "
|
||||
"escalating to proxy tier %d/%d",
|
||||
threshold,
|
||||
new_idx + 1,
|
||||
len(tiers),
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"All %d proxy tier(s) exhausted after %d consecutive failures — "
|
||||
"no more fallbacks",
|
||||
len(tiers),
|
||||
threshold,
|
||||
)
|
||||
return True
|
||||
|
||||
def is_fallback_active() -> bool:
|
||||
def is_exhausted() -> bool:
|
||||
with lock:
|
||||
return state["fallback_active"]
|
||||
return state["active_tier"] >= len(tiers)
|
||||
|
||||
def active_tier_index() -> int:
|
||||
with lock:
|
||||
return state["active_tier"]
|
||||
|
||||
def tier_count() -> int:
|
||||
return len(tiers)
|
||||
|
||||
return {
|
||||
"next_proxy": next_proxy,
|
||||
"record_success": record_success,
|
||||
"record_failure": record_failure,
|
||||
"is_fallback_active": is_fallback_active,
|
||||
"is_exhausted": is_exhausted,
|
||||
"active_tier_index": active_tier_index,
|
||||
"tier_count": tier_count,
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ set -euo pipefail
|
||||
|
||||
SERVICE_USER="padelnomics_service"
|
||||
REPO_DIR="/opt/padelnomics"
|
||||
GITLAB_PROJECT="deemanone/padelnomics"
|
||||
GITEA_REPO="ssh://git@git.padelnomics.io:2222/deemanone/padelnomics.git"
|
||||
UV="/home/${SERVICE_USER}/.local/bin/uv"
|
||||
|
||||
[ "$(id -u)" = "0" ] || { echo "ERROR: Run as root"; exit 1; }
|
||||
@@ -35,7 +35,7 @@ if [ -d "${REPO_DIR}/.git" ]; then
|
||||
sudo -u "${SERVICE_USER}" git -C "${REPO_DIR}" fetch --tags --prune-tags origin
|
||||
else
|
||||
sudo -u "${SERVICE_USER}" git clone \
|
||||
"git@gitlab.com:${GITLAB_PROJECT}.git" "${REPO_DIR}"
|
||||
"${GITEA_REPO}" "${REPO_DIR}"
|
||||
fi
|
||||
|
||||
LATEST_TAG=$(sudo -u "${SERVICE_USER}" \
|
||||
|
||||
@@ -40,7 +40,7 @@ fi
|
||||
|
||||
log "Creating directories..."
|
||||
mkdir -p "${APP_DIR}" "${DATA_DIR}/landing"
|
||||
chown "${SERVICE_USER}:${SERVICE_USER}" "${APP_DIR}"
|
||||
chown -R "${SERVICE_USER}:${SERVICE_USER}" "${APP_DIR}"
|
||||
chown -R "${SERVICE_USER}:${SERVICE_USER}" "${DATA_DIR}"
|
||||
|
||||
# ── Docker ────────────────────────────────────────────────────────────────────
|
||||
@@ -75,7 +75,8 @@ fi
|
||||
|
||||
if [ ! -f "${SSH_DIR}/config" ]; then
|
||||
cat > "${SSH_DIR}/config" <<EOF
|
||||
Host gitlab.com
|
||||
Host git.padelnomics.io
|
||||
Port 2222
|
||||
IdentityFile ${DEPLOY_KEY}
|
||||
IdentitiesOnly yes
|
||||
EOF
|
||||
@@ -83,7 +84,7 @@ EOF
|
||||
chmod 600 "${SSH_DIR}/config"
|
||||
fi
|
||||
|
||||
ssh-keyscan -H gitlab.com >> "${SSH_DIR}/known_hosts" 2>/dev/null
|
||||
ssh-keyscan -H -p 2222 git.padelnomics.io >> "${SSH_DIR}/known_hosts" 2>/dev/null
|
||||
sort -u "${SSH_DIR}/known_hosts" -o "${SSH_DIR}/known_hosts"
|
||||
chown "${SERVICE_USER}:${SERVICE_USER}" "${SSH_DIR}/known_hosts"
|
||||
chmod 644 "${SSH_DIR}/known_hosts"
|
||||
|
||||
@@ -23,7 +23,7 @@ schedule = "monthly"
|
||||
|
||||
[playtomic_tenants]
|
||||
module = "padelnomics_extract.playtomic_tenants"
|
||||
schedule = "weekly"
|
||||
schedule = "daily"
|
||||
|
||||
[playtomic_availability]
|
||||
module = "padelnomics_extract.playtomic_availability"
|
||||
|
||||
860
scratch/lineage-ux-prototype.html
Normal file
860
scratch/lineage-ux-prototype.html
Normal file
@@ -0,0 +1,860 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Lineage UX Prototype</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link href="https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600&family=DM+Mono:wght@400;500&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
:root {
|
||||
--green-bg: #F0FDF4; --green-border: #BBF7D0; --green-accent: #16A34A;
|
||||
--green-fill: #DCFCE7; --green-text: #14532D;
|
||||
--blue-bg: #EFF6FF; --blue-border: #BFDBFE; --blue-accent: #1D4ED8;
|
||||
--blue-fill: #DBEAFE; --blue-text: #1E3A8A;
|
||||
--amber-bg: #FFFBEB; --amber-border: #FDE68A; --amber-accent: #D97706;
|
||||
--amber-fill: #FEF3C7; --amber-text: #78350F;
|
||||
--slate-50: #F8FAFC; --slate-100: #F1F5F9; --slate-200: #E2E8F0;
|
||||
--slate-400: #94A3B8; --slate-500: #64748B; --slate-700: #334155;
|
||||
--slate-800: #1E293B; --slate-900: #0F172A;
|
||||
--panel-w: 340px;
|
||||
--font-sans: 'DM Sans', ui-sans-serif, system-ui, sans-serif;
|
||||
--font-mono: 'DM Mono', ui-monospace, monospace;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: var(--font-sans);
|
||||
background: var(--slate-50);
|
||||
color: var(--slate-800);
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
/* ── Page shell ── */
|
||||
.page-header {
|
||||
background: white;
|
||||
border-bottom: 1px solid var(--slate-200);
|
||||
padding: 0.75rem 1.5rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
.page-header h1 { font-size: 0.875rem; font-weight: 600; color: var(--slate-800); }
|
||||
.badge {
|
||||
font-size: 0.6875rem; font-weight: 500; padding: 2px 7px;
|
||||
border-radius: 99px; background: var(--slate-100); color: var(--slate-500);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hint {
|
||||
font-size: 0.75rem; color: var(--slate-400); margin-left: auto;
|
||||
}
|
||||
|
||||
.workspace {
|
||||
display: flex;
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* ── DAG canvas ── */
|
||||
.canvas-wrap {
|
||||
flex: 1;
|
||||
overflow: auto;
|
||||
padding: 1.5rem;
|
||||
transition: margin-right 0.22s cubic-bezier(0.4,0,0.2,1);
|
||||
}
|
||||
.canvas-wrap.panel-open { margin-right: var(--panel-w); }
|
||||
|
||||
.card {
|
||||
background: white;
|
||||
border: 1px solid var(--slate-200);
|
||||
border-radius: 10px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.card-header {
|
||||
padding: 0.75rem 1rem;
|
||||
font-size: 0.8125rem;
|
||||
font-weight: 600;
|
||||
border-bottom: 1px solid var(--slate-100);
|
||||
color: var(--slate-700);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.svg-wrap { padding: 1rem 0.75rem 0.75rem; overflow-x: auto; }
|
||||
|
||||
/* ── Lineage SVG node/edge states ── */
|
||||
.lineage-node { cursor: pointer; }
|
||||
.lineage-node rect:first-child { transition: filter 0.1s; }
|
||||
.lineage-node:hover rect:first-child { filter: brightness(0.93); }
|
||||
.lineage-node.selected rect:first-child { filter: brightness(0.88); }
|
||||
.lineage-edge { transition: stroke 0.12s, stroke-width 0.12s, opacity 0.12s; }
|
||||
.lineage-edge.hi { stroke: #1D4ED8 !important; stroke-width: 2 !important; opacity: 1 !important; }
|
||||
.lineage-edge.dim { opacity: 0.1; }
|
||||
|
||||
/* ── Hover tooltip ── */
|
||||
#tooltip {
|
||||
position: fixed;
|
||||
z-index: 100;
|
||||
pointer-events: none;
|
||||
opacity: 0;
|
||||
transition: opacity 0.1s;
|
||||
filter: drop-shadow(0 4px 12px rgba(0,0,0,0.12));
|
||||
}
|
||||
#tooltip.visible { opacity: 1; }
|
||||
.tt-box {
|
||||
background: var(--slate-900);
|
||||
color: white;
|
||||
border-radius: 8px;
|
||||
padding: 10px 12px;
|
||||
min-width: 200px;
|
||||
max-width: 260px;
|
||||
}
|
||||
.tt-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
margin-bottom: 8px;
|
||||
padding-bottom: 7px;
|
||||
border-bottom: 1px solid rgba(255,255,255,0.08);
|
||||
}
|
||||
.tt-name {
|
||||
font-family: var(--font-mono);
|
||||
font-size: 0.6875rem;
|
||||
font-weight: 500;
|
||||
color: white;
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.tt-layer {
|
||||
font-size: 0.5625rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.08em;
|
||||
padding: 2px 6px;
|
||||
border-radius: 4px;
|
||||
flex-shrink: 0;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.tt-layer.staging { background: rgba(22,163,74,0.25); color: #86EFAC; }
|
||||
.tt-layer.foundation { background: rgba(29,78,216,0.3); color: #93C5FD; }
|
||||
.tt-layer.serving { background: rgba(217,119,6,0.25); color: #FCD34D; }
|
||||
|
||||
.tt-row {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 6px;
|
||||
padding: 2px 0;
|
||||
font-size: 0.6875rem;
|
||||
}
|
||||
.tt-col { font-family: var(--font-mono); color: #CBD5E1; flex: 1; min-width: 0; overflow: hidden; text-overflow: ellipsis; }
|
||||
.tt-type { font-family: var(--font-mono); color: #64748B; font-size: 0.625rem; flex-shrink: 0; }
|
||||
.tt-more { font-size: 0.625rem; color: #64748B; margin-top: 5px; padding-top: 5px; border-top: 1px solid rgba(255,255,255,0.06); }
|
||||
|
||||
/* ── Detail panel ── */
|
||||
#detail-panel {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
width: var(--panel-w);
|
||||
background: white;
|
||||
border-left: 1px solid var(--slate-200);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
transform: translateX(100%);
|
||||
transition: transform 0.22s cubic-bezier(0.4,0,0.2,1);
|
||||
z-index: 50;
|
||||
overflow: hidden;
|
||||
}
|
||||
#detail-panel.open { transform: translateX(0); }
|
||||
|
||||
.panel-top {
|
||||
padding: 0.875rem 1rem;
|
||||
border-bottom: 1px solid var(--slate-100);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.375rem;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.panel-title-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.panel-model-name {
|
||||
font-family: var(--font-mono);
|
||||
font-size: 0.8125rem;
|
||||
font-weight: 500;
|
||||
color: var(--slate-800);
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.panel-close {
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
color: var(--slate-400);
|
||||
padding: 2px;
|
||||
border-radius: 4px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
transition: color 0.1s, background 0.1s;
|
||||
}
|
||||
.panel-close:hover { color: var(--slate-700); background: var(--slate-100); }
|
||||
|
||||
.panel-meta {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.6875rem;
|
||||
color: var(--slate-500);
|
||||
}
|
||||
.meta-chip {
|
||||
padding: 2px 7px;
|
||||
border-radius: 99px;
|
||||
font-size: 0.625rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.05em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.meta-chip.staging { background: var(--green-fill); color: var(--green-text); border: 1px solid var(--green-border); }
|
||||
.meta-chip.foundation { background: var(--blue-fill); color: var(--blue-text); border: 1px solid var(--blue-border); }
|
||||
.meta-chip.serving { background: var(--amber-fill); color: var(--amber-text); border: 1px solid var(--amber-border); }
|
||||
|
||||
.panel-body {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.panel-section {
|
||||
border-bottom: 1px solid var(--slate-100);
|
||||
padding: 0.75rem 1rem;
|
||||
}
|
||||
.panel-section:last-child { border-bottom: none; }
|
||||
.section-label {
|
||||
font-size: 0.625rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.08em;
|
||||
text-transform: uppercase;
|
||||
color: var(--slate-400);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
/* Schema table */
|
||||
.schema-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.6875rem;
|
||||
}
|
||||
.schema-table th {
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
color: var(--slate-500);
|
||||
padding: 0 0 0.375rem;
|
||||
font-size: 0.625rem;
|
||||
letter-spacing: 0.04em;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.schema-table td {
|
||||
padding: 3px 0;
|
||||
vertical-align: middle;
|
||||
}
|
||||
.schema-table tr + tr td { border-top: 1px solid var(--slate-100); }
|
||||
.col-name { font-family: var(--font-mono); color: var(--slate-800); font-weight: 500; }
|
||||
.col-type { font-family: var(--font-mono); color: var(--slate-400); font-size: 0.625rem; }
|
||||
.col-null { font-size: 0.5625rem; color: var(--slate-300); text-align: right; }
|
||||
.col-null.yes { color: var(--amber-accent); }
|
||||
|
||||
/* Row count */
|
||||
.stat-row {
|
||||
display: flex;
|
||||
align-items: baseline;
|
||||
gap: 0.375rem;
|
||||
}
|
||||
.stat-val {
|
||||
font-family: var(--font-mono);
|
||||
font-size: 1rem;
|
||||
font-weight: 500;
|
||||
color: var(--slate-800);
|
||||
}
|
||||
.stat-unit { font-size: 0.6875rem; color: var(--slate-500); }
|
||||
|
||||
/* Dep lists */
|
||||
.dep-list { display: flex; flex-direction: column; gap: 3px; }
|
||||
.dep-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 4px 7px;
|
||||
border-radius: 5px;
|
||||
cursor: pointer;
|
||||
transition: background 0.1s;
|
||||
font-family: var(--font-mono);
|
||||
font-size: 0.6875rem;
|
||||
color: var(--slate-700);
|
||||
}
|
||||
.dep-item:hover { background: var(--slate-50); }
|
||||
.dep-dot {
|
||||
width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0;
|
||||
}
|
||||
.dep-dot.staging { background: var(--green-accent); }
|
||||
.dep-dot.foundation { background: var(--blue-accent); }
|
||||
.dep-dot.serving { background: var(--amber-accent); }
|
||||
|
||||
.empty-state { color: var(--slate-400); font-size: 0.6875rem; font-style: italic; }
|
||||
|
||||
/* ── "Click to explore" annotation ── */
|
||||
.interaction-hint {
|
||||
font-size: 0.6875rem; color: var(--slate-400);
|
||||
display: flex; align-items: center; gap: 4px;
|
||||
}
|
||||
.kbd {
|
||||
display: inline-flex; align-items: center; justify-content: center;
|
||||
background: var(--slate-100); border: 1px solid var(--slate-200);
|
||||
border-radius: 3px; padding: 1px 5px;
|
||||
font-family: var(--font-mono); font-size: 0.5625rem;
|
||||
color: var(--slate-600); line-height: 1.4;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>Data Lineage</h1>
|
||||
<span class="badge">26 models</span>
|
||||
<span class="badge">staging → foundation → serving</span>
|
||||
<span class="hint interaction-hint">
|
||||
hover to preview schema · click to inspect
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="workspace">
|
||||
<div class="canvas-wrap" id="canvas-wrap">
|
||||
<div class="card">
|
||||
<div class="svg-wrap" id="svg-wrap">
|
||||
<!-- SVG injected by JS below -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Detail panel -->
|
||||
<div id="detail-panel">
|
||||
<div class="panel-top">
|
||||
<div class="panel-title-row">
|
||||
<span class="panel-model-name" id="panel-model-name">—</span>
|
||||
<button class="panel-close" id="panel-close" title="Close">✕</button>
|
||||
</div>
|
||||
<div class="panel-meta">
|
||||
<span class="meta-chip" id="panel-layer-chip">—</span>
|
||||
<span id="panel-materialization">view</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="panel-body" id="panel-body">
|
||||
<!-- injected by JS -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Floating tooltip -->
|
||||
<div id="tooltip">
|
||||
<div class="tt-box" id="tt-box"></div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// ── Mock data (would come from DuckDB DESCRIBE in production) ──────────────
|
||||
const SCHEMA = {
|
||||
stg_padel_courts: {
|
||||
layer: 'staging', materialization: 'view',
|
||||
rows: null,
|
||||
columns: [
|
||||
{ name: 'court_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'name', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'lat', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'lon', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'country_code', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'source_file', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'extracted_at', type: 'TIMESTAMP', nullable: false },
|
||||
],
|
||||
},
|
||||
stg_playtomic_venues: {
|
||||
layer: 'staging', materialization: 'view', rows: null,
|
||||
columns: [
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'name', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'city', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'country_code', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'lat', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'lon', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'sport_ids', type: 'VARCHAR[]', nullable: true },
|
||||
],
|
||||
},
|
||||
stg_playtomic_resources: {
|
||||
layer: 'staging', materialization: 'view', rows: null,
|
||||
columns: [
|
||||
{ name: 'resource_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'name', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'sport_id', type: 'VARCHAR', nullable: true },
|
||||
],
|
||||
},
|
||||
stg_playtomic_availability: {
|
||||
layer: 'staging', materialization: 'view', rows: null,
|
||||
columns: [
|
||||
{ name: 'slot_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'resource_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'start_at', type: 'TIMESTAMP', nullable: false },
|
||||
{ name: 'duration_min', type: 'INTEGER', nullable: false },
|
||||
{ name: 'price_cents', type: 'INTEGER', nullable: true },
|
||||
{ name: 'currency', type: 'VARCHAR', nullable: true },
|
||||
],
|
||||
},
|
||||
stg_population: {
|
||||
layer: 'staging', materialization: 'view', rows: null,
|
||||
columns: [
|
||||
{ name: 'city_code', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'city_name', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'population', type: 'INTEGER', nullable: true },
|
||||
{ name: 'year', type: 'INTEGER', nullable: false },
|
||||
],
|
||||
},
|
||||
dim_venues: {
|
||||
layer: 'foundation', materialization: 'table', rows: 4821,
|
||||
columns: [
|
||||
{ name: 'venue_hk', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'overpass_id', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'name', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'country_code', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'city', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'lat', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'lon', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'court_count', type: 'INTEGER', nullable: true },
|
||||
{ name: 'has_playtomic', type: 'BOOLEAN', nullable: false },
|
||||
{ name: 'loaded_at', type: 'TIMESTAMP', nullable: false },
|
||||
],
|
||||
},
|
||||
dim_cities: {
|
||||
layer: 'foundation', materialization: 'table', rows: 1203,
|
||||
columns: [
|
||||
{ name: 'city_hk', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'city_name', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'country_code', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'population', type: 'INTEGER', nullable: true },
|
||||
{ name: 'nuts2_code', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'income_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'venue_count', type: 'INTEGER', nullable: false },
|
||||
{ name: 'lat', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'lon', type: 'DOUBLE', nullable: true },
|
||||
],
|
||||
},
|
||||
dim_venue_capacity: {
|
||||
layer: 'foundation', materialization: 'table', rows: 4812,
|
||||
columns: [
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'court_count', type: 'INTEGER', nullable: false },
|
||||
{ name: 'open_hours_wday',type: 'DOUBLE', nullable: true },
|
||||
{ name: 'open_hours_wend',type: 'DOUBLE', nullable: true },
|
||||
],
|
||||
},
|
||||
fct_daily_availability: {
|
||||
layer: 'foundation', materialization: 'table', rows: 382104,
|
||||
columns: [
|
||||
{ name: 'date', type: 'DATE', nullable: false },
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'resource_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'slots_total', type: 'INTEGER', nullable: false },
|
||||
{ name: 'slots_booked', type: 'INTEGER', nullable: false },
|
||||
{ name: 'occupancy_rate', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'avg_price_eur', type: 'DOUBLE', nullable: true },
|
||||
],
|
||||
},
|
||||
venue_pricing_benchmarks: {
|
||||
layer: 'serving', materialization: 'table', rows: 4201,
|
||||
columns: [
|
||||
{ name: 'tenant_id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'p25_price_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'p50_price_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'p75_price_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'peak_price_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'avg_occupancy', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'sample_days', type: 'INTEGER', nullable: false },
|
||||
],
|
||||
},
|
||||
city_market_profile: {
|
||||
layer: 'serving', materialization: 'table', rows: 987,
|
||||
columns: [
|
||||
{ name: 'city_hk', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'city_name', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'country_code', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'venue_count', type: 'INTEGER', nullable: false },
|
||||
{ name: 'avg_price_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'median_occ_rate', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'population', type: 'INTEGER', nullable: true },
|
||||
{ name: 'income_eur', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'opportunity_score',type:'DOUBLE', nullable: true },
|
||||
],
|
||||
},
|
||||
pseo_city_costs_de: {
|
||||
layer: 'serving', materialization: 'table', rows: 847,
|
||||
columns: [
|
||||
{ name: 'city_slug', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'city_name_de', type: 'VARCHAR', nullable: true },
|
||||
{ name: 'avg_build_cost', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'land_cost_m2', type: 'DOUBLE', nullable: true },
|
||||
{ name: 'venue_count', type: 'INTEGER', nullable: false },
|
||||
{ name: 'opportunity_score',type:'DOUBLE', nullable: true },
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
// fallback for models not in mock
|
||||
function getSchema(model) {
|
||||
if (SCHEMA[model]) return SCHEMA[model];
|
||||
const layer = model.startsWith('stg_') ? 'staging'
|
||||
: (model.startsWith('dim_') || model.startsWith('fct_')) ? 'foundation'
|
||||
: 'serving';
|
||||
return { layer, materialization: 'view', rows: null,
|
||||
columns: [
|
||||
{ name: 'id', type: 'VARCHAR', nullable: false },
|
||||
{ name: 'created_at', type: 'TIMESTAMP', nullable: false },
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
// ── DAG definition ────────────────────────────────────────────────────────
|
||||
const DAG = {
|
||||
stg_padel_courts: [],
|
||||
stg_playtomic_venues: [],
|
||||
stg_playtomic_resources: [],
|
||||
stg_playtomic_opening_hours: [],
|
||||
stg_playtomic_availability: [],
|
||||
stg_population: [],
|
||||
stg_population_usa: [],
|
||||
stg_population_uk: [],
|
||||
stg_population_geonames: [],
|
||||
stg_income: [],
|
||||
stg_income_usa: [],
|
||||
stg_city_labels: [],
|
||||
stg_nuts2_boundaries: [],
|
||||
stg_regional_income: [],
|
||||
stg_tennis_courts: [],
|
||||
dim_venues: ['stg_playtomic_venues','stg_playtomic_resources','stg_padel_courts'],
|
||||
dim_cities: ['dim_venues','stg_income','stg_city_labels','stg_population','stg_population_usa','stg_population_uk','stg_population_geonames'],
|
||||
dim_locations: ['stg_population_geonames','stg_income','stg_nuts2_boundaries','stg_regional_income','stg_income_usa','stg_padel_courts','stg_tennis_courts'],
|
||||
dim_venue_capacity: ['stg_playtomic_venues','stg_playtomic_resources','stg_playtomic_opening_hours'],
|
||||
fct_availability_slot: ['stg_playtomic_availability'],
|
||||
fct_daily_availability: ['fct_availability_slot','dim_venue_capacity'],
|
||||
venue_pricing_benchmarks: ['fct_daily_availability'],
|
||||
city_market_profile: ['dim_cities','venue_pricing_benchmarks'],
|
||||
planner_defaults: ['venue_pricing_benchmarks','city_market_profile'],
|
||||
location_opportunity_profile: ['dim_locations'],
|
||||
pseo_city_costs_de: ['city_market_profile','planner_defaults','location_opportunity_profile'],
|
||||
pseo_city_pricing: ['venue_pricing_benchmarks','city_market_profile'],
|
||||
pseo_country_overview: ['pseo_city_costs_de'],
|
||||
};
|
||||
|
||||
// Compute downstream map
|
||||
const DOWNSTREAM = {};
|
||||
Object.keys(DAG).forEach(n => DOWNSTREAM[n] = []);
|
||||
Object.entries(DAG).forEach(([name, deps]) => {
|
||||
deps.forEach(dep => {
|
||||
if (!DOWNSTREAM[dep]) DOWNSTREAM[dep] = [];
|
||||
DOWNSTREAM[dep].push(name);
|
||||
});
|
||||
});
|
||||
|
||||
function classifyLayer(name) {
|
||||
if (name.startsWith('stg_')) return 'staging';
|
||||
if (name.startsWith('dim_') || name.startsWith('fct_')) return 'foundation';
|
||||
return 'serving';
|
||||
}
|
||||
|
||||
// ── SVG rendering (mirrors Python logic) ─────────────────────────────────
|
||||
const COLORS = {
|
||||
staging: { bg:'#F0FDF4', border:'#BBF7D0', accent:'#16A34A', fill:'#DCFCE7', text:'#14532D' },
|
||||
foundation: { bg:'#EFF6FF', border:'#BFDBFE', accent:'#1D4ED8', fill:'#DBEAFE', text:'#1E3A8A' },
|
||||
serving: { bg:'#FFFBEB', border:'#FDE68A', accent:'#D97706', fill:'#FEF3C7', text:'#78350F' },
|
||||
};
|
||||
const LANE_ORDER = ['staging','foundation','serving'];
|
||||
const LANE_LABELS = { staging:'STAGING', foundation:'FOUNDATION', serving:'SERVING' };
|
||||
|
||||
function buildSVG() {
|
||||
const CW = 7.4, PAD_H = 10, NH = 26, VGAP = 10, PAD_TOP = 52, PAD_BOT = 24;
|
||||
const INNER_W = 210, LANE_GAP = 40, LANE_PAD_L = 16;
|
||||
|
||||
// downstream counts
|
||||
const dnCount = {};
|
||||
Object.keys(DAG).forEach(n => dnCount[n] = 0);
|
||||
Object.values(DAG).forEach(deps => deps.forEach(d => dnCount[d] = (dnCount[d]||0)+1));
|
||||
|
||||
const layers = { staging:[], foundation:[], serving:[] };
|
||||
Object.keys(DAG).forEach(n => layers[classifyLayer(n)].push(n));
|
||||
LANE_ORDER.forEach(l => layers[l].sort((a,b) => (dnCount[b]||0)-(dnCount[a]||0)||a.localeCompare(b)));
|
||||
|
||||
const nodeW = n => Math.max(n.length * CW + PAD_H*2, 80);
|
||||
|
||||
const laneX = {};
|
||||
let xc = 0;
|
||||
LANE_ORDER.forEach(l => { laneX[l] = xc; xc += INNER_W + LANE_PAD_L*2 + LANE_GAP; });
|
||||
|
||||
const pos = {};
|
||||
const laneH = {};
|
||||
LANE_ORDER.forEach(l => {
|
||||
let y = PAD_TOP;
|
||||
layers[l].forEach(n => { pos[n] = [laneX[l]+LANE_PAD_L, y]; y += NH+VGAP; });
|
||||
laneH[l] = y + PAD_BOT - VGAP;
|
||||
});
|
||||
|
||||
const W = xc - LANE_GAP;
|
||||
const H = Math.max(...Object.values(laneH));
|
||||
|
||||
let parts = [];
|
||||
|
||||
parts.push(`<defs>
|
||||
<marker id="arr" markerWidth="6" markerHeight="6" refX="5" refY="3" orient="auto">
|
||||
<path d="M0,0 L0,6 L6,3 z" fill="#CBD5E1"/>
|
||||
</marker>
|
||||
<marker id="arr-hi" markerWidth="6" markerHeight="6" refX="5" refY="3" orient="auto">
|
||||
<path d="M0,0 L0,6 L6,3 z" fill="#1D4ED8"/>
|
||||
</marker>
|
||||
</defs>`);
|
||||
|
||||
LANE_ORDER.forEach(l => {
|
||||
const c = COLORS[l], lx = laneX[l], lw = INNER_W+LANE_PAD_L*2, lh = laneH[l];
|
||||
parts.push(`<rect x="${lx}" y="0" width="${lw}" height="${lh}" rx="10" fill="${c.bg}" stroke="${c.border}" stroke-width="1"/>`);
|
||||
parts.push(`<text x="${lx+lw/2}" y="28" text-anchor="middle" font-family="'DM Sans',sans-serif" font-size="10" font-weight="700" letter-spacing="1.5" fill="${c.accent}">${LANE_LABELS[l]}</text>`);
|
||||
parts.push(`<line x1="${lx+12}" y1="36" x2="${lx+lw-12}" y2="36" stroke="${c.border}" stroke-width="1"/>`);
|
||||
});
|
||||
|
||||
// Edges
|
||||
Object.entries(DAG).forEach(([name, deps]) => {
|
||||
const [tx, ty] = pos[name];
|
||||
const tgt_cx = tx, tgt_cy = ty + NH/2;
|
||||
deps.forEach(dep => {
|
||||
if (!pos[dep]) return;
|
||||
const [sx, sy] = pos[dep];
|
||||
const sw = nodeW(dep);
|
||||
const src_cx = sx+sw, src_cy = sy+NH/2;
|
||||
const cpx1 = src_cx+(tgt_cx-src_cx)*0.45;
|
||||
const cpx2 = tgt_cx-(tgt_cx-src_cx)*0.45;
|
||||
parts.push(`<path class="lineage-edge" data-from="${dep}" data-to="${name}" d="M${src_cx},${src_cy} C${cpx1},${src_cy} ${cpx2},${tgt_cy} ${tgt_cx},${tgt_cy}" fill="none" stroke="#CBD5E1" stroke-width="1" marker-end="url(#arr)"/>`);
|
||||
});
|
||||
});
|
||||
|
||||
// Nodes
|
||||
Object.keys(DAG).forEach(name => {
|
||||
const l = classifyLayer(name);
|
||||
const c = COLORS[l];
|
||||
const [rx, ry] = pos[name];
|
||||
const rw = nodeW(name);
|
||||
const tx = rx+PAD_H, ty = ry+NH/2+4;
|
||||
parts.push(`<g class="lineage-node" data-model="${name}" tabindex="0" role="button" aria-label="${name}">
|
||||
<rect x="${rx}" y="${ry}" width="${rw}" height="${NH}" rx="5" fill="${c.fill}" stroke="${c.border}" stroke-width="1"/>
|
||||
<rect x="${rx}" y="${ry}" width="3" height="${NH}" rx="5" fill="${c.accent}"/>
|
||||
<text x="${tx}" y="${ty}" font-family="'DM Mono',monospace" font-size="11" fill="${c.text}">${name}</text>
|
||||
</g>`);
|
||||
});
|
||||
|
||||
return `<svg class="lineage-svg" viewBox="0 0 ${W} ${H}" xmlns="http://www.w3.org/2000/svg" style="width:100%;height:auto;min-width:${Math.ceil(W)}px">${parts.join('\n')}</svg>`;
|
||||
}
|
||||
|
||||
// ── Inject SVG ────────────────────────────────────────────────────────────
// Render the lineage diagram once; buildSVG() returns the complete <svg> markup.
document.getElementById('svg-wrap').innerHTML = buildSVG();

// ── Tooltip logic ─────────────────────────────────────────────────────────
// Floating hover card positioned near the cursor; #tt-box holds its body HTML.
const tooltip = document.getElementById('tooltip');
const ttBox = document.getElementById('tt-box');
// Max number of columns shown in the hover preview before the "+N more" hint.
const PREVIEW_COUNT = 4;
||||
// Render and position the hover card for `model` near viewport point (x, y).
// Shows up to PREVIEW_COUNT columns plus a "+N more" hint; flips to the left
// of the cursor when the card would overflow the right edge of the viewport.
function showTooltip(model, x, y) {
  const schema = getSchema(model);
  const shown = schema.columns.slice(0, PREVIEW_COUNT);
  const hidden = schema.columns.length - PREVIEW_COUNT;

  const previewRows = shown.map(col => `
    <div class="tt-row">
      <span class="tt-col">${col.name}</span>
      <span class="tt-type">${col.type}</span>
    </div>
  `).join('');

  ttBox.innerHTML = `
    <div class="tt-header">
      <span class="tt-name">${model}</span>
      <span class="tt-layer ${schema.layer}">${schema.layer}</span>
    </div>
    ${previewRows}
    ${hidden > 0 ? `<div class="tt-more">+${hidden} more column${hidden===1?'':'s'} — click to view all</div>` : ''}
    ${hidden <= 0 && schema.columns.length > 0 ? `<div class="tt-more" style="color:#94A3B8;font-style:italic">click to inspect</div>` : ''}
  `;

  // Position: prefer right of cursor; flip horizontally / clamp vertically
  // when the card would run off the viewport.
  const boxW = tooltip.offsetWidth || 260;
  const boxH = tooltip.offsetHeight || 120;
  let left = x + 14;
  let top = y - 10;
  if (left + boxW > window.innerWidth - 12) left = x - boxW - 14;
  if (top + boxH > window.innerHeight - 12) top = window.innerHeight - boxH - 12;
  tooltip.style.left = left + 'px';
  tooltip.style.top = top + 'px';
  tooltip.classList.add('visible');
}
|
||||
|
||||
// Dismiss the hover card (CSS transition handles the fade-out).
function hideTooltip() {
  tooltip.classList.remove('visible');
}
|
||||
|
||||
// ── Panel logic ───────────────────────────────────────────────────────────
// Slide-out detail panel elements; populated by openPanel().
const panel = document.getElementById('detail-panel');
const canvasWrap = document.getElementById('canvas-wrap');
const panelModelName = document.getElementById('panel-model-name');
const panelLayerChip = document.getElementById('panel-layer-chip');
const panelMat = document.getElementById('panel-materialization');
const panelBody = document.getElementById('panel-body');

// Model currently shown in the detail panel; null when the panel is closed.
let activeModel = null;
||||
// Format a count for display: em-dash when absent (null/undefined),
// locale-grouped digits otherwise.
function fmt(n) {
  if (n == null) return '—';
  return n.toLocaleString();
}
|
||||
|
||||
// Open (or refresh) the slide-out detail panel for `model`: header chips,
// row count, full schema table, and clickable upstream/downstream dep lists.
function openPanel(model) {
  activeModel = model;
  const schema = getSchema(model);

  panelModelName.textContent = model;
  panelLayerChip.textContent = schema.layer;
  panelLayerChip.className = 'meta-chip ' + schema.layer;
  panelMat.textContent = schema.materialization;

  const upstream = DAG[model] || [];
  const downstream = DOWNSTREAM[model] || [];

  // One clickable dependency entry; dot color keyed to the dep's layer.
  const depItem = d => `
    <div class="dep-item" data-model="${d}" onclick="openPanel('${d}')">
      <span class="dep-dot ${classifyLayer(d)}"></span>${d}
    </div>
  `;

  panelBody.innerHTML = `
    <div class="panel-section">
      <div class="section-label">Row count</div>
      <div class="stat-row">
        <span class="stat-val">${fmt(schema.rows)}</span>
        ${schema.rows != null ? '<span class="stat-unit">rows</span>' : ''}
      </div>
      ${schema.rows == null ? '<div class="empty-state" style="margin-top:2px">staging views have no row count</div>' : ''}
    </div>

    <div class="panel-section">
      <div class="section-label">Schema · ${schema.columns.length} columns</div>
      <table class="schema-table">
        <thead>
          <tr>
            <th>column</th>
            <th>type</th>
            <th style="text-align:right">nullable</th>
          </tr>
        </thead>
        <tbody>
          ${schema.columns.map(c => `
            <tr>
              <td class="col-name">${c.name}</td>
              <td class="col-type">${c.type}</td>
              <td class="col-null ${c.nullable?'yes':''}">${c.nullable?'null':'—'}</td>
            </tr>
          `).join('')}
        </tbody>
      </table>
    </div>

    <div class="panel-section">
      <div class="section-label">Upstream · ${upstream.length}</div>
      ${upstream.length ? `<div class="dep-list">${upstream.map(depItem).join('')}</div>` : '<div class="empty-state">no upstream dependencies</div>'}
    </div>

    <div class="panel-section">
      <div class="section-label">Downstream · ${downstream.length}</div>
      ${downstream.length ? `<div class="dep-list">${downstream.map(depItem).join('')}</div>` : '<div class="empty-state">nothing depends on this model</div>'}
    </div>
  `;

  panel.classList.add('open');
  canvasWrap.classList.add('panel-open');

  // Mark the opened node as selected; clear the flag on every other node.
  document.querySelectorAll('.lineage-node').forEach(node => {
    node.classList.toggle('selected', node.dataset.model === model);
  });
}
|
||||
|
||||
// Dismiss the detail panel and clear any node selection state.
function closePanel() {
  activeModel = null;
  document.querySelectorAll('.lineage-node').forEach(node => {
    node.classList.remove('selected');
  });
  panel.classList.remove('open');
  canvasWrap.classList.remove('panel-open');
}
|
||||
|
||||
// Panel close button dismisses the detail panel.
document.getElementById('panel-close').addEventListener('click', closePanel);

// ── Wire up SVG nodes ─────────────────────────────────────────────────────
const svg = document.querySelector('.lineage-svg');
const nodes = svg.querySelectorAll('.lineage-node');
const edges = svg.querySelectorAll('.lineage-edge');

// Emphasize edges touching `model` and fade the rest; pass null to restore
// every edge to its neutral state.
function paintEdges(model) {
  edges.forEach(edge => {
    if (model === null) {
      edge.classList.remove('hi', 'dim');
      edge.setAttribute('marker-end', 'url(#arr)');
      return;
    }
    const touches = edge.dataset.from === model || edge.dataset.to === model;
    edge.classList.toggle('hi', touches);
    edge.classList.toggle('dim', !touches);
    if (touches) edge.setAttribute('marker-end', 'url(#arr-hi)');
  });
}

nodes.forEach(g => {
  const model = g.dataset.model;

  g.addEventListener('mouseenter', e => {
    paintEdges(model);
    showTooltip(model, e.clientX, e.clientY);
  });

  // Keep the tooltip tracking the cursor while hovering.
  g.addEventListener('mousemove', e => {
    showTooltip(model, e.clientX, e.clientY);
  });

  g.addEventListener('mouseleave', () => {
    paintEdges(null);
    hideTooltip();
  });

  // Click toggles the detail panel for this model.
  g.addEventListener('click', () => {
    hideTooltip();
    if (activeModel === model) {
      closePanel();
    } else {
      openPanel(model);
    }
  });
});
|
||||
|
||||
// Keyboard shortcut: Escape dismisses the detail panel.
document.addEventListener('keydown', ev => {
  if (ev.key !== 'Escape') return;
  closePanel();
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -192,9 +192,9 @@ def run_workflow(conn, workflow: dict) -> None:
|
||||
entry_fn = getattr(module, entry_name)
|
||||
entry_fn()
|
||||
logger.info("Workflow %s completed successfully", workflow["name"])
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
logger.exception("Workflow %s failed", workflow["name"])
|
||||
send_alert(f"Workflow '{workflow['name']}' failed")
|
||||
send_alert(f"[extract] {type(exc).__name__}: {str(exc)[:100]}")
|
||||
raise
|
||||
|
||||
|
||||
@@ -233,8 +233,8 @@ def run_due_workflows(conn, workflows: list[dict]) -> bool:
|
||||
# Transform + Export + Deploy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_shell(cmd: str, timeout_seconds: int = SUBPROCESS_TIMEOUT_SECONDS) -> bool:
|
||||
"""Run a shell command. Returns True on success."""
|
||||
def run_shell(cmd: str, timeout_seconds: int = SUBPROCESS_TIMEOUT_SECONDS) -> tuple[bool, str]:
|
||||
"""Run a shell command. Returns (success, error_snippet)."""
|
||||
logger.info("Shell: %s", cmd)
|
||||
result = subprocess.run(
|
||||
cmd, shell=True, capture_output=True, text=True, timeout=timeout_seconds
|
||||
@@ -242,29 +242,31 @@ def run_shell(cmd: str, timeout_seconds: int = SUBPROCESS_TIMEOUT_SECONDS) -> bo
|
||||
if result.returncode != 0:
|
||||
logger.error("Shell failed (rc=%d): %s\nstdout: %s\nstderr: %s",
|
||||
result.returncode, cmd, result.stdout[-500:], result.stderr[-500:])
|
||||
return False
|
||||
return True
|
||||
raw = (result.stderr or result.stdout).strip()
|
||||
snippet = next((ln.strip() for ln in raw.splitlines() if ln.strip()), raw)[:120]
|
||||
return False, snippet
|
||||
return True, ""
|
||||
|
||||
|
||||
def run_transform() -> None:
|
||||
"""Run SQLMesh — it evaluates model staleness internally."""
|
||||
logger.info("Running SQLMesh transform")
|
||||
ok = run_shell(
|
||||
f"uv run sqlmesh -p transform/sqlmesh_padelnomics run",
|
||||
ok, err = run_shell(
|
||||
"uv run sqlmesh -p transform/sqlmesh_padelnomics plan prod --auto-apply",
|
||||
)
|
||||
if not ok:
|
||||
send_alert("SQLMesh transform failed")
|
||||
send_alert(f"[transform] {err}")
|
||||
|
||||
|
||||
def run_export() -> None:
|
||||
"""Export serving tables to analytics.duckdb."""
|
||||
logger.info("Exporting serving tables")
|
||||
ok = run_shell(
|
||||
ok, err = run_shell(
|
||||
f"DUCKDB_PATH={DUCKDB_PATH} SERVING_DUCKDB_PATH={SERVING_DUCKDB_PATH} "
|
||||
f"uv run python src/padelnomics/export_serving.py"
|
||||
)
|
||||
if not ok:
|
||||
send_alert("Serving export failed")
|
||||
send_alert(f"[export] {err}")
|
||||
|
||||
|
||||
def web_code_changed() -> bool:
|
||||
@@ -317,6 +319,7 @@ def git_pull_and_sync() -> None:
|
||||
|
||||
logger.info("New tag %s available (current: %s) — deploying", latest, current)
|
||||
run_shell(f"git checkout --detach {latest}")
|
||||
run_shell("sops --input-type dotenv --output-type dotenv -d .env.prod.sops > .env")
|
||||
run_shell("uv sync --all-packages")
|
||||
|
||||
|
||||
@@ -365,11 +368,11 @@ def tick() -> None:
|
||||
# Deploy web app if code changed
|
||||
if os.getenv("SUPERVISOR_GIT_PULL") and web_code_changed():
|
||||
logger.info("Web code changed — deploying")
|
||||
ok = run_shell("./deploy.sh")
|
||||
ok, err = run_shell("./deploy.sh")
|
||||
if ok:
|
||||
send_alert("Deploy succeeded")
|
||||
send_alert("[deploy] ok")
|
||||
else:
|
||||
send_alert("Deploy FAILED — check journalctl -u padelnomics-supervisor")
|
||||
send_alert(f"[deploy] failed: {err}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
@@ -386,9 +389,9 @@ def supervisor_loop() -> None:
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Supervisor stopped (KeyboardInterrupt)")
|
||||
break
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
logger.exception("Supervisor tick failed — backing off %ds", BACKOFF_SECONDS)
|
||||
send_alert("Supervisor tick failed")
|
||||
send_alert(f"[supervisor] {type(exc).__name__}: {str(exc)[:100]}")
|
||||
time.sleep(BACKOFF_SECONDS)
|
||||
else:
|
||||
time.sleep(TICK_INTERVAL_SECONDS)
|
||||
|
||||
@@ -34,6 +34,7 @@ SELECT
|
||||
v.tenant_id,
|
||||
v.country_code,
|
||||
v.city,
|
||||
v.city_slug,
|
||||
cc.active_court_count,
|
||||
ROUND(wh.hours_open_per_week, 1) AS hours_open_per_week,
|
||||
ROUND(wh.avg_hours_open_per_day, 1) AS avg_hours_open_per_day,
|
||||
@@ -42,6 +43,6 @@ SELECT
|
||||
ROUND(cc.active_court_count * wh.avg_hours_open_per_day, 1) AS capacity_court_hours_per_day,
|
||||
-- Total bookable court-hours per week
|
||||
ROUND(cc.active_court_count * wh.hours_open_per_week, 1) AS capacity_court_hours_per_week
|
||||
FROM staging.stg_playtomic_venues v
|
||||
FROM foundation.dim_venues v
|
||||
JOIN court_counts cc ON v.tenant_id = cc.tenant_id
|
||||
JOIN weekly_hours wh ON v.tenant_id = wh.tenant_id
|
||||
|
||||
@@ -98,6 +98,8 @@ SELECT
|
||||
court_count,
|
||||
indoor_court_count,
|
||||
outdoor_court_count,
|
||||
-- Conformed city key: enables deterministic joins to dim_cities / venue_pricing_benchmarks
|
||||
LOWER(REGEXP_REPLACE(LOWER(COALESCE(city, '')), '[^a-z0-9]+', '-')) AS city_slug,
|
||||
extracted_date
|
||||
FROM ranked
|
||||
QUALIFY ROW_NUMBER() OVER (
|
||||
|
||||
@@ -44,6 +44,7 @@ SELECT
|
||||
sa.tenant_id,
|
||||
cap.country_code,
|
||||
cap.city,
|
||||
cap.city_slug,
|
||||
cap.active_court_count,
|
||||
cap.capacity_court_hours_per_day,
|
||||
sa.available_slot_count,
|
||||
|
||||
@@ -57,7 +57,7 @@ WITH base AS (
|
||||
FROM foundation.dim_cities c
|
||||
LEFT JOIN serving.venue_pricing_benchmarks vpb
|
||||
ON c.country_code = vpb.country_code
|
||||
AND LOWER(TRIM(c.city_name)) = LOWER(TRIM(vpb.city))
|
||||
AND c.city_slug = vpb.city_slug
|
||||
WHERE c.padel_venue_count > 0
|
||||
),
|
||||
scored AS (
|
||||
|
||||
@@ -21,6 +21,7 @@ city_benchmarks AS (
|
||||
SELECT
|
||||
country_code,
|
||||
city,
|
||||
city_slug,
|
||||
median_peak_rate,
|
||||
median_offpeak_rate,
|
||||
median_occupancy_rate,
|
||||
@@ -128,7 +129,7 @@ SELECT
|
||||
FROM city_profiles cp
|
||||
LEFT JOIN city_benchmarks cb
|
||||
ON cp.country_code = cb.country_code
|
||||
AND LOWER(TRIM(cp.city_name)) = LOWER(TRIM(cb.city))
|
||||
AND cp.city_slug = cb.city_slug
|
||||
LEFT JOIN country_benchmarks ctb
|
||||
ON cp.country_code = ctb.country_code
|
||||
LEFT JOIN hardcoded_fallbacks hf
|
||||
|
||||
@@ -41,6 +41,6 @@ FROM serving.venue_pricing_benchmarks vpb
|
||||
-- Join city_market_profile to get the canonical city_slug and country metadata
|
||||
INNER JOIN serving.city_market_profile c
|
||||
ON vpb.country_code = c.country_code
|
||||
AND LOWER(TRIM(vpb.city)) = LOWER(TRIM(c.city_name))
|
||||
AND vpb.city_slug = c.city_slug
|
||||
-- Only cities with enough venues for meaningful pricing statistics
|
||||
WHERE vpb.venue_count >= 2
|
||||
|
||||
@@ -17,6 +17,7 @@ WITH venue_stats AS (
|
||||
da.tenant_id,
|
||||
da.country_code,
|
||||
da.city,
|
||||
da.city_slug,
|
||||
da.price_currency,
|
||||
AVG(da.occupancy_rate) AS avg_occupancy_rate,
|
||||
MEDIAN(da.median_price) AS median_hourly_rate,
|
||||
@@ -29,12 +30,13 @@ WITH venue_stats AS (
|
||||
WHERE TRY_CAST(da.snapshot_date AS DATE) >= CURRENT_DATE - INTERVAL '30 days'
|
||||
AND da.occupancy_rate IS NOT NULL
|
||||
AND da.occupancy_rate BETWEEN 0 AND 1.5
|
||||
GROUP BY da.tenant_id, da.country_code, da.city, da.price_currency
|
||||
GROUP BY da.tenant_id, da.country_code, da.city, da.city_slug, da.price_currency
|
||||
HAVING COUNT(DISTINCT da.snapshot_date) >= 3
|
||||
)
|
||||
SELECT
|
||||
country_code,
|
||||
city,
|
||||
city_slug,
|
||||
price_currency,
|
||||
COUNT(*) AS venue_count,
|
||||
-- Pricing benchmarks
|
||||
@@ -54,4 +56,4 @@ SELECT
|
||||
SUM(days_observed) AS total_venue_days_observed,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM venue_stats
|
||||
GROUP BY country_code, city, price_currency
|
||||
GROUP BY country_code, city, city_slug, price_currency
|
||||
|
||||
@@ -2,22 +2,14 @@
|
||||
-- One row per available 60-minute booking slot per court per venue per day.
|
||||
-- "Available" = the slot was NOT booked at capture time. Missing slots = booked.
|
||||
--
|
||||
-- Reads BOTH morning snapshots and recheck files:
|
||||
-- Morning (new): availability_{date}.jsonl.gz → snapshot_type = 'morning'
|
||||
-- Morning (old): availability_{date}.json.gz → snapshot_type = 'morning'
|
||||
-- Recheck (new): availability_{date}_recheck_{HH}.jsonl.gz → snapshot_type = 'recheck'
|
||||
-- Recheck (old): availability_{date}_recheck_{HH}.json.gz → snapshot_type = 'recheck'
|
||||
-- Reads morning snapshots and recheck files (JSONL format):
|
||||
-- Morning: availability_{date}.jsonl.gz → snapshot_type = 'morning'
|
||||
-- Recheck: availability_{date}_recheck_{HH}.jsonl.gz → snapshot_type = 'recheck'
|
||||
--
|
||||
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
|
||||
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
|
||||
--
|
||||
-- Supports two morning landing formats (UNION ALL during migration):
|
||||
-- New: availability_{date}.jsonl.gz — one venue per line, columns: tenant_id, slots, date, captured_at_utc
|
||||
-- Old: availability_{date}.json.gz — {"date":..., "venues": [...]} blob (UNNEST required)
|
||||
--
|
||||
-- Requires: at least one availability file in the landing zone.
|
||||
-- A seed file (data/landing/playtomic/1970/01/availability_1970-01-01.json.gz)
|
||||
-- with empty venues[] ensures this model runs before real data arrives.
|
||||
-- Source: data/landing/playtomic/{year}/{month}/availability_*.jsonl.gz
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_availability,
|
||||
@@ -27,7 +19,6 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one venue per JSONL line — no outer UNNEST needed
|
||||
morning_jsonl AS (
|
||||
SELECT
|
||||
date AS snapshot_date,
|
||||
@@ -50,35 +41,6 @@ morning_jsonl AS (
|
||||
WHERE filename NOT LIKE '%_recheck_%'
|
||||
AND tenant_id IS NOT NULL
|
||||
),
|
||||
-- Old format: {"date":..., "venues": [...]} blob — kept for transition
|
||||
morning_blob AS (
|
||||
SELECT
|
||||
af.date AS snapshot_date,
|
||||
af.captured_at_utc,
|
||||
'morning' AS snapshot_type,
|
||||
NULL::INTEGER AS recheck_hour,
|
||||
venue_json ->> 'tenant_id' AS tenant_id,
|
||||
venue_json -> 'slots' AS slots_json
|
||||
FROM (
|
||||
SELECT date, captured_at_utc, venues
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/availability_*.json.gz',
|
||||
format = 'auto',
|
||||
columns = {
|
||||
date: 'VARCHAR',
|
||||
captured_at_utc: 'VARCHAR',
|
||||
venues: 'JSON[]'
|
||||
},
|
||||
filename = true,
|
||||
maximum_object_size = 134217728 -- 128 MB; daily files grow with venue count
|
||||
)
|
||||
WHERE filename NOT LIKE '%_recheck_%'
|
||||
AND venues IS NOT NULL
|
||||
AND json_array_length(venues) > 0
|
||||
) af,
|
||||
LATERAL UNNEST(af.venues) AS t(venue_json)
|
||||
),
|
||||
-- Recheck snapshots (new JSONL format — one venue per line)
|
||||
recheck_jsonl AS (
|
||||
SELECT
|
||||
date AS snapshot_date,
|
||||
@@ -101,43 +63,10 @@ recheck_jsonl AS (
|
||||
)
|
||||
WHERE tenant_id IS NOT NULL
|
||||
),
|
||||
-- Recheck snapshots (old blob format, kept for transition)
|
||||
recheck_blob AS (
|
||||
SELECT
|
||||
rf.date AS snapshot_date,
|
||||
rf.captured_at_utc,
|
||||
'recheck' AS snapshot_type,
|
||||
TRY_CAST(
|
||||
regexp_extract(rf.filename, '_recheck_(\d+)', 1) AS INTEGER
|
||||
) AS recheck_hour,
|
||||
venue_json ->> 'tenant_id' AS tenant_id,
|
||||
venue_json -> 'slots' AS slots_json
|
||||
FROM (
|
||||
SELECT date, captured_at_utc, venues, filename
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/availability_*_recheck_*.json.gz',
|
||||
format = 'auto',
|
||||
columns = {
|
||||
date: 'VARCHAR',
|
||||
captured_at_utc: 'VARCHAR',
|
||||
venues: 'JSON[]'
|
||||
},
|
||||
filename = true,
|
||||
maximum_object_size = 134217728 -- 128 MB; matches morning snapshot limit
|
||||
)
|
||||
WHERE venues IS NOT NULL
|
||||
AND json_array_length(venues) > 0
|
||||
) rf,
|
||||
LATERAL UNNEST(rf.venues) AS t(venue_json)
|
||||
),
|
||||
all_venues AS (
|
||||
SELECT * FROM morning_jsonl
|
||||
UNION ALL
|
||||
SELECT * FROM morning_blob
|
||||
UNION ALL
|
||||
SELECT * FROM recheck_jsonl
|
||||
UNION ALL
|
||||
SELECT * FROM recheck_blob
|
||||
),
|
||||
raw_resources AS (
|
||||
SELECT
|
||||
|
||||
@@ -5,11 +5,7 @@
|
||||
-- DuckDB auto-infers opening_hours as STRUCT, so we access each day by literal
|
||||
-- key (no dynamic access) and UNION ALL to unpivot.
|
||||
--
|
||||
-- Supports two landing formats (UNION ALL during migration):
|
||||
-- New: tenants.jsonl.gz — one tenant per line, opening_hours is a top-level JSON column
|
||||
-- Old: tenants.json.gz — {"tenants": [...]} blob (UNNEST required)
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.{jsonl,json}.gz
|
||||
-- Source: data/landing/playtomic/{year}/{month}/{day}/tenants.jsonl.gz
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_opening_hours,
|
||||
@@ -19,40 +15,18 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one tenant per JSONL line
|
||||
jsonl_venues AS (
|
||||
venues AS (
|
||||
SELECT
|
||||
tenant_id,
|
||||
opening_hours AS oh
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.jsonl.gz',
|
||||
@LANDING_DIR || '/playtomic/*/*/*/tenants.jsonl.gz',
|
||||
format = 'newline_delimited',
|
||||
columns = {tenant_id: 'VARCHAR', opening_hours: 'JSON'}
|
||||
)
|
||||
WHERE tenant_id IS NOT NULL
|
||||
AND opening_hours IS NOT NULL
|
||||
),
|
||||
-- Old format: blob
|
||||
blob_venues AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
tenant -> 'opening_hours' AS oh
|
||||
FROM (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto',
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
)
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
AND (tenant -> 'opening_hours') IS NOT NULL
|
||||
),
|
||||
venues AS (
|
||||
SELECT * FROM jsonl_venues
|
||||
UNION ALL
|
||||
SELECT * FROM blob_venues
|
||||
),
|
||||
-- Unpivot by UNION ALL — 7 literal key accesses
|
||||
unpivoted AS (
|
||||
SELECT tenant_id, 'MONDAY' AS day_of_week, 1 AS day_number,
|
||||
@@ -104,6 +78,4 @@ SELECT
|
||||
FROM unpivoted
|
||||
WHERE opening_time IS NOT NULL
|
||||
AND closing_time IS NOT NULL
|
||||
-- Enforce grain: if both old blob and new JSONL exist for the same month,
|
||||
-- the UNION ALL produces duplicate (tenant_id, day_of_week) pairs — deduplicate.
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY tenant_id, day_of_week ORDER BY tenant_id) = 1
|
||||
|
||||
@@ -2,11 +2,7 @@
|
||||
-- Reads resources array from the landing zone to extract court type, size,
|
||||
-- surface, and booking config.
|
||||
--
|
||||
-- Supports two landing formats (UNION ALL during migration):
|
||||
-- New: tenants.jsonl.gz — one tenant per line, resources is a top-level JSON column
|
||||
-- Old: tenants.json.gz — {"tenants": [...]} blob (double UNNEST: tenants → resources)
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.{jsonl,json}.gz
|
||||
-- Source: data/landing/playtomic/{year}/{month}/{day}/tenants.jsonl.gz
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_resources,
|
||||
@@ -16,41 +12,18 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one tenant per JSONL line — single UNNEST for resources
|
||||
jsonl_unnested AS (
|
||||
unnested AS (
|
||||
SELECT
|
||||
tenant_id,
|
||||
UPPER(address ->> 'country_code') AS country_code,
|
||||
UNNEST(from_json(resources, '["JSON"]')) AS resource_json
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.jsonl.gz',
|
||||
@LANDING_DIR || '/playtomic/*/*/*/tenants.jsonl.gz',
|
||||
format = 'newline_delimited',
|
||||
columns = {tenant_id: 'VARCHAR', address: 'JSON', resources: 'JSON'}
|
||||
)
|
||||
WHERE tenant_id IS NOT NULL
|
||||
AND resources IS NOT NULL
|
||||
),
|
||||
-- Old format: blob — double UNNEST (tenants → resources)
|
||||
blob_unnested AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
UPPER(tenant -> 'address' ->> 'country_code') AS country_code,
|
||||
UNNEST(from_json(tenant -> 'resources', '["JSON"]')) AS resource_json
|
||||
FROM (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto',
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
)
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
AND (tenant -> 'resources') IS NOT NULL
|
||||
),
|
||||
unnested AS (
|
||||
SELECT * FROM jsonl_unnested
|
||||
UNION ALL
|
||||
SELECT * FROM blob_unnested
|
||||
)
|
||||
SELECT
|
||||
tenant_id,
|
||||
@@ -68,6 +41,4 @@ SELECT
|
||||
FROM unnested
|
||||
WHERE (resource_json ->> 'resource_id') IS NOT NULL
|
||||
AND (resource_json ->> 'sport_id') = 'PADEL'
|
||||
-- Enforce grain: if both old blob and new JSONL exist for the same month,
|
||||
-- the UNION ALL produces duplicate (tenant_id, resource_id) pairs — deduplicate.
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY tenant_id, resource_json ->> 'resource_id' ORDER BY tenant_id) = 1
|
||||
|
||||
@@ -3,11 +3,7 @@
|
||||
-- including address, opening hours, court resources, VAT rate, and facilities.
|
||||
-- Deduplicates on tenant_id (keeps most recent extraction).
|
||||
--
|
||||
-- Supports two landing formats (UNION ALL during migration):
|
||||
-- New: tenants.jsonl.gz — one tenant JSON object per line (no UNNEST needed)
|
||||
-- Old: tenants.json.gz — {"tenants": [{...}]} blob (UNNEST required)
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.{jsonl,json}.gz
|
||||
-- Source: data/landing/playtomic/{year}/{month}/{day}/tenants.jsonl.gz
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_venues,
|
||||
@@ -17,8 +13,7 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one tenant per JSONL line — no UNNEST, access columns directly
|
||||
jsonl_parsed AS (
|
||||
parsed AS (
|
||||
SELECT
|
||||
tenant_id,
|
||||
tenant_name,
|
||||
@@ -45,7 +40,7 @@ jsonl_parsed AS (
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.jsonl.gz',
|
||||
@LANDING_DIR || '/playtomic/*/*/*/tenants.jsonl.gz',
|
||||
format = 'newline_delimited',
|
||||
filename = true,
|
||||
columns = {
|
||||
@@ -59,49 +54,6 @@ jsonl_parsed AS (
|
||||
)
|
||||
WHERE tenant_id IS NOT NULL
|
||||
),
|
||||
-- Old format: {"tenants": [...]} blob — keep for transition until old files rotate out
|
||||
blob_parsed AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
tenant ->> 'tenant_name' AS tenant_name,
|
||||
tenant ->> 'slug' AS slug,
|
||||
tenant ->> 'tenant_type' AS tenant_type,
|
||||
tenant ->> 'tenant_status' AS tenant_status,
|
||||
tenant ->> 'playtomic_status' AS playtomic_status,
|
||||
tenant ->> 'booking_type' AS booking_type,
|
||||
tenant -> 'address' ->> 'street' AS street,
|
||||
tenant -> 'address' ->> 'city' AS city,
|
||||
tenant -> 'address' ->> 'postal_code' AS postal_code,
|
||||
UPPER(tenant -> 'address' ->> 'country_code') AS country_code,
|
||||
tenant -> 'address' ->> 'timezone' AS timezone,
|
||||
tenant -> 'address' ->> 'administrative_area' AS administrative_area,
|
||||
TRY_CAST(tenant -> 'address' -> 'coordinate' ->> 'lat' AS DOUBLE) AS lat,
|
||||
TRY_CAST(tenant -> 'address' -> 'coordinate' ->> 'lon' AS DOUBLE) AS lon,
|
||||
TRY_CAST(tenant ->> 'vat_rate' AS DOUBLE) AS vat_rate,
|
||||
tenant ->> 'default_currency' AS default_currency,
|
||||
TRY_CAST(tenant -> 'booking_settings' ->> 'booking_ahead_limit' AS INTEGER) AS booking_ahead_limit_minutes,
|
||||
tenant -> 'opening_hours' AS opening_hours_json,
|
||||
tenant -> 'resources' AS resources_json,
|
||||
tenant ->> 'created_at' AS created_at,
|
||||
tenant ->> 'is_playtomic_partner' AS is_playtomic_partner_raw,
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM (
|
||||
SELECT UNNEST(tenants) AS tenant, filename
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto',
|
||||
filename = true,
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
)
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
),
|
||||
parsed AS (
|
||||
SELECT * FROM jsonl_parsed
|
||||
UNION ALL
|
||||
SELECT * FROM blob_parsed
|
||||
),
|
||||
deduped AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (PARTITION BY tenant_id ORDER BY source_file DESC) AS rn
|
||||
|
||||
@@ -2,12 +2,9 @@
|
||||
-- Used as a "racket sport culture" signal in the opportunity score:
|
||||
-- areas with high tennis court density are prime padel adoption markets.
|
||||
--
|
||||
-- Supports two landing formats (UNION ALL during migration):
|
||||
-- New: courts.jsonl.gz — one OSM element per line; nodes have lat/lon directly,
|
||||
-- ways/relations have center.lat/center.lon (Overpass out center)
|
||||
-- Old: courts.json.gz — {"elements": [...]} blob (UNNEST required)
|
||||
--
|
||||
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.{jsonl,json}.gz
|
||||
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.jsonl.gz
|
||||
-- Format: one OSM element per line; nodes have lat/lon directly,
|
||||
-- ways/relations have center.lat/center.lon (Overpass out center)
|
||||
|
||||
MODEL (
|
||||
name staging.stg_tennis_courts,
|
||||
@@ -17,8 +14,7 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one OSM element per JSONL line
|
||||
jsonl_elements AS (
|
||||
parsed AS (
|
||||
SELECT
|
||||
type AS osm_type,
|
||||
TRY_CAST(id AS BIGINT) AS osm_id,
|
||||
@@ -47,33 +43,6 @@ jsonl_elements AS (
|
||||
)
|
||||
WHERE type IS NOT NULL
|
||||
),
|
||||
-- Old format: {"elements": [...]} blob — kept for transition
|
||||
blob_elements AS (
|
||||
SELECT
|
||||
elem ->> 'type' AS osm_type,
|
||||
(elem ->> 'id')::BIGINT AS osm_id,
|
||||
TRY_CAST(elem ->> 'lat' AS DOUBLE) AS lat,
|
||||
TRY_CAST(elem ->> 'lon' AS DOUBLE) AS lon,
|
||||
elem -> 'tags' ->> 'name' AS name,
|
||||
elem -> 'tags' ->> 'addr:country' AS country_code,
|
||||
elem -> 'tags' ->> 'addr:city' AS city_tag,
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM (
|
||||
SELECT UNNEST(elements) AS elem, filename
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/overpass_tennis/*/*/courts.json.gz',
|
||||
format = 'auto',
|
||||
filename = true
|
||||
)
|
||||
)
|
||||
WHERE (elem ->> 'type') IS NOT NULL
|
||||
),
|
||||
parsed AS (
|
||||
SELECT * FROM jsonl_elements
|
||||
UNION ALL
|
||||
SELECT * FROM blob_elements
|
||||
),
|
||||
deduped AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
|
||||
|
||||
@@ -100,7 +100,7 @@ _DAG: dict[str, list[str]] = {
|
||||
"stg_regional_income", "stg_income_usa", "stg_padel_courts", "stg_tennis_courts",
|
||||
],
|
||||
"dim_venue_capacity": [
|
||||
"stg_playtomic_venues", "stg_playtomic_resources", "stg_playtomic_opening_hours",
|
||||
"dim_venues", "stg_playtomic_resources", "stg_playtomic_opening_hours",
|
||||
],
|
||||
"fct_availability_slot": ["stg_playtomic_availability"],
|
||||
"fct_daily_availability": ["fct_availability_slot", "dim_venue_capacity"],
|
||||
@@ -767,6 +767,12 @@ async def pipeline_trigger_extract():
|
||||
|
||||
# ── Lineage tab ───────────────────────────────────────────────────────────────
|
||||
|
||||
# Compute downstream map once at import time (DAG is static).
|
||||
_DOWNSTREAM: dict[str, list[str]] = {n: [] for n in _DAG}
|
||||
for _name, _deps in _DAG.items():
|
||||
for _dep in _deps:
|
||||
_DOWNSTREAM.setdefault(_dep, []).append(_name)
|
||||
|
||||
|
||||
@bp.route("/lineage")
|
||||
@role_required("admin")
|
||||
@@ -780,6 +786,67 @@ async def pipeline_lineage():
|
||||
)
|
||||
|
||||
|
||||
@bp.route("/lineage/schema/<model>")
|
||||
@role_required("admin")
|
||||
async def pipeline_lineage_schema(model: str):
|
||||
"""JSON: schema details for a lineage node.
|
||||
|
||||
Returns columns + types from information_schema (serving models only —
|
||||
staging/foundation live in lakehouse.duckdb which the web app cannot open).
|
||||
Row count is included for serving models when the table exists.
|
||||
"""
|
||||
from quart import jsonify
|
||||
|
||||
from ..analytics import fetch_analytics
|
||||
|
||||
if model not in _DAG:
|
||||
return jsonify({"error": "unknown model"}), 404
|
||||
|
||||
layer = _classify_layer(model)
|
||||
upstream = _DAG[model]
|
||||
downstream = _DOWNSTREAM.get(model, [])
|
||||
|
||||
row_count = None
|
||||
columns: list[dict] = []
|
||||
|
||||
if layer == "serving":
|
||||
col_rows = await fetch_analytics(
|
||||
"""
|
||||
SELECT column_name, data_type, is_nullable
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = 'serving' AND table_name = ?
|
||||
ORDER BY ordinal_position
|
||||
""",
|
||||
[model],
|
||||
)
|
||||
columns = [
|
||||
{
|
||||
"name": r["column_name"],
|
||||
"type": r["data_type"],
|
||||
"nullable": r["is_nullable"] == "YES",
|
||||
}
|
||||
for r in col_rows
|
||||
]
|
||||
if columns:
|
||||
# model is validated against _DAG keys — safe to interpolate
|
||||
count_rows = await fetch_analytics(
|
||||
f"SELECT count(*) AS n FROM serving.{model}"
|
||||
)
|
||||
if count_rows:
|
||||
row_count = count_rows[0]["n"]
|
||||
|
||||
return jsonify(
|
||||
{
|
||||
"model": model,
|
||||
"layer": layer,
|
||||
"upstream": upstream,
|
||||
"downstream": downstream,
|
||||
"row_count": row_count,
|
||||
"columns": columns,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
# ── Catalog tab ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -881,7 +881,7 @@ async def marketplace_activity():
|
||||
FROM lead_forwards lf
|
||||
JOIN suppliers s ON s.id = lf.supplier_id
|
||||
UNION ALL
|
||||
SELECT 'credit' as event_type, id as ref_id, supplier_id as ref2_id,
|
||||
SELECT 'credit' as event_type, cl.id as ref_id, cl.supplier_id as ref2_id,
|
||||
s.name as actor, cl.event_type as detail,
|
||||
CAST(cl.delta AS TEXT) as extra, cl.created_at
|
||||
FROM credit_ledger cl
|
||||
|
||||
@@ -156,7 +156,7 @@
|
||||
|
||||
<a href="{{ url_for('pipeline.pipeline_dashboard') }}" class="{% if active_section == 'pipeline' %}active{% endif %}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M20.25 6.375c0 2.278-3.694 4.125-8.25 4.125S3.75 8.653 3.75 6.375m16.5 0c0-2.278-3.694-4.125-8.25-4.125S3.75 4.097 3.75 6.375m16.5 0v11.25c0 2.278-3.694 4.125-8.25 4.125s-8.25-1.847-8.25-4.125V6.375m16.5 0v3.75m-16.5-3.75v3.75m16.5 0v3.75C20.25 16.153 16.556 18 12 18s-8.25-1.847-8.25-4.125v-3.75m16.5 0c0 2.278-3.694 4.125-8.25 4.125s-8.25-1.847-8.25-4.125"/></svg>
|
||||
Pipeline
|
||||
Data Platform
|
||||
</a>
|
||||
|
||||
<div class="admin-sidebar__divider"></div>
|
||||
|
||||
@@ -5,45 +5,296 @@
|
||||
<span class="text-xs font-normal text-slate ml-2">
|
||||
{{ node_count }} models — staging → foundation → serving
|
||||
</span>
|
||||
<span class="text-xs font-normal text-slate" style="margin-left:auto">
|
||||
hover to preview · click to inspect
|
||||
</span>
|
||||
</p>
|
||||
<div style="overflow-x:auto;padding:1rem 0.5rem 0.5rem">
|
||||
{{ lineage_svg | safe }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Detail panel: fixed right, slides in on node click -->
|
||||
<div id="ln-panel" style="
|
||||
position:fixed;top:0;right:0;bottom:0;width:320px;
|
||||
background:#fff;border-left:1px solid #E2E8F0;
|
||||
display:flex;flex-direction:column;
|
||||
transform:translateX(100%);
|
||||
transition:transform 0.2s cubic-bezier(0.4,0,0.2,1);
|
||||
z-index:200;overflow:hidden;
|
||||
box-shadow:-4px 0 24px rgba(0,0,0,0.06);
|
||||
">
|
||||
<div style="padding:0.75rem 1rem;border-bottom:1px solid #F1F5F9;flex-shrink:0">
|
||||
<div style="display:flex;align-items:center;gap:0.5rem;margin-bottom:0.375rem">
|
||||
<span id="ln-model-name" style="
|
||||
font-family:'Commit Mono',ui-monospace,monospace;
|
||||
font-size:0.8125rem;font-weight:500;color:#1E293B;
|
||||
flex:1;min-width:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;
|
||||
">—</span>
|
||||
<button id="ln-close" style="
|
||||
background:none;border:none;cursor:pointer;color:#94A3B8;
|
||||
font-size:1rem;padding:2px 4px;border-radius:4px;line-height:1;
|
||||
transition:color 0.1s,background 0.1s;
|
||||
" title="Close (Esc)">✕</button>
|
||||
</div>
|
||||
<div style="display:flex;align-items:center;gap:0.5rem">
|
||||
<span id="ln-layer-chip" style="
|
||||
font-size:0.625rem;font-weight:700;letter-spacing:0.06em;
|
||||
text-transform:uppercase;padding:2px 7px;border-radius:99px;
|
||||
">—</span>
|
||||
<span id="ln-mat" style="font-size:0.6875rem;color:#64748B"></span>
|
||||
</div>
|
||||
</div>
|
||||
<div id="ln-body" style="flex:1;overflow-y:auto"></div>
|
||||
</div>
|
||||
|
||||
<!-- Hover tooltip -->
|
||||
<div id="ln-tooltip" style="
|
||||
position:fixed;z-index:300;pointer-events:none;
|
||||
opacity:0;transition:opacity 0.08s;
|
||||
filter:drop-shadow(0 4px 14px rgba(0,0,0,0.14));
|
||||
">
|
||||
<div id="ln-tt-inner" style="
|
||||
background:#0F172A;color:#fff;border-radius:8px;
|
||||
padding:10px 12px;min-width:190px;max-width:250px;
|
||||
"></div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.lineage-node { cursor: default; }
|
||||
.lineage-node rect:first-of-type { transition: filter 0.12s; }
|
||||
.lineage-node:hover rect:first-of-type { filter: brightness(0.94); }
|
||||
.lineage-node { cursor: pointer; }
|
||||
.lineage-node rect:first-of-type { transition: filter 0.1s; }
|
||||
.lineage-node:hover rect:first-of-type { filter: brightness(0.92); }
|
||||
.lineage-node.ln-selected rect:first-of-type { filter: brightness(0.86) !important; }
|
||||
.lineage-edge { transition: stroke 0.12s, stroke-width 0.12s, opacity 0.12s; }
|
||||
.lineage-edge.hi { stroke: #1D4ED8 !important; stroke-width: 2 !important; marker-end: url(#arr-hi) !important; }
|
||||
.lineage-edge.dim { opacity: 0.12; }
|
||||
.lineage-edge.dim { opacity: 0.1; }
|
||||
|
||||
.ln-section { border-bottom: 1px solid #F1F5F9; padding: 0.75rem 1rem; }
|
||||
.ln-section:last-child { border-bottom: none; }
|
||||
.ln-label {
|
||||
font-size: 0.5875rem; font-weight: 700; letter-spacing: 0.08em;
|
||||
text-transform: uppercase; color: #94A3B8; margin-bottom: 0.5rem;
|
||||
}
|
||||
.ln-schema-table { width: 100%; border-collapse: collapse; font-size: 0.6875rem; }
|
||||
.ln-schema-table th {
|
||||
text-align: left; font-weight: 600; color: #64748B;
|
||||
padding: 0 0 0.375rem; font-size: 0.5875rem; letter-spacing: 0.04em; text-transform: uppercase;
|
||||
}
|
||||
.ln-schema-table td { padding: 3px 4px 3px 0; vertical-align: middle; }
|
||||
.ln-schema-table tr + tr td { border-top: 1px solid #F1F5F9; }
|
||||
.ln-col-name { font-family: 'Commit Mono', ui-monospace, monospace; color: #1E293B; font-weight: 500; }
|
||||
.ln-col-type { font-family: 'Commit Mono', ui-monospace, monospace; color: #94A3B8; font-size: 0.625rem; }
|
||||
.ln-col-null { font-size: 0.5625rem; color: #CBD5E1; text-align: right; }
|
||||
.ln-col-null.yes { color: #D97706; }
|
||||
|
||||
.ln-dep-item {
|
||||
display: flex; align-items: center; gap: 6px; padding: 4px 6px;
|
||||
border-radius: 5px; cursor: pointer;
|
||||
font-family: 'Commit Mono', ui-monospace, monospace;
|
||||
font-size: 0.6875rem; color: #334155; transition: background 0.1s;
|
||||
}
|
||||
.ln-dep-item:hover { background: #F8FAFC; }
|
||||
.ln-dep-dot { width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; }
|
||||
.ln-dep-dot.staging { background: #16A34A; }
|
||||
.ln-dep-dot.foundation { background: #1D4ED8; }
|
||||
.ln-dep-dot.serving { background: #D97706; }
|
||||
|
||||
.ln-chip-staging { background: #DCFCE7; color: #14532D; border: 1px solid #BBF7D0; }
|
||||
.ln-chip-foundation { background: #DBEAFE; color: #1E3A8A; border: 1px solid #BFDBFE; }
|
||||
.ln-chip-serving { background: #FEF3C7; color: #78350F; border: 1px solid #FDE68A; }
|
||||
</style>
|
||||
|
||||
<script>
|
||||
(function () {
|
||||
var SCHEMA_BASE = "{{ url_for('pipeline.pipeline_lineage_schema', model='MODEL') }}".replace('/MODEL', '/');
|
||||
var svg = document.querySelector('.lineage-svg');
|
||||
if (!svg) return;
|
||||
var nodes = svg.querySelectorAll('.lineage-node');
|
||||
var edges = svg.querySelectorAll('.lineage-edge');
|
||||
var panel = document.getElementById('ln-panel');
|
||||
var panelBody = document.getElementById('ln-body');
|
||||
var tooltip = document.getElementById('ln-tooltip');
|
||||
var ttInner = document.getElementById('ln-tt-inner');
|
||||
var activeModel = null;
|
||||
var cache = {};
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────
|
||||
function layer(model) {
|
||||
if (model.startsWith('stg_')) return 'staging';
|
||||
if (model.startsWith('dim_') || model.startsWith('fct_')) return 'foundation';
|
||||
return 'serving';
|
||||
}
|
||||
|
||||
function fmt(n) {
|
||||
return n == null ? '—' : Number(n).toLocaleString();
|
||||
}
|
||||
|
||||
// ── Edge highlight ─────────────────────────────────────────────────────
|
||||
function highlightEdges(model) {
|
||||
edges.forEach(function (e) {
|
||||
if (e.dataset.from === model || e.dataset.to === model) {
|
||||
e.classList.add('hi'); e.classList.remove('dim');
|
||||
} else {
|
||||
e.classList.add('dim'); e.classList.remove('hi');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function clearEdges() {
|
||||
edges.forEach(function (e) { e.classList.remove('hi', 'dim'); });
|
||||
}
|
||||
|
||||
// ── Schema fetch (cached) ──────────────────────────────────────────────
|
||||
function fetchSchema(model, cb) {
|
||||
if (cache[model]) { cb(cache[model]); return; }
|
||||
fetch(SCHEMA_BASE + encodeURIComponent(model))
|
||||
.then(function (r) { return r.json(); })
|
||||
.then(function (d) { cache[model] = d; cb(d); })
|
||||
.catch(function () {
|
||||
var fallback = { model: model, layer: layer(model), columns: [], upstream: [], downstream: [], row_count: null };
|
||||
cache[model] = fallback;
|
||||
cb(fallback);
|
||||
});
|
||||
}
|
||||
|
||||
// ── Tooltip ────────────────────────────────────────────────────────────
|
||||
function showTooltip(data, x, y) {
|
||||
var cols = data.columns || [];
|
||||
var preview = cols.slice(0, 4);
|
||||
var extra = cols.length - 4;
|
||||
var lc = layer(data.model);
|
||||
var badge = lc === 'staging'
|
||||
? 'background:rgba(22,163,74,0.25);color:#86EFAC'
|
||||
: lc === 'foundation'
|
||||
? 'background:rgba(29,78,216,0.3);color:#93C5FD'
|
||||
: 'background:rgba(217,119,6,0.25);color:#FCD34D';
|
||||
|
||||
ttInner.innerHTML =
|
||||
'<div style="display:flex;align-items:center;gap:6px;margin-bottom:8px;padding-bottom:7px;border-bottom:1px solid rgba(255,255,255,0.08)">' +
|
||||
'<span style="font-family:\'Commit Mono\',monospace;font-size:0.6875rem;font-weight:500;flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">' + data.model + '</span>' +
|
||||
'<span style="font-size:0.5625rem;font-weight:700;letter-spacing:0.07em;text-transform:uppercase;padding:2px 5px;border-radius:3px;flex-shrink:0;' + badge + '">' + lc + '</span>' +
|
||||
'</div>' +
|
||||
(cols.length === 0
|
||||
? '<div style="font-size:0.6875rem;color:#475569;font-style:italic">schema in lakehouse only</div>'
|
||||
: preview.map(function (c) {
|
||||
return '<div style="display:flex;align-items:baseline;gap:6px;padding:2px 0">' +
|
||||
'<span style="font-family:\'Commit Mono\',monospace;font-size:0.6875rem;color:#CBD5E1;flex:1;overflow:hidden;text-overflow:ellipsis">' + c.name + '</span>' +
|
||||
'<span style="font-family:\'Commit Mono\',monospace;font-size:0.625rem;color:#475569;flex-shrink:0">' + c.type + '</span>' +
|
||||
'</div>';
|
||||
}).join('') +
|
||||
(extra > 0
|
||||
? '<div style="font-size:0.625rem;color:#475569;margin-top:5px;padding-top:5px;border-top:1px solid rgba(255,255,255,0.06)">+' + extra + ' more — click to view all</div>'
|
||||
: '<div style="font-size:0.625rem;color:#334155;margin-top:5px;padding-top:5px;border-top:1px solid rgba(255,255,255,0.06);font-style:italic">click to inspect</div>')
|
||||
);
|
||||
|
||||
var vw = window.innerWidth, vh = window.innerHeight;
|
||||
var left = x + 14, top = y - 10;
|
||||
if (left + 252 > vw - 12) left = x - 252 - 14;
|
||||
if (top + 160 > vh - 12) top = vh - 160 - 12;
|
||||
tooltip.style.left = left + 'px';
|
||||
tooltip.style.top = top + 'px';
|
||||
tooltip.style.opacity = '1';
|
||||
}
|
||||
|
||||
function hideTooltip() { tooltip.style.opacity = '0'; }
|
||||
|
||||
// ── Panel ──────────────────────────────────────────────────────────────
|
||||
function depItems(list) {
|
||||
if (!list.length) return '<div style="font-size:0.6875rem;color:#94A3B8;font-style:italic">none</div>';
|
||||
return list.map(function (d) {
|
||||
return '<div class="ln-dep-item" data-model="' + d + '"><span class="ln-dep-dot ' + layer(d) + '"></span>' + d + '</div>';
|
||||
}).join('');
|
||||
}
|
||||
|
||||
function renderPanel(data) {
|
||||
var cols = data.columns || [];
|
||||
var lc = data.layer || layer(data.model);
|
||||
|
||||
document.getElementById('ln-mat').textContent =
|
||||
cols.length > 0 ? 'table' : (lc === 'serving' ? '' : '');
|
||||
|
||||
panelBody.innerHTML =
|
||||
'<div class="ln-section">' +
|
||||
'<div class="ln-label">Row count</div>' +
|
||||
(data.row_count != null
|
||||
? '<div style="font-family:\'Commit Mono\',monospace;font-size:1rem;font-weight:500;color:#1E293B">' + fmt(data.row_count) +
|
||||
' <span style="font-size:0.6875rem;font-weight:400;color:#64748B">rows</span></div>'
|
||||
: '<div style="font-size:0.6875rem;color:#94A3B8;font-style:italic">' +
|
||||
(lc !== 'serving' ? 'staging/foundation — in lakehouse.duckdb' : 'not yet built') + '</div>') +
|
||||
'</div>' +
|
||||
|
||||
'<div class="ln-section">' +
|
||||
'<div class="ln-label">Schema · ' + cols.length + ' columns</div>' +
|
||||
(cols.length > 0
|
||||
? '<table class="ln-schema-table"><thead><tr><th>column</th><th>type</th><th style="text-align:right">null?</th></tr></thead><tbody>' +
|
||||
cols.map(function (c) {
|
||||
return '<tr><td class="ln-col-name">' + c.name + '</td><td class="ln-col-type">' + c.type + '</td>' +
|
||||
'<td class="ln-col-null' + (c.nullable ? ' yes' : '') + '">' + (c.nullable ? 'null' : '—') + '</td></tr>';
|
||||
}).join('') + '</tbody></table>'
|
||||
: '<div style="font-size:0.6875rem;color:#94A3B8;font-style:italic">schema available in lakehouse.duckdb only</div>') +
|
||||
'</div>' +
|
||||
|
||||
'<div class="ln-section"><div class="ln-label">Upstream · ' + (data.upstream || []).length + '</div>' + depItems(data.upstream || []) + '</div>' +
|
||||
'<div class="ln-section"><div class="ln-label">Downstream · ' + (data.downstream || []).length + '</div>' + depItems(data.downstream || []) + '</div>';
|
||||
|
||||
panelBody.querySelectorAll('.ln-dep-item').forEach(function (el) {
|
||||
el.addEventListener('click', function () { openPanel(el.dataset.model); });
|
||||
});
|
||||
}
|
||||
|
||||
function openPanel(model) {
|
||||
activeModel = model;
|
||||
document.getElementById('ln-model-name').textContent = model;
|
||||
var chip = document.getElementById('ln-layer-chip');
|
||||
var lc = layer(model);
|
||||
chip.textContent = lc;
|
||||
chip.className = 'ln-chip-' + lc;
|
||||
chip.setAttribute('style',
|
||||
'font-size:0.625rem;font-weight:700;letter-spacing:0.06em;text-transform:uppercase;padding:2px 7px;border-radius:99px;');
|
||||
document.getElementById('ln-mat').textContent = '';
|
||||
panelBody.innerHTML = '<div style="padding:2rem 1rem;font-size:0.75rem;color:#94A3B8">Loading\u2026</div>';
|
||||
panel.style.transform = 'translateX(0)';
|
||||
|
||||
nodes.forEach(function (n) { n.classList.toggle('ln-selected', n.dataset.model === model); });
|
||||
highlightEdges(model);
|
||||
|
||||
fetchSchema(model, renderPanel);
|
||||
}
|
||||
|
||||
function closePanel() {
|
||||
panel.style.transform = 'translateX(100%)';
|
||||
clearEdges();
|
||||
nodes.forEach(function (n) { n.classList.remove('ln-selected'); });
|
||||
activeModel = null;
|
||||
}
|
||||
|
||||
document.getElementById('ln-close').addEventListener('click', closePanel);
|
||||
document.addEventListener('keydown', function (e) { if (e.key === 'Escape') closePanel(); });
|
||||
|
||||
// ── Node wiring ────────────────────────────────────────────────────────
|
||||
nodes.forEach(function (g) {
|
||||
var model = g.dataset.model;
|
||||
g.addEventListener('mouseenter', function () {
|
||||
edges.forEach(function (e) {
|
||||
if (e.dataset.from === model || e.dataset.to === model) {
|
||||
e.classList.add('hi');
|
||||
e.classList.remove('dim');
|
||||
} else {
|
||||
e.classList.add('dim');
|
||||
e.classList.remove('hi');
|
||||
}
|
||||
});
|
||||
|
||||
g.addEventListener('mouseenter', function (e) {
|
||||
if (activeModel === null) highlightEdges(model);
|
||||
// prefetch so tooltip has data on arrive
|
||||
fetchSchema(model, function (data) { showTooltip(data, e.clientX, e.clientY); });
|
||||
});
|
||||
|
||||
g.addEventListener('mousemove', function (e) {
|
||||
if (cache[model]) showTooltip(cache[model], e.clientX, e.clientY);
|
||||
});
|
||||
|
||||
g.addEventListener('mouseleave', function () {
|
||||
edges.forEach(function (e) {
|
||||
e.classList.remove('hi', 'dim');
|
||||
});
|
||||
hideTooltip();
|
||||
if (activeModel === null) clearEdges();
|
||||
else highlightEdges(activeModel);
|
||||
});
|
||||
|
||||
g.addEventListener('click', function () {
|
||||
hideTooltip();
|
||||
if (activeModel === model) closePanel();
|
||||
else openPanel(model);
|
||||
});
|
||||
});
|
||||
})();
|
||||
|
||||
@@ -24,9 +24,11 @@ sup = _ilu.module_from_spec(_spec)
|
||||
_spec.loader.exec_module(sup)
|
||||
|
||||
from padelnomics_extract.proxy import ( # noqa: E402
|
||||
load_proxy_urls,
|
||||
fetch_webshare_proxies,
|
||||
load_proxy_tiers,
|
||||
make_round_robin_cycler,
|
||||
make_sticky_selector,
|
||||
make_tiered_cycler,
|
||||
)
|
||||
|
||||
# ── load_workflows ────────────────────────────────────────────────
|
||||
@@ -198,28 +200,112 @@ class TestTopologicalWaves:
|
||||
# ── proxy.py ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestLoadProxyUrls:
|
||||
def test_returns_empty_when_unset(self, monkeypatch):
|
||||
monkeypatch.delenv("PROXY_URLS", raising=False)
|
||||
assert load_proxy_urls() == []
|
||||
class TestFetchWebshareProxies:
|
||||
def test_parses_ip_port_user_pass_format(self):
|
||||
raw = "1.2.3.4:1080:user1:pass1\n5.6.7.8:1080:user2:pass2\n"
|
||||
with patch("urllib.request.urlopen") as mock_open:
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = raw.encode("utf-8")
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
mock_open.return_value = mock_resp
|
||||
urls = fetch_webshare_proxies("http://example.com/proxy-list")
|
||||
assert urls == [
|
||||
"http://user1:pass1@1.2.3.4:1080",
|
||||
"http://user2:pass2@5.6.7.8:1080",
|
||||
]
|
||||
|
||||
def test_parses_comma_separated_urls(self, monkeypatch):
|
||||
monkeypatch.setenv(
|
||||
"PROXY_URLS",
|
||||
"http://p1:8080,http://p2:8080,http://p3:8080",
|
||||
)
|
||||
urls = load_proxy_urls()
|
||||
assert urls == ["http://p1:8080", "http://p2:8080", "http://p3:8080"]
|
||||
def test_network_error_returns_empty(self):
|
||||
import urllib.error
|
||||
with patch("urllib.request.urlopen", side_effect=urllib.error.URLError("timeout")):
|
||||
result = fetch_webshare_proxies("http://example.com/proxy-list")
|
||||
assert result == []
|
||||
|
||||
def test_strips_whitespace(self, monkeypatch):
|
||||
monkeypatch.setenv("PROXY_URLS", " http://p1:8080 , http://p2:8080 ")
|
||||
urls = load_proxy_urls()
|
||||
assert urls == ["http://p1:8080", "http://p2:8080"]
|
||||
def test_malformed_lines_are_skipped(self):
|
||||
raw = "bad_line\n1.2.3.4:1080:user:pass\nonly:three:parts\n"
|
||||
with patch("urllib.request.urlopen") as mock_open:
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = raw.encode("utf-8")
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
mock_open.return_value = mock_resp
|
||||
urls = fetch_webshare_proxies("http://example.com/proxy-list")
|
||||
assert urls == ["http://user:pass@1.2.3.4:1080"]
|
||||
|
||||
def test_ignores_empty_segments(self, monkeypatch):
|
||||
monkeypatch.setenv("PROXY_URLS", "http://p1:8080,,http://p2:8080,")
|
||||
urls = load_proxy_urls()
|
||||
assert urls == ["http://p1:8080", "http://p2:8080"]
|
||||
def test_max_proxies_respected(self):
|
||||
lines = "\n".join(f"10.0.0.{i}:1080:u{i}:p{i}" for i in range(10))
|
||||
with patch("urllib.request.urlopen") as mock_open:
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = lines.encode("utf-8")
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
mock_open.return_value = mock_resp
|
||||
urls = fetch_webshare_proxies("http://example.com/proxy-list", max_proxies=3)
|
||||
assert len(urls) == 3
|
||||
|
||||
def test_empty_lines_skipped(self):
|
||||
raw = "\n\n1.2.3.4:1080:user:pass\n\n"
|
||||
with patch("urllib.request.urlopen") as mock_open:
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = raw.encode("utf-8")
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
mock_open.return_value = mock_resp
|
||||
urls = fetch_webshare_proxies("http://example.com/proxy-list")
|
||||
assert urls == ["http://user:pass@1.2.3.4:1080"]
|
||||
|
||||
|
||||
class TestLoadProxyTiers:
|
||||
def _clear_proxy_env(self, monkeypatch):
|
||||
for var in ("WEBSHARE_DOWNLOAD_URL", "PROXY_URLS_DATACENTER", "PROXY_URLS_RESIDENTIAL"):
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
|
||||
def test_returns_empty_when_all_unset(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
assert load_proxy_tiers() == []
|
||||
|
||||
def test_single_datacenter_tier(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
monkeypatch.setenv("PROXY_URLS_DATACENTER", "http://dc1:8080,http://dc2:8080")
|
||||
tiers = load_proxy_tiers()
|
||||
assert len(tiers) == 1
|
||||
assert tiers[0] == ["http://dc1:8080", "http://dc2:8080"]
|
||||
|
||||
def test_residential_only(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
monkeypatch.setenv("PROXY_URLS_RESIDENTIAL", "http://res1:8080")
|
||||
tiers = load_proxy_tiers()
|
||||
assert len(tiers) == 1
|
||||
assert tiers[0] == ["http://res1:8080"]
|
||||
|
||||
def test_empty_tiers_skipped(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
monkeypatch.setenv("PROXY_URLS_DATACENTER", "")
|
||||
monkeypatch.setenv("PROXY_URLS_RESIDENTIAL", "http://res1:8080")
|
||||
tiers = load_proxy_tiers()
|
||||
assert len(tiers) == 1
|
||||
assert tiers[0] == ["http://res1:8080"]
|
||||
|
||||
def test_three_tiers_correct_order(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
with patch("padelnomics_extract.proxy.fetch_webshare_proxies", return_value=["http://user:pass@1.2.3.4:1080"]):
|
||||
monkeypatch.setenv("WEBSHARE_DOWNLOAD_URL", "http://example.com/list")
|
||||
monkeypatch.setenv("PROXY_URLS_DATACENTER", "http://dc1:8080")
|
||||
monkeypatch.setenv("PROXY_URLS_RESIDENTIAL", "http://res1:8080")
|
||||
tiers = load_proxy_tiers()
|
||||
assert len(tiers) == 3
|
||||
assert tiers[0] == ["http://user:pass@1.2.3.4:1080"] # free
|
||||
assert tiers[1] == ["http://dc1:8080"] # datacenter
|
||||
assert tiers[2] == ["http://res1:8080"] # residential
|
||||
|
||||
def test_webshare_fetch_failure_skips_tier(self, monkeypatch):
|
||||
self._clear_proxy_env(monkeypatch)
|
||||
with patch("padelnomics_extract.proxy.fetch_webshare_proxies", return_value=[]):
|
||||
monkeypatch.setenv("WEBSHARE_DOWNLOAD_URL", "http://example.com/list")
|
||||
monkeypatch.setenv("PROXY_URLS_DATACENTER", "http://dc1:8080")
|
||||
tiers = load_proxy_tiers()
|
||||
assert len(tiers) == 1
|
||||
assert tiers[0] == ["http://dc1:8080"]
|
||||
|
||||
|
||||
class TestRoundRobinCycler:
|
||||
@@ -279,3 +365,138 @@ class TestStickySelectorProxy:
|
||||
fn = make_sticky_selector(urls)
|
||||
for i in range(20):
|
||||
assert fn(f"key_{i}") in urls
|
||||
|
||||
|
||||
class TestTieredCyclerNTier:
|
||||
def test_starts_on_first_tier(self):
|
||||
tiers = [["http://t0a", "http://t0b"], ["http://t1a"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=3)
|
||||
assert cycler["active_tier_index"]() == 0
|
||||
assert not cycler["is_exhausted"]()
|
||||
assert cycler["next_proxy"]() in tiers[0]
|
||||
|
||||
def test_escalates_after_threshold(self):
|
||||
tiers = [["http://t0"], ["http://t1"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=3)
|
||||
# Two failures — stays on tier 0
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 0
|
||||
# Third failure — escalates
|
||||
escalated = cycler["record_failure"]()
|
||||
assert escalated is True
|
||||
assert cycler["active_tier_index"]() == 1
|
||||
assert cycler["next_proxy"]() == "http://t1"
|
||||
|
||||
def test_escalates_through_all_tiers(self):
|
||||
tiers = [["http://t0"], ["http://t1"], ["http://t2"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=2)
|
||||
# Exhaust tier 0
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 1
|
||||
# Exhaust tier 1
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 2
|
||||
# Exhaust tier 2
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["is_exhausted"]()
|
||||
assert cycler["next_proxy"]() is None
|
||||
|
||||
def test_success_resets_counter(self):
|
||||
tiers = [["http://t0"], ["http://t1"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=3)
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
cycler["record_success"]()
|
||||
# Counter reset — need threshold more failures to escalate
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 0 # still on tier 0
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 1 # now escalated
|
||||
|
||||
def test_counter_resets_on_escalation(self):
|
||||
"""After escalating, failure counter resets so new tier gets a fresh start."""
|
||||
tiers = [["http://t0"], ["http://t1"], ["http://t2"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=2)
|
||||
# Exhaust tier 0
|
||||
cycler["record_failure"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 1
|
||||
# One failure on tier 1 — should NOT escalate yet (counter reset)
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 1
|
||||
# Second failure on tier 1 — escalates to tier 2
|
||||
cycler["record_failure"]()
|
||||
assert cycler["active_tier_index"]() == 2
|
||||
|
||||
def test_is_exhausted_false_when_tiers_remain(self):
|
||||
tiers = [["http://t0"], ["http://t1"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=1)
|
||||
assert not cycler["is_exhausted"]()
|
||||
cycler["record_failure"]() # escalates to tier 1
|
||||
assert not cycler["is_exhausted"]()
|
||||
|
||||
def test_is_exhausted_true_after_all_tiers_fail(self):
|
||||
tiers = [["http://t0"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=1)
|
||||
assert not cycler["is_exhausted"]()
|
||||
cycler["record_failure"]()
|
||||
assert cycler["is_exhausted"]()
|
||||
assert cycler["next_proxy"]() is None
|
||||
|
||||
def test_empty_tiers_immediately_exhausted(self):
|
||||
cycler = make_tiered_cycler([], threshold=3)
|
||||
assert cycler["is_exhausted"]()
|
||||
assert cycler["next_proxy"]() is None
|
||||
assert cycler["tier_count"]() == 0
|
||||
|
||||
def test_single_tier_cycles_within_tier(self):
|
||||
tiers = [["http://p1", "http://p2", "http://p3"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=10)
|
||||
results = [cycler["next_proxy"]() for _ in range(6)]
|
||||
assert results == ["http://p1", "http://p2", "http://p3"] * 2
|
||||
|
||||
def test_tier_count_reflects_input(self):
|
||||
assert make_tiered_cycler([], threshold=1)["tier_count"]() == 0
|
||||
assert make_tiered_cycler([["a"]], threshold=1)["tier_count"]() == 1
|
||||
assert make_tiered_cycler([["a"], ["b"], ["c"]], threshold=1)["tier_count"]() == 3
|
||||
|
||||
def test_record_failure_noop_when_exhausted(self):
|
||||
tiers = [["http://t0"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=1)
|
||||
cycler["record_failure"]() # exhausts
|
||||
assert cycler["is_exhausted"]()
|
||||
# Further failures are no-ops, not exceptions
|
||||
result = cycler["record_failure"]()
|
||||
assert result is False
|
||||
assert cycler["is_exhausted"]()
|
||||
|
||||
def test_thread_safety(self):
|
||||
"""Concurrent next_proxy and record calls do not raise or corrupt state."""
|
||||
import threading
|
||||
tiers = [["http://t0a", "http://t0b"], ["http://t1a", "http://t1b"]]
|
||||
cycler = make_tiered_cycler(tiers, threshold=5)
|
||||
errors = []
|
||||
lock = threading.Lock()
|
||||
|
||||
def worker():
|
||||
try:
|
||||
for _ in range(20):
|
||||
cycler["next_proxy"]()
|
||||
cycler["record_failure"]()
|
||||
cycler["record_success"]()
|
||||
except Exception as e:
|
||||
with lock:
|
||||
errors.append(e)
|
||||
|
||||
threads = [threading.Thread(target=worker) for _ in range(8)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert errors == [], f"Thread safety errors: {errors}"
|
||||
|
||||
Reference in New Issue
Block a user