From d30ec9b66bc1305053cba0844099209f6b014d23 Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 20 Oct 2025 22:45:30 +0200 Subject: [PATCH] Add R2 upload support with landing bucket path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes 1. **Support ESC environment variable names** - Fallback to R2_ADMIN_ACCESS_KEY_ID if R2_ACCESS_KEY not set - Fallback to R2_ADMIN_SECRET_ACCESS_KEY if R2_SECRET_KEY not set - Allows script to work with Pulumi ESC (beanflows/prod) variables 2. **Use landing bucket path** - Changed R2 path from `psd/{etag}.zip` to `landing/psd/{etag}.zip` - All extracted data goes to landing bucket for consistent organization 3. **Updated Pulumi ESC environment** - Added R2_BUCKET=beanflows-data-prod - Fixed R2_ENDPOINT to remove bucket path (now just account URL) ## Testing - ✅ R2 upload works: Uploaded to landing/psd/316039e2612edc1_0.zip - ✅ R2 deduplication works: Skips upload if file exists - ✅ Local mode still works without credentials 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- extract/psdonline/src/psdonline/execute.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extract/psdonline/src/psdonline/execute.py b/extract/psdonline/src/psdonline/execute.py index 71fa273..5d7c978 100644 --- a/extract/psdonline/src/psdonline/execute.py +++ b/extract/psdonline/src/psdonline/execute.py @@ -24,8 +24,8 @@ logger.info(f"Output dir: {OUTPUT_DIR}") # R2 configuration from environment R2_ENDPOINT = os.getenv('R2_ENDPOINT') R2_BUCKET = os.getenv('R2_BUCKET') -R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY') -R2_SECRET_KEY = os.getenv('R2_SECRET_KEY') +R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY') or os.getenv('R2_ADMIN_ACCESS_KEY_ID') +R2_SECRET_KEY = os.getenv('R2_SECRET_KEY') or os.getenv('R2_ADMIN_SECRET_ACCESS_KEY') PSD_HISTORICAL_URL = "https://apps.fas.usda.gov/psdonline/downloads/archives/{year}/{month:02d}/psd_alldata_csv.zip" FIRST_YEAR = 2006 @@ -33,7 +33,7 @@ FIRST_MONTH = 8 def check_r2_file_exists(etag: str, s3_client) -> bool: """Check if file exists in R2.""" - r2_key = f"psd/{etag}.zip" + r2_key = f"landing/psd/{etag}.zip" try: s3_client.head_object(Bucket=R2_BUCKET, Key=r2_key) logger.info(f"File {r2_key} already exists in R2, skipping") @@ -46,7 +46,7 @@ def check_r2_file_exists(etag: str, s3_client) -> bool: def upload_to_r2(content: bytes, etag: str, s3_client): """Upload file content to R2.""" - r2_key = f"psd/{etag}.zip" + r2_key = f"landing/psd/{etag}.zip" logger.info(f"Uploading to R2: {r2_key}") s3_client.put_object(Bucket=R2_BUCKET, Key=r2_key, Body=content) logger.info("Upload complete")