Add R2 upload support with landing bucket path

## Changes

1. **Support ESC environment variable names**
   - Fall back to R2_ADMIN_ACCESS_KEY_ID if R2_ACCESS_KEY is not set
   - Fall back to R2_ADMIN_SECRET_ACCESS_KEY if R2_SECRET_KEY is not set
   - Allows the script to work with Pulumi ESC (beanflows/prod) variables

2. **Use landing bucket path**
   - Changed R2 path from `psd/{etag}.zip` to `landing/psd/{etag}.zip`
   - All extracted data goes under the `landing/` prefix of the bucket for consistent organization

3. **Updated Pulumi ESC environment**
   - Added R2_BUCKET=beanflows-data-prod
   - Fixed R2_ENDPOINT to remove bucket path (now just account URL)

## Testing

- R2 upload works: uploaded to landing/psd/316039e2612edc1_0.zip
- R2 deduplication works: skips the upload if the file already exists
- Local mode still works without credentials

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Deeman
2025-10-20 22:45:30 +02:00
parent 57f2909001
commit d30ec9b66b

View File

@@ -24,8 +24,8 @@ logger.info(f"Output dir: {OUTPUT_DIR}")
# R2 configuration from environment
R2_ENDPOINT = os.getenv('R2_ENDPOINT')
R2_BUCKET = os.getenv('R2_BUCKET')
R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY') or os.getenv('R2_ADMIN_ACCESS_KEY_ID')
R2_SECRET_KEY = os.getenv('R2_SECRET_KEY') or os.getenv('R2_ADMIN_SECRET_ACCESS_KEY')
PSD_HISTORICAL_URL = "https://apps.fas.usda.gov/psdonline/downloads/archives/{year}/{month:02d}/psd_alldata_csv.zip"
FIRST_YEAR = 2006
@@ -33,7 +33,7 @@ FIRST_MONTH = 8
def check_r2_file_exists(etag: str, s3_client) -> bool:
"""Check if file exists in R2."""
r2_key = f"psd/{etag}.zip"
r2_key = f"landing/psd/{etag}.zip"
try:
s3_client.head_object(Bucket=R2_BUCKET, Key=r2_key)
logger.info(f"File {r2_key} already exists in R2, skipping")
@@ -46,7 +46,7 @@ def check_r2_file_exists(etag: str, s3_client) -> bool:
def upload_to_r2(content: bytes, etag: str, s3_client):
"""Upload file content to R2."""
r2_key = f"psd/{etag}.zip"
r2_key = f"landing/psd/{etag}.zip"
logger.info(f"Uploading to R2: {r2_key}")
s3_client.put_object(Bucket=R2_BUCKET, Key=r2_key, Body=content)
logger.info("Upload complete")