Add R2 upload support with landing bucket path
## Changes
1. **Support ESC environment variable names**
- Fall back to R2_ADMIN_ACCESS_KEY_ID if R2_ACCESS_KEY is not set
- Fall back to R2_ADMIN_SECRET_ACCESS_KEY if R2_SECRET_KEY is not set
- Allows the script to work with Pulumi ESC (beanflows/prod) variables
2. **Use landing bucket path**
- Changed R2 path from `psd/{etag}.zip` to `landing/psd/{etag}.zip`
- All extracted data goes under the `landing/` prefix of the bucket for consistent organization
3. **Updated Pulumi ESC environment**
- Added R2_BUCKET=beanflows-data-prod
- Fixed R2_ENDPOINT to remove bucket path (now just account URL)
## Testing
- ✅ R2 upload works: Uploaded to landing/psd/316039e2612edc1_0.zip
- ✅ R2 deduplication works: Skips upload if file exists
- ✅ Local mode still works without credentials
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -24,8 +24,8 @@ logger.info(f"Output dir: {OUTPUT_DIR}")
|
|||||||
# R2 configuration from environment
|
# R2 configuration from environment
|
||||||
R2_ENDPOINT = os.getenv('R2_ENDPOINT')
|
R2_ENDPOINT = os.getenv('R2_ENDPOINT')
|
||||||
R2_BUCKET = os.getenv('R2_BUCKET')
|
R2_BUCKET = os.getenv('R2_BUCKET')
|
||||||
R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY')
|
R2_ACCESS_KEY = os.getenv('R2_ACCESS_KEY') or os.getenv('R2_ADMIN_ACCESS_KEY_ID')
|
||||||
R2_SECRET_KEY = os.getenv('R2_SECRET_KEY')
|
R2_SECRET_KEY = os.getenv('R2_SECRET_KEY') or os.getenv('R2_ADMIN_SECRET_ACCESS_KEY')
|
||||||
|
|
||||||
PSD_HISTORICAL_URL = "https://apps.fas.usda.gov/psdonline/downloads/archives/{year}/{month:02d}/psd_alldata_csv.zip"
|
PSD_HISTORICAL_URL = "https://apps.fas.usda.gov/psdonline/downloads/archives/{year}/{month:02d}/psd_alldata_csv.zip"
|
||||||
FIRST_YEAR = 2006
|
FIRST_YEAR = 2006
|
||||||
@@ -33,7 +33,7 @@ FIRST_MONTH = 8
|
|||||||
|
|
||||||
def check_r2_file_exists(etag: str, s3_client) -> bool:
|
def check_r2_file_exists(etag: str, s3_client) -> bool:
|
||||||
"""Check if file exists in R2."""
|
"""Check if file exists in R2."""
|
||||||
r2_key = f"psd/{etag}.zip"
|
r2_key = f"landing/psd/{etag}.zip"
|
||||||
try:
|
try:
|
||||||
s3_client.head_object(Bucket=R2_BUCKET, Key=r2_key)
|
s3_client.head_object(Bucket=R2_BUCKET, Key=r2_key)
|
||||||
logger.info(f"File {r2_key} already exists in R2, skipping")
|
logger.info(f"File {r2_key} already exists in R2, skipping")
|
||||||
@@ -46,7 +46,7 @@ def check_r2_file_exists(etag: str, s3_client) -> bool:
|
|||||||
|
|
||||||
def upload_to_r2(content: bytes, etag: str, s3_client):
|
def upload_to_r2(content: bytes, etag: str, s3_client):
|
||||||
"""Upload file content to R2."""
|
"""Upload file content to R2."""
|
||||||
r2_key = f"psd/{etag}.zip"
|
r2_key = f"landing/psd/{etag}.zip"
|
||||||
logger.info(f"Uploading to R2: {r2_key}")
|
logger.info(f"Uploading to R2: {r2_key}")
|
||||||
s3_client.put_object(Bucket=R2_BUCKET, Key=r2_key, Body=content)
|
s3_client.put_object(Bucket=R2_BUCKET, Key=r2_key, Body=content)
|
||||||
logger.info("Upload complete")
|
logger.info("Upload complete")
|
||||||
|
|||||||
Reference in New Issue
Block a user