more simplification
This commit is contained in:
@@ -35,6 +35,7 @@ def extract_psd_file(url:str, extract_to_path: pathlib.Path, http_session: nique
|
|||||||
return
|
return
|
||||||
etag = response.headers.get("etag").replace('"',"").replace(":","_")
|
etag = response.headers.get("etag").replace('"',"").replace(":","_")
|
||||||
if etag in extracted_etags:
|
if etag in extracted_etags:
|
||||||
|
logger.info("File already extracted, skipping download.")
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
response = http_session.get(url)
|
response = http_session.get(url)
|
||||||
@@ -49,13 +50,12 @@ def extract_psd_dataset():
|
|||||||
today = datetime.now()
|
today = datetime.now()
|
||||||
years = list(range(FIRST_YEAR, today.year+1))
|
years = list(range(FIRST_YEAR, today.year+1))
|
||||||
for year in years:
|
for year in years:
|
||||||
if year == years[0]:
|
months = list(range(1,13))
|
||||||
months = list(range(8, 13))
|
if year == FIRST_YEAR:
|
||||||
|
months = list(range(FIRST_MONTH, 13))
|
||||||
if year == years[-1]:
|
if year == years[-1]:
|
||||||
months = list(range(1, today.month+1))
|
months = list(range(1, today.month+1))
|
||||||
else:
|
logger.info(f"Year {year}, extracting months: {months}")
|
||||||
months = list(range(1,13))
|
|
||||||
|
|
||||||
for month in months:
|
for month in months:
|
||||||
url = PSD_HISTORICAL_URL.format(year=year, month=month)
|
url = PSD_HISTORICAL_URL.format(year=year, month=month)
|
||||||
target_dir = OUTPUT_DIR / f"{year}"/f"{month:02d}"
|
target_dir = OUTPUT_DIR / f"{year}"/f"{month:02d}"
|
||||||
|
|||||||
Reference in New Issue
Block a user