diff --git a/extract/psdonline/src/psdonline/execute.py b/extract/psdonline/src/psdonline/execute.py index 7425621..24ba294 100644 --- a/extract/psdonline/src/psdonline/execute.py +++ b/extract/psdonline/src/psdonline/execute.py @@ -35,6 +35,7 @@ def extract_psd_file(url:str, extract_to_path: pathlib.Path, http_session: nique return etag = response.headers.get("etag").replace('"',"").replace(":","_") if etag in extracted_etags: + logger.info("File already extracted, skipping download.") return else: response = http_session.get(url) @@ -49,13 +50,12 @@ def extract_psd_dataset(): today = datetime.now() years = list(range(FIRST_YEAR, today.year+1)) for year in years: - if year == years[0]: - months = list(range(8, 13)) + months = list(range(1,13)) + if year == FIRST_YEAR: + months = list(range(FIRST_MONTH, 13)) if year == years[-1]: months = list(range(1, today.month+1)) - else: - months = list(range(1,13)) - + logger.info(f"Year {year}, extracting months: {months}") for month in months: url = PSD_HISTORICAL_URL.format(year=year, month=month) target_dir = OUTPUT_DIR / f"{year}"/f"{month:02d}"