{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PSD commodity pivots: silver and gold layers\n",
    "\n",
    "Reads `psd_alldata.csv`, writes one pivot table per commodity (silver layer), enriches each pivot with derived supply/demand metrics (gold layer), and writes a combined per-commodity global summary.\n",
    "\n",
    "Outputs:\n",
    "\n",
    "- `silver_layer_pivots/<commodity>.csv`\n",
    "- `gold_layer_pivots/<commodity>.csv`\n",
    "- `global_summary_all.csv`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every path and column list a reader might want to change.\n",
    "DATA_DIR = '../data'\n",
    "RAW_CSV = os.path.join(DATA_DIR, 'psd_alldata.csv')\n",
    "SILVER_DIR = 'silver_layer_pivots'\n",
    "GOLD_DIR = 'gold_layer_pivots'\n",
    "GLOBAL_SUMMARY_CSV = 'global_summary_all.csv'\n",
    "\n",
    "# Raw column name -> short name used throughout the notebook.\n",
    "COLUMN_RENAMES = {\n",
    "    'Commodity_Description': 'commodity',\n",
    "    'Country_Name': 'country',\n",
    "    'Attribute_Description': 'attribute',\n",
    "    'Value': 'value',\n",
    "    'Market_Year': 'market_year',\n",
    "}\n",
    "\n",
    "# Attributes every gold-layer table must expose; absent ones are created as 0.\n",
    "REQUIRED_COLS = ['Production', 'Imports', 'Exports', 'Total Distribution', 'Ending Stocks']\n",
    "\n",
    "# Column order of the combined global summary file.\n",
    "SUMMARY_COLUMNS = [\n",
    "    'commodity', 'report_date', 'Production', 'Imports', 'Exports',\n",
    "    'Total Distribution', 'Ending Stocks', 'Net Supply',\n",
    "    'Supply-Demand Balance', 'Stock-to-Use Ratio (%)',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def safe_filename(commodity: str) -> str:\n",
    "    \"\"\"Return a lower-case file stem for *commodity* with spaces and slashes replaced by underscores.\"\"\"\n",
    "    return commodity.replace(' ', '_').replace('/', '_').lower()\n",
    "\n",
    "\n",
    "def ratio_pct(numerator: pd.Series, denominator: pd.Series) -> pd.Series:\n",
    "    \"\"\"Return ``numerator / denominator * 100`` with NaN wherever the denominator is 0.\n",
    "\n",
    "    ``Series.where`` keeps the result numeric; replacing zeros with ``pd.NA``\n",
    "    would turn the column into ``object`` dtype.\n",
    "    \"\"\"\n",
    "    return numerator / denominator.where(denominator != 0) * 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load and clean the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "psd_raw = pd.read_csv(RAW_CSV, encoding='latin1')\n",
    "\n",
    "psd_clean = (\n",
    "    psd_raw\n",
    "    .rename(columns=COLUMN_RENAMES)\n",
    "    # Drop incomplete rows before the int cast below: astype(int) raises on NaN.\n",
    "    .dropna(subset=['commodity', 'country', 'attribute', 'value', 'market_year'])\n",
    "    .assign(\n",
    "        report_date=lambda d: pd.to_datetime(d['market_year'].astype(int).astype(str) + '-01-01')\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Silver layer: one pivot table per commodity\n",
    "\n",
    "Rows are `(report_date, country)`, columns are attributes, and values are summed. Attribute/row combinations that do not occur are filled with 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs(SILVER_DIR, exist_ok=True)\n",
    "silver_paths = {}  # commodity name -> path of its silver CSV\n",
    "\n",
    "for commodity, rows in psd_clean.groupby('commodity'):\n",
    "    pivot = rows.pivot_table(\n",
    "        index=['report_date', 'country'],\n",
    "        columns='attribute',\n",
    "        values='value',\n",
    "        aggfunc='sum',\n",
    "        fill_value=0,  # fill missing combinations with 0\n",
    "    ).reset_index()\n",
    "\n",
    "    silver_path = os.path.join(SILVER_DIR, f'{safe_filename(commodity)}.csv')\n",
    "    pivot.to_csv(silver_path, index=False)\n",
    "    silver_paths[commodity] = silver_path\n",
    "\n",
    "print(\"✅ Silver layer saved to 'silver_layer_pivots/'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Gold layer: derived metrics\n",
    "\n",
    "Per country: net supply, trade balance, supply-demand balance, stock-to-use ratio and year-over-year production change. Per report date: the same totals summed over all countries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_country_metrics(silver: pd.DataFrame) -> pd.DataFrame:\n",
    "    \"\"\"Return a copy of one commodity's silver table with per-country derived metrics.\n",
    "\n",
    "    Required attribute columns are coerced to numeric (unparseable values become 0)\n",
    "    and created as 0 when absent. The result is sorted by country, then report date.\n",
    "    \"\"\"\n",
    "    gold = silver.copy()\n",
    "    for col in REQUIRED_COLS:\n",
    "        if col in gold.columns:\n",
    "            gold[col] = pd.to_numeric(gold[col], errors='coerce').fillna(0)\n",
    "        else:\n",
    "            gold[col] = 0  # Fill missing columns with 0\n",
    "\n",
    "    gold['Net Supply'] = gold['Production'] + gold['Imports'] - gold['Exports']\n",
    "    gold['Trade Balance'] = gold['Exports'] - gold['Imports']\n",
    "    gold['Supply-Demand Balance'] = gold['Net Supply'] - gold['Total Distribution']\n",
    "    gold['Stock-to-Use Ratio (%)'] = ratio_pct(gold['Ending Stocks'], gold['Total Distribution'])\n",
    "\n",
    "    gold = gold.sort_values(['country', 'report_date'])\n",
    "    yoy = gold.groupby('country')['Production'].transform(lambda s: s.pct_change() * 100)\n",
    "    # A zero in the previous year makes pct_change return +/-inf; report it as missing instead.\n",
    "    gold['Production_YoY (%)'] = yoy.replace([float('inf'), float('-inf')], float('nan'))\n",
    "    return gold\n",
    "\n",
    "\n",
    "def summarise_globally(gold: pd.DataFrame, commodity: str) -> pd.DataFrame:\n",
    "    \"\"\"Sum the required attributes over all countries per report date and derive global metrics.\"\"\"\n",
    "    summary = gold.groupby('report_date')[REQUIRED_COLS].sum().reset_index()\n",
    "    summary['Net Supply'] = summary['Production'] + summary['Imports'] - summary['Exports']\n",
    "    summary['Supply-Demand Balance'] = summary['Net Supply'] - summary['Total Distribution']\n",
    "    summary['Stock-to-Use Ratio (%)'] = ratio_pct(summary['Ending Stocks'], summary['Total Distribution'])\n",
    "    summary['country'] = 'Global'\n",
    "    summary['commodity'] = commodity\n",
    "    return summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs(GOLD_DIR, exist_ok=True)\n",
    "global_summaries = []\n",
    "\n",
    "for commodity, silver_path in silver_paths.items():\n",
    "    gold = add_country_metrics(pd.read_csv(silver_path))\n",
    "    global_summaries.append(summarise_globally(gold, commodity))\n",
    "    gold.to_csv(os.path.join(GOLD_DIR, f'{safe_filename(commodity)}.csv'), index=False)\n",
    "\n",
    "print(\"🌟 Gold layer saved to 'gold_layer_pivots/'\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Combined global summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined_global = pd.concat(global_summaries, ignore_index=True)[SUMMARY_COLUMNS]\n",
    "combined_global.to_csv(GLOBAL_SUMMARY_CSV, index=False)\n",
    "print(\"🌐 Combined global summary saved as 'global_summary_all.csv'\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}