
POF Forecast

Importing all the necessary Python libraries.

import xarray as xr
import numpy as np
import pandas as pd
import joblib
from pathlib import Path
import cftime
from xarray.coding.times import CFDatetimeCoder
import os
import cdsapi
import zipfile

Configuration Options

Here we define the file paths and the dates we will use for our forecast.

We also load the model we created in the Trainer notebook.

# Folder structure
base_path = Path("./data/")
os.makedirs(base_path, exist_ok=True)
os.makedirs("./outputs/", exist_ok=True)

# Dates of Forecast
year = 2019
years = [year]
months = [12]
cds_months = [f"{m:02d}" for m in months]
cds_days = [f"{d:02d}" for d in range(1, 32)]

# Where we will store our predictions (one file per forecast month,
# filled in inside the prediction loop below)
output_template = "./outputs/POF_prediction_{year}_{month:02d}.nc"

# Load trained model
model = joblib.load("./data/POF_model.joblib")

# API credentials: replace the placeholder keys with your own
# (personal keys should not be committed to a public notebook)
cds_kwargs = {
    'url': 'https://cds.climate.copernicus.eu/api',
    'key': 'YOUR-CDS-API-KEY',
}

xds_kwargs = {
    'url': 'https://xds-preprod.ecmwf.int/api',
    'key': 'YOUR-XDS-API-KEY',
}
for year in years:
    for month in cds_months: 
        for shortname, var in [["T2M", "2m_temperature"],
                               ["D2M", "2m_dewpoint_temperature"],
                               ["10U", "10m_u_component_of_wind"],
                               ["10V", "10m_v_component_of_wind"],
                               ["P", "total_precipitation"]]:

            dataset = "reanalysis-era5-land"
            request = {
                "variable": [
                    var,
                ],
                "year": year,
                "month": month,
                "day": cds_days,
                "time": [
                    "00:00"
                ],
                "data_format": "netcdf",
                "download_format": "unarchived"
            }
            client = cdsapi.Client(**cds_kwargs)
            if not os.path.exists(f"data/{shortname}_{year}_{month}.nc"):
                client.retrieve(dataset, request, target=f"data/{shortname}_{year}_{month}.nc")
        # Derive wind speed (m/s) from the u and v components: ws = sqrt(u^2 + v^2)
        if not os.path.exists(f"data/WS_{year}_{month}.nc"):
            with xr.open_dataset(f"data/10U_{year}_{month}.nc") as dsu,\
                xr.open_dataset(f"data/10V_{year}_{month}.nc") as dsv:
                dsw = np.sqrt(dsu.u10**2 + dsv.v10**2)
                dsw = dsw.rename("ws")
                dsw.to_netcdf(f"data/WS_{year}_{month}.nc")
                del dsw

        # Derive relative humidity (%) from 2m temperature and dewpoint using
        # the Magnus approximation for vapour pressure (see the formula below)
        if not os.path.exists(f"data/RH_{year}_{month}.nc"):
            with xr.open_dataset(f"data/T2M_{year}_{month}.nc") as dst,\
                xr.open_dataset(f"data/D2M_{year}_{month}.nc") as dsd:
                es = 10**(7.5*(dst.t2m-273.15)/(237.7+(dst.t2m-273.15))) * 6.11
                e = 10**(7.5*(dsd.d2m-273.15)/(237.7+(dsd.d2m-273.15))) * 6.11
                dsrh = (e/es)*100
                dsrh = dsrh.rename("rh")
                dsrh.to_netcdf(f"data/RH_{year}_{month}.nc")
                del dsrh
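
The relative humidity above follows the standard Magnus approximation for vapour pressure over water, with temperatures converted from kelvin to °C:

$$e_s(T) = 6.11 \times 10^{\frac{7.5\,T}{237.7 + T}} \ \text{(hPa)}, \qquad \mathrm{RH} = 100 \cdot \frac{e(T_d)}{e_s(T)}$$

where $T$ is the 2 m temperature and $T_d$ the 2 m dewpoint, both in °C.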

This section downloads the fuel and fuel moisture maps from the XDS-Preprod.

The data is large, so the download may take some time.

# Build the regridding target grid from one of the ERA5-Land files
lat_lon_data = xr.open_dataset(f"data/T2M_{years[0]}_{cds_months[0]}.nc").sortby("latitude").sortby("longitude")
lat_lon_data = lat_lon_data.rename(valid_time="time", latitude="lat", longitude="lon")
lat_lon_data = lat_lon_data.drop_vars(["expver", "number"])

for year in years:
    for month in cds_months: 
        for shortname, var in [["DFMC_MAP", "dead_fuel_moisture_content_group"],
                               ["LFMC_MAP", "live_fuel_moisture_content_group"],
                               ["FUEL_MAP", "fuel_group"]]:

            dataset = "derived-fire-fuel-biomass"
            request = {
                "variable": [
                    var,
                ],
                "version": ["1"],
                "year": [
                    f'{year}'
                ],
                "month": [
                    month
                ]
            }

            client = cdsapi.Client(**xds_kwargs)
            if not os.path.exists(f"data/{shortname}_{year}_{month}_R.nc"):
                client.retrieve(dataset, request, target=f"data/{shortname}_{year}_{month}.zip")

                with zipfile.ZipFile(f"data/{shortname}_{year}_{month}.zip", 'r') as zip_ref:
                    zip_ref.extractall("data/")
                print(f"Regridding {shortname} for {year}-{month}")
                with xr.open_dataset(f"data/{shortname}_{year}_{month}.nc") as ds_temp:
                    ds_regrid=ds_temp.interp_like(lat_lon_data, method="linear")
                    ds_regrid.to_netcdf(f"data/{shortname}_{year}_{month}_R.nc")
                    del ds_regrid
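
As a quick, optional sanity check (a hypothetical sketch, not part of the original workflow, assuming the regridded files keep the lat/lon coordinate names of the target grid), we can confirm that a regridded file now shares the ERA5-Land grid built above:

# Optional: verify a regridded fuel map is on the ERA5-Land lat/lon grid
with xr.open_dataset(f"data/FUEL_MAP_{years[0]}_{cds_months[0]}_R.nc") as ds_check:
    print(ds_check.sizes)
    assert ds_check.lat.size == lat_lon_data.lat.size
    assert ds_check.lon.size == lat_lon_data.lon.size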

Load Static Data

We load the static CLIMATE data files: population density and road density.

# Open the Climate files, decoding times with cftime
time_coder = CFDatetimeCoder(use_cftime=True)
PO = xr.open_dataset(base_path / "CLIMATE/POP_2020.nc", decode_times=time_coder)
RD = xr.open_dataset(base_path / "CLIMATE/road_density_2015_agg_r.nc", decode_times=time_coder)

# Extract the arrays
PO_arr = PO.population_density.values
RD_arr = RD.road_length.values
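
Before running the prediction loop, a quick optional check can save time (a sketch, assuming the CLIMATE files share the forecast grid and that the model is a scikit-learn estimator fitted on a DataFrame):

# Optional sanity checks before predicting
print(PO_arr.shape, RD_arr.shape)
assert PO_arr.shape == RD_arr.shape, "static grids should share one shape"

# scikit-learn estimators fitted on a DataFrame store the expected
# feature columns, in order, in `feature_names_in_`
print(getattr(model, "feature_names_in_", "feature names not stored"))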

Load Dynamic Data

Here we open the data files we retrieved from the XDS and the CDS, as well as the Active Fire Maps.

We iterate through each day within the dataset and store the predictions in an output NetCDF file.

for year, month in [(y, m) for y in years for m in months]:
    # Define the paths of the files
    ds_paths = {
        "AF": f"ACTIVE_FIRE_MAP_{year}_{month:02d}_R.nc",
        "FU": f"FUEL_MAP_{year}_{month:02d}_R.nc",
        "DF": f"DFMC_MAP_{year}_{month:02d}_R.nc",
        "LF": f"LFMC_MAP_{year}_{month:02d}_R.nc",
        "PR": f"P_{year}_{month:02d}.nc",
        "T2": f"T2M_{year}_{month:02d}.nc",
        "RH": f"RH_{year}_{month:02d}.nc",
        "WS": f"WS_{year}_{month:02d}.nc",
    }
    # Open all dynamic datasets
    with xr.open_dataset(base_path / ds_paths["FU"]) as FU, \
        xr.open_dataset(base_path / ds_paths["DF"]) as DF, \
        xr.open_dataset(base_path / ds_paths["LF"]) as LF, \
        xr.open_dataset(base_path / ds_paths["PR"]) as PR, \
        xr.open_dataset(base_path / ds_paths["T2"]) as T2, \
        xr.open_dataset(base_path / ds_paths["RH"]) as RH, \
        xr.open_dataset(base_path / ds_paths["WS"]) as WS, \
        xr.open_dataset(base_path / ds_paths["AF"]) as AF:

        n_days = len(AF.ACTIVE_FIRE)
        all_grids = []

        for i in range(n_days):
            # Extract arrays for timestep i
            FU_LL = FU.Live_Leaf[i].values
            FU_LW = FU.Live_Wood[i].values
            FU_DF = FU.Dead_Foliage[i].values
            FU_DW = FU.Dead_Wood[i].values
            DF_ = DF.DFMC_Foliage[i].values
            DW_ = DF.DFMC_Wood[i].values
            LF_ = LF.LFMC[i].values
            PR_ = PR.tp[i].values
            T2_ = T2.t2m[i].values
            RH_ = RH.rh[i].values
            WS_ = WS.ws[i].values

            # Mask where total fuel > 0
            ft = FU_LL + FU_LW + FU_DF + FU_DW
            mask = ft > 0

            # Flatten and build dataframe for prediction
            feature_arrays = {
                "PR": PR_[mask],
                "T2": T2_[mask],
                "RH": RH_[mask],
                "WS": WS_[mask],
                "FU_LL": FU_LL[mask],
                "FU_LW": FU_LW[mask],
                "FU_DF": FU_DF[mask],
                "FU_DW": FU_DW[mask],
                "DF": DF_[mask],
                "DW": DW_[mask],
                "LF": LF_[mask],
                "PO": PO_arr[mask],
                "RD": RD_arr[mask],
            }
            X_pred = pd.DataFrame(feature_arrays)

            # Predict probability
            y_proba = model.predict_proba(X_pred)[:, 1]

            # Create full grid and fill masked values
            fire_prob_grid = np.full(ft.shape, np.nan, dtype=float)
            fire_prob_grid[mask] = y_proba
            all_grids.append(fire_prob_grid)

        # Stack all timesteps into 3D array (time, lat, lon)
        fire_prob_array = np.stack(all_grids, axis=0)
        
        # -----------------------------
        # SAVE TO NETCDF
        # -----------------------------
        time = [cftime.DatetimeJulian(year, month, day+1) for day in range(n_days)]

        ds_out = xr.Dataset(
            {"fire_probability": (["time", "latitude", "longitude"], fire_prob_array)},
            coords={
                "time": time,
                "latitude": PO.latitude,
                "longitude": PO.longitude,
            }
        )
        # Write one file per forecast month, replacing any previous run
        output_file = output_template.format(year=year, month=month)
        if os.path.exists(output_file):
            os.remove(output_file)
        ds_out.to_netcdf(output_file)
        print(f"✅ Prediction saved → {output_file}")
