Import all the necessary Python libraries.
import xarray as xr
import numpy as np
import pandas as pd
import joblib
from pathlib import Path
import cftime
from xarray.coding.times import CFDatetimeCoder
import os
import cdsapi
import zipfile
Configuration Options
Here we define the paths for files and the dates we will use for our forecast.
We also load the model we created in the Trainer notebook
# Folder structure
base_path = Path("./data/")
os.makedirs("./outputs/", exist_ok=True)

# Dates of forecast
year = 2019
years = [year]
months = [12]
cds_months = [f"{m:02d}" for m in months]  # CDS API expects zero-padded month strings
cds_days = [f"{d:02d}" for d in range(1, 32)]  # CDS silently skips days a month lacks

# Where we will store our predictions.
# BUG FIX: the original used an undefined `month` here (NameError at import
# time); use the first configured month so the path is well defined.
output_file = f"./outputs/POF_prediction_{year}_{months[0]:02d}.nc"

# Load the trained model created in the Trainer notebook.
model = joblib.load("./data/POF_model.joblib")
# API credentials for the CDS and XDS services.
# SECURITY NOTE(review): API keys are hard-coded here; prefer reading them
# from ~/.cdsapirc or environment variables before sharing this file.
cds_kwargs = {
    'url': 'https://cds.climate.copernicus.eu/api',
    'key': '14e44bc7-0f33-4a57-847f-f41f44c5ec4d',
}
xds_kwargs = {
    'url': 'https://xds-preprod.ecmwf.int/api',
    'key': '081c061b-d9b9-42eb-a483-a78e2b61cfd1',
}

# Download the ERA5-Land meteorological drivers from the CDS, one variable,
# year and month at a time, then derive wind speed and relative humidity.
client = cdsapi.Client(**cds_kwargs)  # hoisted: one client serves all requests
for year in years:
    for month in cds_months:
        for shortname, var in [["T2M", "2m_temperature"],
                               ["D2M", "2m_dewpoint_temperature"],
                               ["10U", "10m_u_component_of_wind"],
                               ["10V", "10m_v_component_of_wind"],
                               ["P", "total_precipitation"]]:
            dataset = "reanalysis-era5-land"
            request = {
                "variable": [var],
                "year": year,
                "month": month,
                "day": cds_days,
                "time": ["00:00"],
                "data_format": "netcdf",
                "download_format": "unarchived",
            }
            # Skip the (slow) download if the file is already on disk.
            if not os.path.exists(f"data/{shortname}_{year}_{month}.nc"):
                client.retrieve(dataset, request, target=f"data/{shortname}_{year}_{month}.nc")
        # Derived wind speed.
        # NOTE(review): this is u^2 + v^2, i.e. the *squared* wind speed
        # (no sqrt). Keep only if the trained model used the same
        # definition — confirm against the Trainer notebook.
        if not os.path.exists(f"data/WS_{year}_{month}.nc"):
            with xr.open_dataset(f"data/10U_{year}_{month}.nc") as dsu, \
                 xr.open_dataset(f"data/10V_{year}_{month}.nc") as dsv:
                dsw = dsu.u10**2 + dsv.v10**2
                dsw = dsw.rename("ws")
                dsw.to_netcdf(f"data/WS_{year}_{month}.nc")
                del dsw
        # Derived relative humidity (%): Magnus-type formula giving the
        # saturation vapour pressure from T2M and the actual vapour
        # pressure from the dewpoint.
        if not os.path.exists(f"data/RH_{year}_{month}.nc"):
            with xr.open_dataset(f"data/T2M_{year}_{month}.nc") as dst, \
                 xr.open_dataset(f"data/D2M_{year}_{month}.nc") as dsd:
                es = 10**(7.5*(dst.t2m-273.15)/(237.7+(dst.t2m-273.15))) * 6.11
                e = 10**(7.5*(dsd.d2m-273.15)/(237.7+(dsd.d2m-273.15))) * 6.11
                dsrh = (e/es)*100
                dsrh = dsrh.rename("rh")
                dsrh.to_netcdf(f"data/RH_{year}_{month}.nc")
                del dsrh

# This section will download data from the XDS-Preprod.
# The data is large and so may take some time to download.
# Reference grid for regridding: take the coordinate layout from one of the
# downloaded ERA5-Land files, sort so latitude/longitude ascend, rename the
# dimensions to the time/lat/lon names used by the XDS products, and drop
# the ERA5 bookkeeping variables.
lat_lon_data = (
    xr.open_dataset(f"data/T2M_{years[0]}_{cds_months[0]}.nc")
    .sortby("latitude")
    .sortby("longitude")
    .rename(valid_time="time", latitude="lat", longitude="lon")
    .drop_vars(["expver", "number"])
)
# Download the fuel and fuel-moisture maps from the XDS-Preprod and regrid
# them onto the ERA5-Land reference grid built above.
client = cdsapi.Client(**xds_kwargs)  # hoisted: one client serves all requests
for year in years:
    for month in cds_months:
        for shortname, var in [["DFMC_MAP", "dead_fuel_moisture_content_group"],
                               ["LFMC_MAP", "live_fuel_moisture_content_group"],
                               ["FUEL_MAP", "fuel_group"]]:
            dataset = "derived-fire-fuel-biomass"
            request = {
                "variable": [var],
                "version": ["1"],
                "year": [f'{year}'],
                "month": [month],
            }
            # "_R" marks the regridded product; skip everything if it exists.
            if not os.path.exists(f"data/{shortname}_{year}_{month}_R.nc"):
                client.retrieve(dataset, request, target=f"data/{shortname}_{year}_{month}.zip")
                with zipfile.ZipFile(f"data/{shortname}_{year}_{month}.zip", 'r') as zip_ref:
                    zip_ref.extractall("data/")
                print(f"Regridding {shortname} for {year}-{month}")
                # Linear interpolation onto the ERA5-Land grid; write while
                # the source file is still open (the data loads lazily).
                with xr.open_dataset(f"data/{shortname}_{year}_{month}.nc") as ds_temp:
                    ds_regrid = ds_temp.interp_like(lat_lon_data, method="linear")
                    ds_regrid.to_netcdf(f"data/{shortname}_{year}_{month}_R.nc")
                    del ds_regrid

# Load Static Data
We will load some CLIMATE data files
# Open the static CLIMATE files (population density and road density).
# NOTE(review): `time_coder` is created but never used below — possibly a
# leftover from an earlier decode_times workflow; confirm before removing.
time_coder = CFDatetimeCoder(use_cftime=True)
PO = xr.open_dataset(base_path / "CLIMATE/POP_2020.nc")
RD = xr.open_dataset(base_path / "CLIMATE/road_density_2015_agg_r.nc")
# Extract the underlying numpy arrays once; these static layers are reused
# for every day of the forecast.
PO_arr = PO.population_density.values
RD_arr = RD.road_length.values

# Load Dynamic Data
Here we open the data files we retrieved from the XDS and the CDS, as well as the Active Fire Maps.
We iterate through each day within the dataset and store the results in an output NetCDF file.
# Daily prediction loop: for each configured (year, month), open the dynamic
# inputs, build a per-day feature table over cells with fuel, run the model,
# and save the daily probability grids to one NetCDF file per month.
for year in years:
    for month in months:
        # BUG FIX: the original reused a single module-level `output_file`
        # (built from an undefined `month`), so every month would overwrite
        # the same path; compute it per (year, month) instead.
        output_file = f"./outputs/POF_prediction_{year}_{month:02d}.nc"

        # Define the paths of the input files ("_R" = regridded products).
        ds_paths = {
            "AF": f"ACTIVE_FIRE_MAP_{year}_{month:02d}_R.nc",
            "FU": f"FUEL_MAP_{year}_{month:02d}_R.nc",
            "DF": f"DFMC_MAP_{year}_{month:02d}_R.nc",
            "LF": f"LFMC_MAP_{year}_{month:02d}_R.nc",
            "PR": f"P_{year}_{month:02d}.nc",
            "T2": f"T2M_{year}_{month:02d}.nc",
            "RH": f"RH_{year}_{month:02d}.nc",
            "WS": f"WS_{year}_{month:02d}.nc",
        }

        # Open all dynamic datasets for this month.
        with xr.open_dataset(base_path / ds_paths["FU"]) as FU, \
             xr.open_dataset(base_path / ds_paths["DF"]) as DF, \
             xr.open_dataset(base_path / ds_paths["LF"]) as LF, \
             xr.open_dataset(base_path / ds_paths["PR"]) as PR, \
             xr.open_dataset(base_path / ds_paths["T2"]) as T2, \
             xr.open_dataset(base_path / ds_paths["RH"]) as RH, \
             xr.open_dataset(base_path / ds_paths["WS"]) as WS, \
             xr.open_dataset(base_path / ds_paths["AF"]) as AF:

            n_days = len(AF.ACTIVE_FIRE)  # number of daily timesteps this month
            all_grids = []
            for i in range(n_days):
                # Extract arrays for timestep i.
                FU_LL = FU.Live_Leaf[i].values
                FU_LW = FU.Live_Wood[i].values
                FU_DF = FU.Dead_Foliage[i].values
                FU_DW = FU.Dead_Wood[i].values
                DF_ = DF.DFMC_Foliage[i].values
                DW_ = DF.DFMC_Wood[i].values
                LF_ = LF.LFMC[i].values
                PR_ = PR.tp[i].values
                T2_ = T2.t2m[i].values
                RH_ = RH.rh[i].values
                WS_ = WS.ws[i].values

                # Only predict where total fuel load is positive.
                ft = FU_LL + FU_LW + FU_DF + FU_DW
                mask = ft > 0

                # Flatten the masked cells into a feature table matching the
                # model's training columns.
                # NOTE(review): assumes PO_arr/RD_arr are 2-D on the same
                # grid as ft — confirm against the CLIMATE files.
                feature_arrays = {
                    "PR": PR_[mask],
                    "T2": T2_[mask],
                    "RH": RH_[mask],
                    "WS": WS_[mask],
                    "FU_LL": FU_LL[mask],
                    "FU_LW": FU_LW[mask],
                    "FU_DF": FU_DF[mask],
                    "FU_DW": FU_DW[mask],
                    "DF": DF_[mask],
                    "DW": DW_[mask],
                    "LF": LF_[mask],
                    "PO": PO_arr[mask],
                    "RD": RD_arr[mask],
                }
                X_pred = pd.DataFrame(feature_arrays)

                # Probability of the positive (fire) class.
                y_proba = model.predict_proba(X_pred)[:, 1]

                # Scatter the predictions back onto the full grid; cells
                # without fuel stay NaN.
                fire_prob_grid = np.full(ft.shape, np.nan, dtype=float)
                fire_prob_grid[mask] = y_proba
                all_grids.append(fire_prob_grid)

            # Stack all timesteps into a 3-D (time, lat, lon) array.
            fire_prob_array = np.stack(all_grids, axis=0)

            # -----------------------------
            # SAVE TO NETCDF
            # -----------------------------
            # Julian-calendar daily time axis starting on the 1st.
            time = [cftime.DatetimeJulian(year, month, day + 1) for day in range(n_days)]
            ds_out = xr.Dataset(
                {"fire_probability": (["time", "latitude", "longitude"], fire_prob_array)},
                coords={
                    "time": time,
                    "latitude": PO.latitude,
                    "longitude": PO.longitude,
                },
            )
            # Overwrite any previous run's output for this month.
            if os.path.exists(output_file):
                os.remove(output_file)
            ds_out.to_netcdf(output_file)
            print(f"✅ Prediction saved → {output_file}")