⚠️ This is under development! ⚠️


Producing the ENSO index verification heatmaps#

In this example we use the El Niño–Southern Oscillation (ENSO) indices computed in the previous Notebook to produce heatmaps of temporal correlations, as included on the C3S seasonal verification page.

Some information on ENSO impacts in Europe can be found in the C3S documentation.

Configuration#

Import required modules and define some parameters related to the indices previously computed.

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import regionmask
import seaborn as sns
from matplotlib.colors import BoundaryNorm
from dateutil.relativedelta import relativedelta

# dictionary of systems, versions and attributes
providers = {
    'ecmf.s51': {'cds_name': 'ecmwf', 'plot_name': 'ECMWF', 'plot_system': 'SEAS5', 'cds_system': '51', 'hcsize': 25,
                 'fcsize': 51, 'lagged': False},
    'lfpw.s8': {'cds_name': 'meteo_france', 'plot_name': 'Météo-France', 'plot_system': 'System 8', 'cds_system': '8',
                'hcsize': 25, 'fcsize': 51, 'lagged': False},
    'egrr.s602': {'cds_name': 'ukmo', 'plot_name': 'Met Office', 'plot_system': 'GloSea6', 'cds_system': '602',
                  'hcsize': 28, 'fcsize': 60, 'lagged': True},
    # 'egrr.s603': {'cds_name': 'ukmo', 'plot_name': 'Met Office', 'plot_system': 'GloSea6', 'cds_system': '603',
    #               'hcsize': 28, 'fcsize': 60, 'lagged': True},
    'edzw.s21': {'cds_name': 'dwd', 'plot_name': 'DWD', 'plot_system': 'GCFS2.1', 'cds_system': '21', 'hcsize': 30,
                 'lagged': False},
    'cmcc.s35': {'cds_name': 'cmcc', 'plot_name': 'CMCC', 'plot_system': 'SPS3.5', 'cds_system': '35', 'hcsize': 40,
                 'fcsize': 50, 'lagged': False},
    'kwbc.s2': {'cds_name': 'ncep', 'plot_name': 'NCEP', 'plot_system': 'CFSv2', 'cds_system': '2', 'hcsize': 20,
                'lagged': True},
    'rjtd.s3': {'cds_name': 'jma', 'plot_name': 'JMA', 'plot_system': 'CPS3', 'cds_system': '3', 'hcsize': 10,
                'fcsize': 150, 'lagged': True},
    'cwao.s2': {'cds_name': 'eccc', 'plot_name': 'ECCC', 'plot_system': 'CanCM4i', 'cds_system': '2', 'hcsize': 10,
                'fcsize': 10, 'lagged': False},
    'cwao.s3': {'cds_name': 'eccc', 'plot_name': 'ECCC', 'plot_system': 'GEM5-NEMO', 'cds_system': '3', 'hcsize': 10,
                'fcsize': 10, 'lagged': False},
    # 'cwao.s4': {'cds_name': 'eccc', 'plot_name': 'ECCC', 'plot_system': 'CanESM5.1p1bc', 'cds_system': '4', 'hcsize': 10,
    #             'fcsize': 10, 'lagged': False},
    # 'cwao.s5': {'cds_name': 'eccc', 'plot_name': 'ECCC', 'plot_system': 'GEM5.2-NEMO', 'cds_system': '5', 'hcsize': 10,
    #             'fcsize': 10, 'lagged': False},
}

# select a system and version
prov = 'lfpw.s8'
centre = providers[prov]['cds_name']
version = providers[prov]['cds_system']
# define the common hindcast period
hc_period = ['1993', '2016']
hc_str = '_'.join([hc_period[0], hc_period[1]])
# the data path where the indices were saved
data_path = '/data'

Load the pre-computed ERA5 indices and prepare to compute correlations#

# load in reanalysis
era5_file_name = '/era_5_nino_ind_1993_2016.nc'
era5_ind = xr.open_dataarray(data_path + era5_file_name)
era5_ind
# create a correlation array
corr = xr.Dataset()

Loop over the hindcast start months and compute correlations#

Loop over the start months. For each start month do the following:

  • deal with the missing Jan 1993 start if the centre is ukmo

  • create a copy of the ERA5 array (to rearrange to match the valid times of the start month in question)

  • read in the hindcast index for the given start, and refactor to include valid time

  • sub-select qualifying ensemble members, if needed (for lagged start systems where not all dates are used in the graphical products)

  • refactor the ERA5 array to align with the hindcast lead times (this date bookkeeping is illustrated in the sketch after this list)

  • for each lead month, compute the temporal correlation

  • combine the lead month correlations and append to the array of correlations by start month

The result is an array of temporal correlations for each start month and lead month.
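
A minimal, standalone sketch of this date bookkeeping (with example values that are not part of the pipeline) may help before reading the full loop: forecastMonth 1 is the start month itself, and the reverse mapping shown is the one applied to the ERA5 valid times below.

import pandas as pd
from dateutil.relativedelta import relativedelta

stmonth = 2                              # example only: February starts
start_date = pd.Timestamp('1993-02-01')
for fcmonth in range(1, 7):
    # forward mapping: start date + (forecastMonth - 1) months gives the valid time
    valid_time = start_date + relativedelta(months=fcmonth - 1)
    # reverse mapping: recover forecastMonth from a valid month and the start month
    mm = valid_time.month - stmonth
    recovered = mm + 1 if mm >= 0 else mm + 13
    print(fcmonth, valid_time.strftime('%Y-%m'), recovered)

The full loop over start months follows.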

for stmonth in range(1, 13):
    print('START MONTH = {}'.format(stmonth))
    # Note: if the chosen system does not yet have all starts, that needs to be handled here.
    # In particular, January 1993 is missing for UKMO, so the ERA5 data must be adjusted to match.
    if centre == 'ukmo' and stmonth == 1:
        # edit config to deal with Jan 1993 data gap in met office data
        hc_period[0] = '1994'
        o = era5_ind.sel(time=slice(hc_period[0], hc_period[1])).copy()
    else:
        o = era5_ind.copy()

    # READ HCST DATAFRAME FOR GIVEN START
    hc_file_name = '/{}_mm_{}_{}_{}_st_{}_nino_ind.nc'.format('sst', centre, version, hc_str, stmonth)
    hcst = xr.open_dataarray(data_path + hc_file_name)
    hcst = hcst.rename({'time': 'start_date'})

    # build the valid_time coordinate: forecastMonth 1 corresponds to the start month itself
    vt = xr.DataArray(dims=('start_date', 'forecastMonth'),
                      coords={'forecastMonth': hcst.forecastMonth, 'start_date': hcst.start_date})
    vt.data = [[pd.to_datetime(std) + relativedelta(months=fcmonth - 1)
                for fcmonth in vt.forecastMonth.values]
               for std in vt.start_date.values]
    hcst = hcst.assign_coords(valid_time=vt)
    # select qualifying ensemble members
    if prov == 'kwbc.s2':  # Only case where member sub-selection is needed from the CDS data
        hcst = hcst.sel(number=slice(None, 19))

    # refactor reanalysis to align with hindcast lead times
    o = o.rename({'time': 'start_date'}).swap_dims({'start_date': 'valid_time'})
    # recover the forecast month of each ERA5 valid time relative to this start month
    fcmonths = [mm + 1 if mm >= 0 else mm + 13
                for mm in [t.month - stmonth for t in pd.to_datetime(o.valid_time.values)]]
    o = o.assign_coords(forecastMonth=('valid_time', fcmonths))
    o = o.loc[(o['forecastMonth'] <= 6)]
    
    # loop over forecast month, define the hindcast mean over the ensemble members, and compute the correlation over the hindcast years
    l_corr = list()
    for this_fcmonth in hcst.forecastMonth.values:
        thishcst = (hcst.sel(forecastMonth=this_fcmonth).swap_dims({'start_date': 'valid_time'}))
        thishcst = thishcst.mean('number')
        thisobs = o.where(o.valid_time == thishcst.valid_time, drop=True)
        l_corr.append(xr.corr(thishcst, thisobs, dim='valid_time'))
        
    thiscorr = xr.concat(l_corr, dim='forecastMonth')
    thiscorr = thiscorr.assign_coords(forecastMonth=[1, 2, 3, 4, 5, 6])
    thiscorr = thiscorr.expand_dims(dim={'start_month': [stmonth]}, axis=0)
    corr = xr.merge([corr, thiscorr])
START MONTH = 1
START MONTH = 2
START MONTH = 3
START MONTH = 4
START MONTH = 5
START MONTH = 6
START MONTH = 7
START MONTH = 8
START MONTH = 9
START MONTH = 10
START MONTH = 11
START MONTH = 12

Save the result, and inspect it.

file_name = '/{}_mm_{}_{}_{}_nino_ind_corr.nc'.format('sst', centre, version, hc_str)
corr.to_netcdf(data_path + file_name, mode='w')
corr
<xarray.Dataset> Size: 3kB
Dimensions:        (start_month: 12, region: 4, forecastMonth: 6)
Coordinates:
  * start_month    (start_month) int64 96B 1 2 3 4 5 6 7 8 9 10 11 12
  * region         (region) int64 32B 0 1 2 3
  * forecastMonth  (forecastMonth) int64 48B 1 2 3 4 5 6
    surface        float64 8B 0.0
    abbrevs        (region) <U3 48B '1+2' '3' '3.4' '4'
    names          (region) <U7 112B 'NINO1+2' 'NINO3' 'NINO3.4' 'NINO4'
    number         int64 8B 0
    step           timedelta64[ns] 8B 00:00:00
Data variables:
    sst            (start_month, forecastMonth, region) float64 2kB 0.8843 .....
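
As a quick sanity check before plotting, individual values can be pulled out of the merged Dataset by label. This is a small usage example, assuming corr still holds the result of the loop above; region index 2 corresponds to NINO3.4.

# NINO3.4 correlation for May starts at lead month 3 (i.e. valid in July)
print(float(corr['sst'].sel(region=2, start_month=5, forecastMonth=3)))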

Plot the correlation heatmaps#

Using the computed correlations, construct correlation heatmaps for each NINO index computed in the previous Notebook.

Define labels for the valid months or start months, and load the correlations.

# month labels for the plots
mon_labels = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

# load the correlations
file_name = '/{}_mm_{}_{}_{}_nino_ind_corr.nc'.format('sst', centre, version, hc_str)
corr = xr.open_dataarray(data_path + file_name)

Loop over the regions for which the indices and correlations were computed:

  • refactor the correlations for each region/index into a pandas DataFrame (removing unnecessary dimensions and variables)

  • plot correlation as a function of start month and leadtime using a seaborn heatmap (a toy version of this pivot is sketched after this list)

  • refactor the correlations again to include valid month instead of start month

  • plot correlation as a function of valid month and leadtime
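
As a toy illustration of the pivot used in the plotting loop, the sketch below fills a long (start_month, forecastMonth) table with synthetic correlation values (not read from any file) and unstacks it into a 12 x 6 grid that seaborn can draw directly.

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
idx = pd.MultiIndex.from_product([range(1, 13), range(1, 7)],
                                 names=['start_month', 'forecastMonth'])
toy = pd.Series(rng.uniform(0.4, 1.0, size=len(idx)), index=idx, name='sst')
grid = toy.unstack('forecastMonth')   # rows: start_month, columns: forecastMonth
ax = sns.heatmap(grid.T, cmap='RdYlBu_r', vmin=0, vmax=1, annot=True, fmt='.2f',
                 square=True, linewidth=0.5, cbar=False)
ax.invert_yaxis()                     # leadtime increasing upwards, as in the plots below
plt.show()

The full plotting loop follows.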

# make the plot - loop over nino regions
for ind in corr.region.values:
    print(ind, corr.sel(region=ind).names.values)
    ind_name = str(corr.sel(region=ind).names.values)
    hm_cm = plt.get_cmap('RdYlBu_r')
    hm_levs = np.linspace(0, 1, 11)
    norm = BoundaryNorm(hm_levs, ncolors=hm_cm.N, clip=True)

    # HEATMAP (correlation as a function of start_month and leadtime)
    df = (corr.sel(region=ind).to_dataframe()
          .drop(['region', 'surface', 'abbrevs', 'step', 'names', 'number'], axis=1))
    df = df.unstack().droplevel(0, axis=1)
    fig = plt.figure(figsize=(10, 10))
    ax = sns.heatmap(df.T, square=True, cmap=hm_cm, vmin=hm_levs.min(), vmax=hm_levs.max(), norm=norm,
                     linewidth=0.5,
                     annot=True, fmt=".2f", cbar=False)
    ax.invert_yaxis()

    tit_txt1 = '{} index (temporal correlation with ERA5) '.format(ind_name)
    if centre == 'ukmo':
        tit_txt2 = '{} {} (system={}), '.format(providers[prov]['plot_name'], providers[prov]['plot_system'],
                                                providers[prov]['cds_system'])
    elif prov == 'ecmf.s51':
        tit_txt2 = '{} {} (C3Sv{}), '.format(providers[prov]['plot_name'], providers[prov]['plot_system'],
                                             providers[prov]['cds_system'])
    else:
        tit_txt2 = '{} {}, '.format(providers[prov]['plot_name'], providers[prov]['plot_system'])
    plt.title('\n'.join([tit_txt2 + tit_txt1, '']))
    plt.xlabel('start month')
    plt.ylabel('leadtime')
    # Set ticks labels for x-axis
    ax.set_xticklabels(mon_labels)  
    ax.set_yticklabels(['1', '2', '3', '4', '5', '6'])
    plt.show()

    # HEATMAP (correlation as a function of valid_month and leadtime)
    df_vt = df.stack(future_stack=True).reset_index()
    df_vt['valid_month'] = df_vt.apply(lambda row: row['start_month'] + row['forecastMonth'] - 1, axis=1).astype(
        'int32')
    df_vt['valid_month'] = [int(mm) if mm < 13 else mm - 12 for mm in df_vt['valid_month']]
    df_vt = df_vt.drop('start_month', axis=1).set_index(
        ['valid_month', 'forecastMonth']).unstack().transpose().droplevel(0)
    fig = plt.figure(figsize=(10, 10))
    ax = sns.heatmap(df_vt, square=True, cmap=hm_cm, vmin=hm_levs.min(), vmax=hm_levs.max(), norm=norm,
                     linewidth=0.5,
                     annot=True, fmt=".2f", cbar=False)
    ax.invert_yaxis()
    plt.title('\n'.join([tit_txt2 + tit_txt1, '']))
    plt.xlabel('valid month')
    plt.ylabel('leadtime')
    # Set ticks labels for x-axis
    ax.set_xticklabels(mon_labels)
    plt.show()
0 NINO1+2
1 NINO3
2 NINO3.4
3 NINO4
[Output: for each NINO region, a start-month vs leadtime heatmap and a valid-month vs leadtime heatmap]

A natural extension of these single-system monthly correlation heatmaps would be to compute the indices for multiple systems and combine them into multi-system products. The approach to variance normalisation used in the C3S SST index charts is outlined on the products description page and in the SEAS5 user guide.
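
As a starting point, the per-system correlation files written by this notebook (one per run, after changing prov in the configuration above) could be gathered into a single array with a new system dimension. The sketch below only assembles the files; it assumes they already exist on data_path, and the selected list is just an example subset of the providers dictionary.

import pandas as pd
import xarray as xr

selected = ['ecmf.s51', 'lfpw.s8', 'egrr.s602']   # example subset of providers keys
per_system = []
for p in selected:
    c = providers[p]
    fname = '/{}_mm_{}_{}_{}_nino_ind_corr.nc'.format('sst', c['cds_name'], c['cds_system'], hc_str)
    da = xr.open_dataarray(data_path + fname)
    # drop system-specific scalar coordinates so the arrays align cleanly when concatenated
    per_system.append(da.drop_vars(['number', 'step', 'surface'], errors='ignore'))
multi = xr.concat(per_system, dim=pd.Index(selected, name='system'))
multi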