Source code for ninolearn.postprocess.pca

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from os.path import join
from sklearn.decomposition.pca import PCA
from mpl_toolkits.basemap import Basemap
from matplotlib import cm
from scipy.signal import detrend

from ninolearn.IO.read_post import data_reader
from ninolearn.pathes import postdir
from ninolearn.utils import generateFileName, scaleMax
from ninolearn.plot.nino_timeseries import nino_background


[docs]class pca(PCA): """ This class extends the PCA class of the sklearn.decomposition.pca module.\ It facilitates the loading of the data from the postprocessed directory,\ wraps the fit function of the PCA class, has a saving routine for the\ computed pca component and can plot the EOF to get more insight into the\ results. """
[docs] def load_data(self, variable, dataset, processed='anom', startyear=1949, endyear=2018, lon_min=120, lon_max=280, lat_min=-30, lat_max=30): """ Load data for PCA analysis from the desired postprocessed data set. :type variable: str :param variable: The variable for which the PCA will be done. :type dataset: str :param dataset: The data set that should be used for the PCA. :type processed: str :param processed: Either '','anom' or 'normanom'. :param startyear: The start year for the time series for which the PCA\ is done. :param endyear: The last year for the time series for which the PCA\ is done. :param lon_min,lon_max: The minimum and the maximum values of the\ longitude grid for which the metrics shell be computed \ (from 0 to 360 degrees east) :param lat_min,lat_max: The min and the max values of the latitude\ grid for which the metrics shell be computed (from -180 to 180 degrees\ east) """ self.variable = variable self.dataset = dataset self.processed = processed self.startdate = pd.to_datetime(str(startyear)) self.enddate = (pd.to_datetime(str(endyear)) + pd.tseries.offsets.YearEnd(0)) self.reader = data_reader(startdate=self.startdate, enddate=self.enddate, lon_min=lon_min, lon_max=lon_max, lat_min=lat_min, lat_max=lat_max) data = self.reader.read_netcdf(variable, dataset, processed) self.set_eof_array(data)
[docs] def set_eof_array(self, data): """ Genrates the array that will be analyzed with the EOF. """ self.time = data['time'] self.lon = data['lon'] self.lat = data['lat'] EOFarr = np.array(data[:, :, :]) self.n_time = len(self.time) self.n_lat = len(self.lat) self.n_lon = len(self.lon) self.nan_index = np.isnan(EOFarr[-1,:,:]) self.EOFarr = EOFarr.reshape((self.n_time, self.n_lat * self.n_lon)) self.EOFarr[np.isnan(self.EOFarr)] = 0 self.EOFarr = detrend(self.EOFarr, axis=0)
[docs] def compute_pca(self): """ Simple wrapper around the PCA.fit() method. """ self.fit(self.EOFarr)
[docs] def save(self, extension='', filename=None): """ Saves the first three pca components to a csv-file. """ # save data to first day of month save_index = self.time.to_index() pca1 = pd.Series(np.matmul(self.EOFarr, self.components_[0, :]), index=save_index) pca2 = pd.Series(np.matmul(self.EOFarr, self.components_[1, :]), index=save_index) pca3 = pd.Series(np.matmul(self.EOFarr, self.components_[2, :]), index=save_index) self.df = pd.DataFrame({'pca1': pca1, 'pca2': pca2, 'pca3': pca3}) if filename is None: filename = generateFileName(self.variable, self.dataset, ''.join((self.processed, extension)), suffix='csv') else: filename = '.'.join((filename,'csv')) filename = '-'.join(['pca', filename]) self.df.to_csv(join(postdir, filename))
[docs] def plot_eof(self): """ Make a plot for the first leading EOFs. """ lon2, lat2 = np.meshgrid(self.lon, self.lat) try: nino34 = self.reader.read_csv('oni') except IndexError: """ Allow error when data is out of range for ONI index. """ pass except AttributeError: """ Allow when now reader was initialized (data was not loaded but directly) provided with the .set_eof_array() method """ pass fig = plt.figure(figsize=(15, 7)) for i in range(0, 2): fig.add_subplot(221+i) plt.title("EOF"+str(i+1)) m = Basemap(projection='robin', lon_0=-180, resolution='c') x, y = m(lon2, lat2) m.drawparallels(np.arange(-90., 120., 30.)) m.drawmeridians(np.arange(0., 360., 60.)) m.drawmapboundary(fill_color='white') m.drawcoastlines() norm = cm.colors.Normalize(vmax=-1, vmin=1.) cmap = cm.bwr cs = m.pcolormesh(x, y, scaleMax( self.components_[i, :].reshape(self.n_lat, self.n_lon)), cmap=cmap, norm=norm) m.colorbar(cs) for i in range(0, 2): fig.add_subplot(223+i) projection = np.matmul(self.EOFarr, self.components_[i, :]) try: nino_background(nino34) except: pass plt.plot(self.time, projection) plt.show()
[docs] def component_map_(self, eof=1): """ Returns the components as a map. :param eof: The leading eof (default:1). """ comp_map = self.components_[eof-1, :].copy().reshape(self.n_lat, self.n_lon) comp_map[self.nan_index] = np.nan return comp_map
[docs] def pc_projection(self, eof=1): """ Returns the amplitude timeseries of the specified eof. :param eof: The nth leading eof (default:1). """ projection = np.matmul(self.EOFarr, self.components_[eof-1, :]) return projection