From 871a490909249c95bb0e4ac247090cde24970f61 Mon Sep 17 00:00:00 2001
From: DLUTzhanweiliu <337891617@qq.com>
Date: Mon, 9 Mar 2020 16:20:44 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BE=9D=E8=B5=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 MyEvalCriteria.py |  86 +++++++++++++++++++++++++++++++++++++
 MyPrepaData.py    | 107 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 MyEvalCriteria.py
 create mode 100644 MyPrepaData.py

diff --git a/MyEvalCriteria.py b/MyEvalCriteria.py
new file mode 100644
index 0000000..1479039
--- /dev/null
+++ b/MyEvalCriteria.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# coding: utf-8
+import numpy as np
+from sklearn.metrics import mean_absolute_error
+def rmse(predictions, targets):
+    """
+    Root Mean Square Error (RMSE), also known as the standard error.
+    """
+    return np.sqrt(((predictions - targets) ** 2).mean())
+
+def corr(predictions, targets):
+    """
+    Pearson correlation coefficient.
+    """
+    return np.corrcoef(predictions.reshape(1,-1),targets.reshape(1,-1))[1,0]
+
+def mae(predictions, targets):
+    """
+    Mean Absolute Error (MAE).
+    """
+    return mean_absolute_error(predictions,targets)
+
+def mape(predictions, targets):
+    '''
+    Mean Absolute Percentage Error (MAPE), in percent.
+    '''
+    return (abs(predictions - targets)/targets).mean()*100
+
+def accuacy(predictions, targets,p=0.2):
+    '''
+    //TODO
+    '''
+    result = np.array(abs(predictions - targets)/targets).flatten()
+    return result[resultthreshold
+    predictions,targets = predictions[idx],targets[idx]
+    if (((predictions - targets)**2).sum())**0.5==0:
+        return 0
+    return (((predictions - targets)**2*targets**2).sum())**0.25/(((predictions - targets)**2).sum())**0.5
+
+def ia(predictions, targets):
+    '''
+    Index of agreement (Willmott's d).
+    '''
+    ave = np.mean(targets)
+    return 1-((predictions - targets) ** 2).sum()/(np.array([(abs(i-ave)+abs(j-ave))**2 for i,j in zip(targets,predictions)])).sum()
+def MS4E(predictions, targets):
+    '''
+    Mean fourth-power error.
+    '''
+    n = targets.shape[0]
+    return ((predictions - targets) ** 4).sum()/n
+def BHV(predictions, targets, threshold=1722):
+    '''
+    Percent bias in high-flow volume (flows above `threshold`);
+    ref(doi):10.1029/2007WR006716
+    '''
+    idx = predictions>threshold
+    predictions,targets = predictions[idx],targets[idx]
+    return (predictions - targets).sum()/targets.sum()*100
\ No newline at end of file
diff --git a/MyPrepaData.py b/MyPrepaData.py
new file mode 100644
index 0000000..e66868a
--- /dev/null
+++ b/MyPrepaData.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# coding: utf-8
+import pandas as pd
+data = pd.read_csv('2011-2018再分析和流量数据.csv',index_col=0)
+def Preparation(data,Qlag,Rlag,reanalysis=[],isreanalysis=False,leadtime=1):
+    """
+    Prepare the model input/output data.
+    Note: every reanalysis variable is lagged by `leadtime` (default 1).
+
+    Parameters
+    ----------
+    data : pandas DataFrame
+        All data.
+    Qlag : list
+        All inflow lags.
+    Rlag : list
+        All rainfall lags.
+    reanalysis : list
+        Names of the selected reanalysis variables.
+    isreanalysis : bool
+        Whether to include the reanalysis variables.
+    leadtime : int, >= 1
+        Forecast lead time.
+
+    Returns
+    -------
+    X : array_like
+        Input data.
+    y : array_like
+        Output data.
+
+    Notes
+    -----
+    Relies on pandas.
+    """
+    X = pd.DataFrame()
+    for q in Qlag:
+        X['Q(t-%d)'%(q+leadtime-1)] = data.Q.shift(q+leadtime-1)
+    for r in Rlag:
+        X['R(t-%d)'%(r+leadtime-1)] = data.real_p.shift(r+leadtime-1)
+    X['y'] = data.Q
+    if not isreanalysis:
+        X.dropna(axis=0,how='any',inplace=True)
+        return X.drop(['y'],axis=1),X.y
+    else:
+        redata = data[reanalysis].shift(leadtime)
+        col = ['%s(t-%d)'%(c,leadtime) for c in reanalysis]
+        redata.columns = col
+        X = pd.concat([X,redata],axis=1)
+        X.dropna(axis=0,how='any',inplace=True)
+        return X.drop(['y'],axis=1),X.y
+
+def Preparation2(data,Qlag,Rlag,reanalysis={},isreanalysis=False,leadtime=1):
+    """
+    Prepare the model input/output data.
+    Note: every predictor is lagged by at least `leadtime`; inflow and
+    rainfall lags smaller than `leadtime` are skipped, and each reanalysis
+    variable uses its own lag from `reanalysis`, or `leadtime` if that lag
+    is smaller.
+
+    Parameters
+    ----------
+    data : pandas DataFrame
+        All data.
+    Qlag : list
+        All inflow lags.
+    Rlag : list
+        All rainfall lags.
+    reanalysis : dict
+        Mapping from reanalysis variable name to its lag.
+    isreanalysis : bool
+        Whether to include the reanalysis variables.
+    leadtime : int, >= 1
+        Forecast lead time.
+
+    Returns
+    -------
+    X : array_like
+        Input data.
+    y : array_like
+        Output data.
+
+    Notes
+    -----
+    Relies on pandas.
+    """
+    X = pd.DataFrame()
+    if leadtime > max(Qlag):
+        X['Q(t-%d)'%(leadtime)] = data.Q.shift(leadtime)
+    else:
+        for q in Qlag:
+            if leadtime > q:
+                continue
+            else:
+                X['Q(t-%d)'%(q)] = data.Q.shift(q)
+    if leadtime > max(Rlag):
+        X['R(t-%d)'%(leadtime)] = data.real_p.shift(leadtime)
+    else:
+        for r in Rlag:
+            if leadtime > r:
+                continue
+            else:
+                X['R(t-%d)'%(r)] = data.real_p.shift(r)
+    X['y'] = data.Q
+    if not isreanalysis:
+        X.dropna(axis=0,how='any',inplace=True)
+        return X.drop(['y'],axis=1),X.y
+    else:
+        for re in reanalysis:
+            if reanalysis[re] >= leadtime:
+                X['%s(t-%d)'%(re,reanalysis[re])] = data[re].shift(reanalysis[re])
+            else:
+                X['%s(t-%d)'%(re,leadtime)] = data[re].shift(leadtime)
+        X.dropna(axis=0,how='any',inplace=True)
+        return X.drop(['y'],axis=1),X.y
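
For context, a minimal sketch of how the two new modules might be used together for a one-step-ahead inflow forecast. It assumes the CSV loaded at the top of MyPrepaData.py is available (it is read at import time) and contains the columns Q and real_p; the lag choices and the scikit-learn RandomForestRegressor are illustrative assumptions, not part of the patch.

    # Sketch only: illustrative lags and model, assuming the CSV read by
    # MyPrepaData.py exists with columns Q (inflow) and real_p (rainfall).
    from sklearn.ensemble import RandomForestRegressor

    from MyPrepaData import data, Preparation
    from MyEvalCriteria import rmse, mae, corr

    # Lagged inputs: inflow lags 1-3, rainfall lags 1-2, no reanalysis variables.
    X, y = Preparation(data, Qlag=[1, 2, 3], Rlag=[1, 2], leadtime=1)

    # Chronological split (no shuffling for a time series).
    split = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split], X.iloc[split:]
    y_train, y_test = y.iloc[:split], y.iloc[split:]

    model = RandomForestRegressor(n_estimators=200, random_state=0)  # illustrative model
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    print('RMSE:', rmse(pred, y_test.values))
    print('MAE :', mae(pred, y_test.values))
    print('CORR:', corr(pred, y_test.values))

Preparation2 differs mainly in that lags shorter than the lead time are dropped rather than shifted further back, so for multi-step forecasts it keeps only predictors that are actually available at forecast time.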