Skip to content

SK LEARN

Knight.zhou edited this page Sep 22, 2017 · 1 revision
from sklearn import datasets
from sklearn.cross_validation import train_test_split
#Cross Validation
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.cross_validation import train_test_split
#Accuracy
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
#Precision
from sklearn.metrics import precision_score
#Sensitivity or Recall
from sklearn.metrics import recall_score
#F1 score
from sklearn.metrics import f1_score
#Confusion matrix
from sklearn.metrics import confusion_matrix
#Gain and Lift Chart.
#Kolmogorov-Smirnov Chart.
#Area Under the ROC curve (AUC – ROC)
import numpy as np
from sklearn.metrics import roc_auc_score
#Gini Coefficient

#Logarithmic Loss
#Mean Absolute Error
from sklearn.metrics import mean_absolute_error
#Mean Squared Error
from sklearn.metrics import mean_squared_error
#Root Mean Squared Error (RMSE)
#R^2 Metric
from sklearn.metrics import r2_score



# Demo script: load the iris dataset, hold out a test split, cross-validate a
# logistic-regression model on the training part, then report classification
# metrics on the held-out part.
data = datasets.load_iris()
X = data['data']
Y = data['target']

# train_test_split returns the train/test pair of each input array in turn,
# i.e. (X_train, X_test, Y_train, Y_test); unpacking in any other order mixes
# features with labels.
# NOTE(review): an integer test_size is an absolute sample count, so only 3
# samples are held out here -- 0.3 may have been intended; confirm.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=3, random_state=0)
print('Full dataset, features:',len(X))
print('Full dataset, labels:',len(Y))
print('Train dataset, features:',len(X_train))
print('Test dataset, features:',len(X_test))
print('Train dataset, labels:',len(Y_train))
print('Test dataset, labels:',len(Y_test))

#Cross Validation
# NOTE(review): sklearn.cross_validation is the legacy module; newer
# scikit-learn releases expose these helpers from sklearn.model_selection.
# Migrating also requires updating the file-level imports.
num_instances = len(X_train)
kfold = cross_validation.KFold(num_instances, n_folds=10, random_state=1)
model = LogisticRegression()
scores = cross_validation.cross_val_score(model, X_train, Y_train, cv=kfold)
print(scores.mean(), scores.std())

# Fit on the training split and predict the held-out samples.
model.fit(X_train, Y_train)
preds = model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred). Precision, recall and the
# confusion matrix are not symmetric in their arguments, so the ground truth
# must come first.
#Accuracy
print(accuracy_score(Y_test, preds))
#Precision
print(precision_score(Y_test, preds, average=None))
#Sensitivity or Recall
print(recall_score(Y_test, preds, average=None))
#F1 score
print(f1_score(Y_test, preds, average=None))
#Confusion matrix
print(confusion_matrix(Y_test, preds))
#Gain and Lift Chart. (no example yet)
#Kolmogorov-Smirnov Chart. (no example yet)
#Area Under the ROC curve (AUC – ROC)
# Stand-alone binary example; the score is printed so the result is visible
# when the file runs as a script.
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
print(roc_auc_score(y_true, y_scores))
#Gini
def gini(list_of_values):
    """Return the Gini coefficient of the given values.

    The values are sorted ascending and accumulated into a running total.
    The area under that cumulative curve is summed with unit-width
    trapezoids and compared with the area under the straight line that a
    perfectly even distribution would produce.

    Args:
        list_of_values: Any iterable of numbers (list, tuple, array,
            generator, ...). It is materialised once, so one-shot iterators
            are accepted.

    Returns:
        ``(fair_area - area) / fair_area`` as a float: 0.0 when all values
        are equal, larger when the total is concentrated in few values.

    Raises:
        ValueError: If no values are supplied.
        ZeroDivisionError: If the values are plain Python numbers that sum
            to zero (the ratio is undefined).
    """
    sorted_values = sorted(list_of_values)
    if not sorted_values:
        raise ValueError("gini() requires at least one value")

    height, area = 0, 0
    for value in sorted_values:
        height += value
        # Trapezoid between the previous and the new cumulative height:
        # ((height - value) + height) / 2 == height - value / 2.
        area += height - value / 2.

    # Area under the line of perfect equality; computed once after the loop
    # and with a float divisor so integer inputs never truncate.
    fair_area = height * len(sorted_values) / 2.
    return (fair_area - area) / fair_area

def normalized_gini(y_pred, y):
    """Return the Gini coefficient of ``y_pred`` divided by that of ``y``."""
    predicted_gini = gini(y_pred)
    reference_gini = gini(y)
    return predicted_gini / reference_gini
# Ratio of the Gini coefficient of the predictions to that of the true labels.
print(normalized_gini(preds, Y_test))

# One small regression fixture shared by the three metrics below.
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]

# Printed in order: Mean Absolute Error, Mean Squared Error, R2 metric.
for regression_metric in (mean_absolute_error, mean_squared_error, r2_score):
    print(regression_metric(y_true, y_pred))

https://tensor-flow.com/validation-testing-model

产品需求文档(PRD)是将商业需求文档(BRD)和市场需求文档(MRD)用更加专业的语言进行描述的文档。该文档是产品项目由“概念化”阶段进入到“图纸化”阶段的最主要的一个文档。当然,这个定义针对的是一个全新的产品。广义上来讲,产品需求的描述应该包含产品的战略和战术:战略是指产品定位、目标市场、目标用户、竞争对手等;战术是指产品的结构、核心业务流程、具体用例描述、功能与内容描述等。本文主要讨论的是战术部分。

  PRD的主要使用对象有:开发、测试、项目经理、交互设计师、运营及其他业务人员。开发可以根据PRD获知整个产品的逻辑;测试可以根据PRD编写测试用例;项目经理可以根据PRD拆分工作包,并分配开发人员;交互设计师可以通过PRD来设计交互细节。PRD是项目启动之前必须通过评审确定的最重要文档。

Clone this wiki locally