import numpy as np
import matplotlib.pyplot as plt
def ROC_plot(state, scores, threshold=None, color=None, legend_on=True,
             legend_label="predict", base_line=True, linewidth=1.5,
             label=None):
    """
    Plot ROC curve and calculate the Area under the curve (AUC) from the
    prediction scores and true labels.

    A sample is called positive at a given threshold when its score is
    greater than or equal to that threshold. The curve is drawn on the
    current matplotlib axes.

    Parameters
    ----------
    state: `array_like`, 1-D
        Label state for the ground truth with binary value: entries equal
        to 0 are counted as negatives, entries equal to 1 as positives
    scores: `array_like`, 1-D, same length as `state`
        Predicted scores for being positive
    threshold: float
        The suggested threshold to add as a dot on the curve; it is also
        inserted into the returned thresholds
    color: string
        Color for the curve and threshold dot. If None, matplotlib picks
        the curve color and the dot is drawn as a hollow black circle
    legend_on: bool
        If True, show the legend (curve label with its AUC)
    legend_label: string
        The legend label to add, replace of old argument *label*
    base_line: bool
        If True, add the diagonal baseline as random guess (AUC=0.5)
    linewidth: float
        The line width
    label: string
        Deprecated alias of `legend_label`; when given it overrides
        `legend_label` and a FutureWarning is issued

    Returns
    -------
    (fpr, tpr, thresholds, auc) : tuple of values
    fpr: array
        False positive rate with each threshold
    tpr: array
        True positive rate with each threshold
    thresholds: array
        Value of all thresholds, sorted in ascending order
    auc: float
        The overall area under the curve (AUC), trapezoidal rule

    Raises
    ------
    ValueError
        If `state` and `scores` do not have the same number of elements

    Examples
    --------
    .. plot::

        >>> import numpy as np
        >>> from hilearn.plot import ROC_plot
        >>> np.random.seed(1)
        >>> score0 = np.random.rand(100) * 0.8
        >>> score1 = 1 - 0.4 * np.random.rand(300)
        >>> scores = np.append(score0, score1)
        >>> state = np.append(np.zeros(100), np.ones(300))
        >>> res = ROC_plot(state, scores, threshold=0.5, color='blue')
    """
    import warnings

    if label is not None:
        legend_label = label
        warnings.warn('label argument is replaced by legend_label and will '
                      'be removed in future', FutureWarning, stacklevel=2)

    # The docstring promises array_like input, but the comparisons and masks
    # below need real arrays; flatten so both inputs line up element-wise.
    state = np.asarray(state).ravel()
    scores = np.asarray(scores).ravel()
    if state.shape[0] != scores.shape[0]:
        raise ValueError("state and scores must have the same length, "
                         "got %d and %d" % (state.shape[0], scores.shape[0]))

    # Candidate thresholds: every distinct score, capped to keep the loop
    # below cheap. The cap is applied with evenly spaced picks from the
    # sorted unique scores, which is reproducible and leaves the caller's
    # global random state untouched.
    max_thresholds = 2000
    score_gap = np.unique(scores)
    if len(score_gap) > max_thresholds:
        keep = np.round(
            np.linspace(0, len(score_gap) - 1, max_thresholds)).astype(int)
        score_gap = score_gap[keep]

    # Sentinels just outside the range of *all* scores guarantee the curve
    # starts at (1, 1) and ends at (0, 0), even after subsampling.
    score_gap = np.append(np.min(scores) - 0.1, score_gap)
    score_gap = np.append(score_gap, np.max(scores) + 0.1)

    if threshold is not None:
        thresholds = np.sort(np.append(threshold, score_gap))
    else:
        thresholds = np.sort(score_gap)

    is_neg = state == 0
    is_pos = state == 1
    # Kept as numpy floats so an empty class yields nan/inf (with a numpy
    # warning) instead of raising ZeroDivisionError.
    n_neg = np.sum(is_neg).astype('float')
    n_pos = np.sum(is_pos).astype('float')

    def _rates_at(cutoff):
        # (FPR, TPR) when every score >= cutoff is called positive.
        called = scores >= cutoff
        return (np.sum(called & is_neg) / n_neg,
                np.sum(called & is_pos) / n_pos)

    fpr = np.zeros(thresholds.shape[0])
    tpr = np.zeros(thresholds.shape[0])
    for i, cutoff in enumerate(thresholds):
        fpr[i], tpr[i] = _rates_at(cutoff)

    # Trapezoidal rule. Thresholds ascend, so fpr is non-increasing and
    # each width fpr[i] - fpr[i+1] is non-negative.
    auc = np.sum((fpr[:-1] - fpr[1:]) * (tpr[:-1] + tpr[1:]) / 2.0)

    curve_style = {"linewidth": linewidth,
                   "label": "%s: AUC=%.3f" % (legend_label, auc)}
    if color is not None:
        curve_style["color"] = color
    plt.plot(fpr, tpr, "-", **curve_style)

    ## Adding dot with given threshold
    if threshold is not None:
        _fpr, _tpr = _rates_at(threshold)
        if color is None:
            plt.plot(_fpr, _tpr, marker='o', markersize=8, mec='k',
                     mfc='none')
        else:
            plt.plot(_fpr, _tpr, marker='o', markersize=8, mec=color,
                     mfc=color)

    ## Adding base line plot
    if base_line:
        plt.plot(np.arange(0, 2), np.arange(0, 2), "k--", linewidth=1.0,
                 label="random: AUC=0.500")

    if legend_on:
        plt.legend(loc="best", fancybox=True, ncol=1)
    plt.xlabel("False Positive Rate (1-Specificity)")
    plt.ylabel("True Positive Rate (Sensitivity)")

    return fpr, tpr, thresholds, auc
def PR_curve(state, scores, threshold=None, color=None, legend_on=True,
             legend_label="predict", base_line=False, linewidth=1.5,
             label=None):
    """
    Plot Precision-recall curve and calculate the Area under the curve (AUC)
    from the prediction scores and true labels.

    A sample is called positive at a given threshold when its score is
    greater than or equal to that threshold. The curve is drawn on the
    current matplotlib axes.

    Parameters
    ----------
    state: `array_like`, 1-D
        Label state for the ground truth with binary value: entries equal
        to 0 are counted as negatives, entries equal to 1 as positives
    scores: `array_like`, 1-D, same length as `state`
        Predicted scores for being positive
    threshold: float
        The suggested threshold to add as a dot on the curve; it is also
        inserted into the returned thresholds
    color: string
        Color for the curve and threshold dot. If None or "none",
        matplotlib picks the curve color and the dot is drawn as a hollow
        black circle
    legend_on: bool
        If True, show the legend (curve label with its AUC)
    legend_label: string
        The legend label to add, replace of old argument *label*
    base_line: bool
        If True, add the dashed anti-diagonal from (0, 1) to (1, 0),
        labelled "random: AUC=0.500"
    linewidth: float
        The line width
    label: string
        Deprecated alias of `legend_label`; when given it overrides
        `legend_label` and a FutureWarning is issued

    Returns
    -------
    (rec, pre, thresholds, auc) : tuple of values
    rec: array
        Recall values with each threshold
    pre: array
        Precision values with each threshold; defined as 1.0 where no
        sample is called positive
    thresholds: array
        Value of all thresholds, sorted in ascending order
    auc: float
        The overall area under the curve (AUC), trapezoidal rule over the
        curve extended with the end points (recall=1, precision=0) and
        (recall=0, precision=1)

    Raises
    ------
    ValueError
        If `state` and `scores` do not have the same number of elements

    Examples
    --------
    .. plot::

        >>> import numpy as np
        >>> from hilearn.plot import PR_curve
        >>> np.random.seed(1)
        >>> score0 = np.random.rand(100) * 0.8
        >>> score1 = 1 - 0.4 * np.random.rand(300)
        >>> scores = np.append(score0, score1)
        >>> state = np.append(np.zeros(100), np.ones(300))
        >>> res = PR_curve(state, scores, threshold=0.5, color='blue')
    """
    import warnings

    if label is not None:
        legend_label = label
        warnings.warn('label argument is replaced by legend_label and will '
                      'be removed in future', FutureWarning, stacklevel=2)

    # The docstring promises array_like input, but the comparisons and masks
    # below need real arrays; flatten so both inputs line up element-wise.
    state = np.asarray(state).ravel()
    scores = np.asarray(scores).ravel()
    if state.shape[0] != scores.shape[0]:
        raise ValueError("state and scores must have the same length, "
                         "got %d and %d" % (state.shape[0], scores.shape[0]))

    # Candidate thresholds: every distinct score, capped to keep the loop
    # below cheap. Evenly spaced picks from the sorted unique scores keep
    # the smallest and largest score, are reproducible, and leave the
    # caller's global random state untouched.
    max_thresholds = 2000
    score_gap = np.unique(scores)
    if len(score_gap) > max_thresholds:
        keep = np.round(
            np.linspace(0, len(score_gap) - 1, max_thresholds)).astype(int)
        score_gap = score_gap[keep]

    if threshold is not None:
        thresholds = np.sort(np.append(threshold, score_gap))
    else:
        thresholds = np.sort(score_gap)

    is_neg = state == 0
    is_pos = state == 1

    def _pr_at(cutoff):
        # (precision, recall) when every score >= cutoff is called positive.
        called = scores >= cutoff
        missed = scores < cutoff
        TP = np.sum(called & is_pos)
        FP = np.sum(called & is_neg)
        FN = np.sum(missed & is_pos)
        # A cutoff above every score calls nothing positive; precision is
        # then 0/0, so use the conventional value 1.0 instead of letting a
        # NaN poison the AUC.
        precision = (TP + 0.0) / (TP + FP) if (TP + FP) > 0 else 1.0
        # With no positive labels this is nan (numpy warns), as recall is
        # undefined in that case.
        recall = (TP + 0.0) / (TP + FN)
        return precision, recall

    pre = np.zeros(thresholds.shape[0])
    rec = np.zeros(thresholds.shape[0])
    for i, cutoff in enumerate(thresholds):
        pre[i], rec[i] = _pr_at(cutoff)

    # Close the curve at both ends before integrating and plotting.
    curve_rec = np.concatenate(([1.0], rec, [0.0]))
    curve_pre = np.concatenate(([0.0], pre, [1.0]))
    auc = np.sum((curve_rec[:-1] - curve_rec[1:]) *
                 (curve_pre[:-1] + curve_pre[1:]) / 2.0)

    # "none" means "no color requested", for the curve and the dot alike.
    # The isinstance guard avoids an element-wise comparison when an RGB
    # array is passed.
    no_color = color is None or (isinstance(color, str) and color == "none")

    curve_style = {"linewidth": linewidth,
                   "label": "%s: AUC=%.3f" % (legend_label, auc)}
    if not no_color:
        curve_style["color"] = color
    plt.plot(curve_rec, curve_pre, "-", **curve_style)

    ## Adding dot with given threshold
    if threshold is not None:
        dot_pre, dot_rec = _pr_at(threshold)
        if no_color:
            plt.plot(dot_rec, dot_pre, marker='o', markersize=8, mec='k',
                     mfc='none')
        else:
            plt.plot(dot_rec, dot_pre, marker='o', markersize=8, mec=color,
                     mfc=color)

    ## Adding base line plot
    if base_line:
        plt.plot(np.arange(0, 2), 1 - np.arange(0, 2), "k--",
                 linewidth=1.0, label="random: AUC=0.500")

    if legend_on:
        plt.legend(loc="best", fancybox=True, ncol=1)
    plt.ylabel("Precision: TP/(TP+FP)")
    plt.xlabel("Recall: TP/(TP+FN)")

    return rec, pre, thresholds, auc
def ecdf_plot(data, x=None, **kwargs):
    """
    Empirical plot for cumulative distribution function

    For each evaluation point the plotted value is the fraction of `data`
    that is *strictly smaller* than that point, so with the default `x`
    the curve starts at 0 and stays below 1 at the largest observation.
    The line is drawn on the current matplotlib axes.

    Parameters
    ----------
    data: array or list
        data for the empirical CDF plot (flattened to 1-D)
    x: array or list (optional)
        the points to show the plot; defaults to the sorted `data`
    **kwargs:
        **kwargs for matplotlib.plot

    Returns
    -------
    x: array
        sorted x
    ecdf_val: array
        values of empirical cdf for x
    """
    data = np.sort(np.asarray(data).ravel())
    if x is None:
        x = data
    else:
        x = np.sort(np.asarray(x).ravel())

    # data is sorted, so the left insertion index of each point equals the
    # number of observations strictly below it: one binary search per point
    # instead of a full scan of data.
    # NOTE(review): np.sort places NaN last, so a NaN evaluation point
    # counts every non-NaN observation as below it -- confirm that NaN
    # inputs are not expected here.
    n_below = np.searchsorted(data, x, side="left")
    ecdf_val = n_below / float(len(data))

    plt.plot(x, ecdf_val, **kwargs)
    return x, ecdf_val