Source code for hilearn.plot.curve_plot

import numpy as np
import matplotlib.pyplot as plt

def _roc_rates_at(is_pos, is_neg, n_pos, n_neg, scores, cutoff):
    """Return (fpr, tpr) when every sample with ``score >= cutoff`` is called
    positive."""
    called = scores >= cutoff
    fpr = np.sum(called & is_neg) / n_neg
    tpr = np.sum(called & is_pos) / n_pos
    return fpr, tpr


def ROC_plot(state, scores, threshold=None, color=None, legend_on=True, 
    legend_label="predict", base_line=True, linewidth=1.5, label=None):
    """
    Plot ROC curve and calculate the Area under the curve (AUC) from the
    prediction scores and true labels.

    Parameters
    ----------
    state: `array_like`, (1, ) 
        Label state for the ground truth with binary value (0 for negative,
        1 for positive)
    scores: `array_like`, (1, ) 
        Predicted scores for being positive
    threshold: float
        The suggested threshold to add as a dot on the curve
    color: string
        Color for the curve and threshold dot. None or the string "none"
        lets matplotlib pick the curve color; the threshold dot is then
        drawn as a hollow black circle
    legend_on: bool
        If True, show the legend (curve label with its AUC, and the baseline
        entry when *base_line* is True)
    legend_label: string
        The legend label to add, replace of old argument *label*
    base_line: bool
        If True, add the diagonal (AUC=0.5) baseline as random guess
    linewidth: float
        The line width
    label: string
        Deprecated alias of *legend_label*; when given it overrides
        *legend_label* and a FutureWarning is issued

    Returns
    -------
    (fpr, tpr, thresholds, auc) : tuple of values
    fpr: array
        False positive rate with each threshold
    tpr: array
        True positive rate with each threshold
    thresholds: array
        Value of all thresholds, sorted increasingly. They are the unique
        score values (evenly subsampled to 2000 values when there are more),
        padded by 0.1 beyond both extremes, plus *threshold* when given
    auc: float
        The overall area under the curve (AUC), by the trapezoidal rule

    Examples
    --------

    .. plot::

        >>> import numpy as np
        >>> from hilearn.plot import ROC_plot
        >>> np.random.seed(1)
        >>> score0 = np.random.rand(100) * 0.8
        >>> score1 = 1 - 0.4 * np.random.rand(300)
        >>> scores = np.append(score0, score1)
        >>> state = np.append(np.zeros(100), np.ones(300))
        >>> res = ROC_plot(state, scores, threshold=0.5, color='blue')
    """
    import warnings

    if label is not None:
        legend_label = label
        warnings.warn('label argument is replaced by legend_label and will '
                      'be removed in future', FutureWarning, stacklevel=2)

    # Accept any array_like input (lists, tuples, ...) as documented.
    state = np.ravel(np.asarray(state))
    scores = np.ravel(np.asarray(scores))

    # "none" means "no color requested", same as in PR_curve; the isinstance
    # guard keeps array-valued colors away from an element-wise comparison.
    if isinstance(color, str) and color == "none":
        color = None

    score_gap = np.unique(scores)
    if len(score_gap) > 2000:
        # Evenly spaced, deterministic subsample that always keeps the lowest
        # and highest score, so repeated calls give the same curve and AUC.
        keep = np.linspace(0, len(score_gap) - 1, 2000).round().astype(int)
        score_gap = score_gap[keep]
    # Pad beyond both extremes so the curve runs from (1, 1) down to (0, 0).
    score_gap = np.append(np.min(score_gap)-0.1, score_gap)
    score_gap = np.append(score_gap, np.max(score_gap)+0.1)
    if threshold is not None:
        thresholds = np.sort(np.append(threshold, score_gap))
    else:
        thresholds = np.sort(score_gap)

    # Loop invariants: class membership and class sizes.
    is_neg = state == 0
    is_pos = state == 1
    n_neg = np.sum(is_neg).astype('float')
    n_pos = np.sum(is_pos).astype('float')

    fpr, tpr = np.zeros(thresholds.shape[0]), np.zeros(thresholds.shape[0])
    for i, cutoff in enumerate(thresholds):
        fpr[i], tpr[i] = _roc_rates_at(is_pos, is_neg, n_pos, n_neg,
                                       scores, cutoff)

    # Trapezoidal rule; fpr decreases as the threshold increases, hence the
    # (fpr[i] - fpr[i+1]) orientation.
    auc = np.sum((fpr[:-1] - fpr[1:]) * (tpr[:-1] + tpr[1:]) / 2.0)

    curve_kwargs = {"linewidth": linewidth,
                    "label": "%s: AUC=%.3f" %(legend_label,auc)}
    if color is not None:
        curve_kwargs["color"] = color
    plt.plot(fpr, tpr, "-", **curve_kwargs)

    ## Adding dot with given threshold
    if threshold is not None:
        _fpr, _tpr = _roc_rates_at(is_pos, is_neg, n_pos, n_neg,
                                   scores, threshold)
        if color is None:
            plt.plot(_fpr, _tpr, marker='o', markersize=8, mec='k', mfc='none')
        else:
            plt.plot(_fpr, _tpr, marker='o', markersize=8, mec=color, mfc=color)

    ## Adding base line plot
    if base_line:
        plt.plot([0, 1], [0, 1], "k--", linewidth=1.0,
                 label="random: AUC=0.500")

    if legend_on:
        plt.legend(loc="best", fancybox=True, ncol=1)

    plt.xlabel("False Positive Rate (1-Specificity)")
    plt.ylabel("True Positive Rate (Sensitivity)")
    return fpr, tpr, thresholds, auc


def _pr_rates_at(is_pos, is_neg, scores, cutoff):
    """Return (precision, recall) when every sample with ``score >= cutoff``
    is called positive."""
    called = scores >= cutoff
    missed = scores < cutoff
    TP = np.sum(called & is_pos)
    FP = np.sum(called & is_neg)
    FN = np.sum(missed & is_pos)
    # With nothing called positive, precision is 0/0. Use 1.0, which matches
    # the (recall=0, precision=1) end point PR_curve anchors the curve to,
    # instead of a NaN that would turn the whole AUC into NaN.
    if (TP + FP) > 0:
        precision = (TP+0.0)/(TP + FP)
    else:
        precision = 1.0
    recall = (TP+0.0)/(TP + FN)
    return precision, recall


def PR_curve(state, scores, threshold=None, color=None, legend_on=True,  
    legend_label="predict", base_line=False, linewidth=1.5, label=None):
    """
    Plot Precision-recall curve and calculate the Area under the curve (AUC) 
    from the prediction scores and true labels.

    Parameters
    ----------
    state: `array_like`, (1, ) 
        Label state for the ground truth with binary value (0 for negative,
        1 for positive)
    scores: `array_like`, (1, ) 
        Predicted scores for being positive
    threshold: float
        The suggested threshold to add as a dot on the curve
    color: string
        Color for the curve and threshold dot. None or the string "none"
        lets matplotlib pick the curve color; the threshold dot is then
        drawn as a hollow black circle
    legend_on: bool
        If True, show the legend (curve label with its AUC, and the baseline
        entry when *base_line* is True)
    legend_label: string
        The legend label to add, replace of old argument *label*
    base_line: bool
        If True, add a dashed line from (recall=0, precision=1) to
        (recall=1, precision=0), labelled "random: AUC=0.500"
    linewidth: float
        The line width
    label: string
        Deprecated alias of *legend_label*; when given it overrides
        *legend_label* and a FutureWarning is issued

    Returns
    -------
    (rec, pre, thresholds, auc) : tuple of values
    rec: array
        Recall values with each threshold
    pre: array
        Precision values with each threshold
    thresholds: array
        Value of all thresholds, sorted increasingly. They are the unique
        score values (evenly subsampled to 2000 values when there are more),
        plus *threshold* when given
    auc: float
        The overall area under the curve (AUC), by the trapezoidal rule on
        the curve extended with the end points (recall=1, precision=0) and
        (recall=0, precision=1)

    Examples
    --------

    .. plot::

        >>> import numpy as np
        >>> from hilearn.plot import PR_curve
        >>> np.random.seed(1)
        >>> score0 = np.random.rand(100) * 0.8
        >>> score1 = 1 - 0.4 * np.random.rand(300)
        >>> scores = np.append(score0, score1)
        >>> state = np.append(np.zeros(100), np.ones(300))
        >>> res = PR_curve(state, scores, threshold=0.5, color='blue')
    """
    import warnings

    if label is not None:
        legend_label = label
        warnings.warn('label argument is replaced by legend_label and will '
                      'be removed in future', FutureWarning, stacklevel=2)

    # Accept any array_like input (lists, tuples, ...) as documented.
    state = np.ravel(np.asarray(state))
    scores = np.ravel(np.asarray(scores))

    # "none" means "no color requested". Normalising it once keeps the curve
    # and the threshold dot consistent (the dot used to be drawn invisibly),
    # and the isinstance guard keeps array-valued colors away from an
    # element-wise comparison.
    if isinstance(color, str) and color == "none":
        color = None

    score_gap = np.unique(scores)
    if len(score_gap) > 2000:
        # Evenly spaced, deterministic subsample that always keeps the lowest
        # and highest score, so repeated calls give the same curve and AUC.
        keep = np.linspace(0, len(score_gap) - 1, 2000).round().astype(int)
        score_gap = score_gap[keep]

    if threshold is not None:
        thresholds = np.sort(np.append(threshold, score_gap))
    else:
        thresholds = np.sort(score_gap)

    # Loop invariants: class membership.
    is_neg = state == 0
    is_pos = state == 1

    pre, rec = np.zeros(thresholds.shape[0]), np.zeros(thresholds.shape[0])
    for i, cutoff in enumerate(thresholds):
        pre[i], rec[i] = _pr_rates_at(is_pos, is_neg, scores, cutoff)

    # Anchor the curve at (recall=1, precision=0) and (recall=0, precision=1)
    # before integrating and plotting; the returned rec/pre stay un-anchored.
    rec_curve = np.concatenate(([1.0], rec, [0.0]))
    pre_curve = np.concatenate(([0.0], pre, [1.0]))
    # Trapezoidal rule; recall decreases as the threshold increases.
    auc = np.sum((rec_curve[:-1] - rec_curve[1:]) *
                 (pre_curve[:-1] + pre_curve[1:]) / 2.0)

    curve_kwargs = {"linewidth": linewidth,
                    "label": "%s: AUC=%.3f" %(legend_label,auc)}
    if color is not None:
        curve_kwargs["color"] = color
    plt.plot(rec_curve, pre_curve, "-", **curve_kwargs)

    ## Adding dot with given threshold
    if threshold is not None:
        dot_pre, dot_rec = _pr_rates_at(is_pos, is_neg, scores, threshold)
        if color is None:
            plt.plot(dot_rec, dot_pre, marker='o', markersize=8, mec='k',
                     mfc='none')
        else:
            plt.plot(dot_rec, dot_pre, marker='o', markersize=8, mec=color,
                     mfc=color)

    ## Adding base line plot
    # NOTE(review): a random classifier's PR curve is usually drawn as a
    # horizontal line at the positive-class prevalence, not this
    # anti-diagonal; kept unchanged for backward-compatible figures.
    if base_line:
        plt.plot([0, 1], [1, 0], "k--", linewidth=1.0,
                 label="random: AUC=0.500")

    if legend_on:
        plt.legend(loc="best", fancybox=True, ncol=1)

    plt.ylabel("Precision: TP/(TP+FP)")
    plt.xlabel("Recall: TP/(TP+FN)")
    return rec, pre, thresholds, auc



def ecdf_plot(data, x=None, **kwargs):
    """
    Plot the empirical cumulative distribution function (ECDF) of data
    
    The ECDF at a point is the fraction of values in *data* that are less
    than or equal to that point, so it reaches 1 at the largest observation.
    
    Parameters
    ----------
    data: array or list
        data for the empirical CDF plot; it is flattened and sorted
    x: array or list (optional)
        the points to show the plot; they are flattened and sorted. If None,
        the sorted data itself is used
    **kwargs: 
        **kwargs for matplotlib.plot
        
    Returns
    -------
    x: array
        sorted x
    ecdf_val:
        values of empirical cdf for x
    """
    data = np.sort(np.ravel(data))
    if x is None:
        x = data
    else:
        x = np.sort(np.ravel(x))

    # data is sorted, so the insertion index with side="right" is the count
    # of observations <= x; one vectorized search replaces the per-point scan.
    ecdf_val = np.searchsorted(data, x, side="right") / float(len(data))

    plt.plot(x, ecdf_val, **kwargs)
    return x, ecdf_val