Source code for hilearn.plot.dot_plot

import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
from sklearn import linear_model

[docs]def corr_plot(x, y, max_num=10000, outlier=0.01, line_on=True, legend_on=True, size=30, dot_color=None, outlier_color="r", alpha=0.8, color_rate=10, corr_on=None): """Correlation plot for large number of dots by showing the density and Pearson's correlatin coefficient Parameters ---------- x: `array_like`, (1, ) Values on x-axis y: `array_like`, (1, ) Values on y-axis max_num: int Maximum number of dots to plotting by subsampling outlier: float The proportion of dots as outliers in different color line_on : bool If True, show the regression line legend_on: bool If True, show the Pearson's correlatin coefficient in legend. Replace of *corr_on* size: float The dot size dot_color: string The dot color. If None (by default), density color will be use outlier_color: string The color for outlier dot alpha : float The transparency: 0 (fully transparent) to 1 color_rate: float Color rate for density Returns ------- ax: matplotlib Axes The Axes object containing the plot. Examples -------- .. plot:: >>> import numpy as np >>> from hilearn.plot import corr_plot >>> np.random.seed(1) >>> x = np.append(np.random.normal(size=200), 5 + np.random.normal(size=500)) >>> y = 2 * x + 3 * np.random.normal(size=700) >>> corr_plot(x, y) """ score = st.pearsonr(x, y) np.random.seed(0) if len(x) > max_num: idx = np.random.permutation(len(x))[:max_num] x, y = x[idx], y[idx] outlier = int(len(x) * outlier) xy = np.vstack([x,y]) z = st.gaussian_kde(xy)(xy) idx = z.argsort() idx1, idx2 = idx[outlier:], idx[:outlier] if dot_color is None: #c_score = np.log2(z[idx]+100) c_score = np.log2(z[idx] + color_rate*np.min(z[idx])) else: #idx2 = [] c_score = dot_color plt.set_cmap("Blues") plt.scatter(x[idx], y[idx], c=c_score, edgecolor=None, s=size, alpha=alpha) plt.scatter(x[idx2], y[idx2], c=outlier_color, edgecolor=None, s=size/5, alpha=alpha/3.0)#/5 # plt.grid(alpha=0.4) if line_on: clf = linear_model.LinearRegression() clf.fit(x.reshape(-1,1), y) xx = np.linspace(x.min(), x.max(), 1000).reshape(-1,1) yy = clf.predict(xx) plt.plot(xx, yy, "k--", label="R=%.3f" %score[0]) # plt.plot(xx, yy, "k--") if legend_on or corr_on: plt.legend(loc="best", fancybox=True, ncol=1)
# plt.annotate("R=%.3f\np=%.1e" %score, fontsize='x-large', # xy=(0.97, 0.05), xycoords='axes fraction', # textcoords='offset points', ha='right', va='bottom') def volcano_plot(fold_change, pval, p_min=0.00001, x_log10=True, p_threshold=0.05, h_color="red", label=None): """ Volcano plot between log_fold change and p values, which is often used for hypothesis test between two conditions. """ pval[pval < p_min] = p_min idx1 = pval < p_threshold idx0 = pval >= p_threshold plt.scatter(fold_change[idx0], -np.log10(pval)[idx0], color="grey", alpha=0.7, label=None) plt.scatter(fold_change[idx1], -np.log10(pval)[idx1], color=h_color, alpha=0.7, label=label) plt.ylabel("-log10(p value)") plt.xlabel("Fold change") if x_log10: plt.xscale('log', basex=10)