Photo by Joes Valentine on Unsplash

How to Get Your Data Normally Distributed in Under 5 Minutes

Motivation

Left Skewed Data

Left Skewed Data
# Compare the original left-skewed data with six candidate transformations.
# Each panel of a 3x3 grid shows one histogram, with the sample skewness
# reported in the legend so the transformations can be compared at a glance.
# Relies on names defined elsewhere: plt, pd, np, stats and left_skewed.
# NOTE(review): the log and root transforms presumably assume strictly
# positive values in left_skewed -- confirm against where it is created.
left_panels = [
    ('Left Skewed Original Data', lambda s: s),
    ('Logarithm Transformation', np.log),
    ('Cube Root Transformation', lambda s: s ** (1 / 3)),
    ('Square Root Transformation', np.sqrt),
    ('Squared Transformation', lambda s: s ** 2),
    ('Exponential Transformation', np.exp),
    # The original raised to 2.71828 (about e) under this title; a cubed
    # transformation is a power of 3.
    ('Cubed Transformation', lambda s: s ** 3),
]

# figure() and the first subplot() were fused on one line in the original,
# which is a syntax error; they are separate statements here.
plt.figure(figsize=(20, 10))
for position, (title, transform) in enumerate(left_panels, start=1):
    plt.subplot(3, 3, position)
    data = transform(left_skewed)
    skew_label = 'Skew: ' + str(round(stats.skew(data), 2))
    # hist_series is called without ax=, as in the original, so it draws on
    # the subplot that was just made current.
    ax = pd.plotting.hist_series(data, bins='auto', label=skew_label)
    ax.set_title(title)
    ax.legend(loc='upper left', bbox_to_anchor=(0, 1.00), shadow=True, ncol=2)
plt.show()
Output from Code

Right Skewed Data

Right Skewed Data
# Compare the original right-skewed data with six candidate transformations.
# Each panel of a 3x3 grid shows one histogram, with the sample skewness
# reported in the legend so the transformations can be compared at a glance.
# Relies on names defined elsewhere: plt, pd, np, stats and right_skewed.
# NOTE(review): the log, root and reciprocal transforms presumably assume
# strictly positive values in right_skewed -- confirm against where it is
# created.
right_panels = [
    ('Right Skewed Original Data', lambda s: s),
    ('Logarithm Transformation', np.log),
    ('Cube Root Transformation', lambda s: s ** (1 / 3)),
    ('Square Root Transformation', np.sqrt),
    ('Reciprocal Transformation', lambda s: s ** (-1)),
    ('Squared Transformation', lambda s: s ** 2),
    ('Exponential Transformation', np.exp),
]

# figure() and the first subplot() were fused on one line in the original,
# which is a syntax error; they are separate statements here.
plt.figure(figsize=(20, 10))
for position, (title, transform) in enumerate(right_panels, start=1):
    plt.subplot(3, 3, position)
    data = transform(right_skewed)
    skew_label = 'Skew: ' + str(round(stats.skew(data), 2))
    # hist_series is called without ax=, as in the original, so it draws on
    # the subplot that was just made current.
    ax = pd.plotting.hist_series(data, bins='auto', label=skew_label)
    ax.set_title(title)
    # The legend is drawn on every panel, including the original data, whose
    # legend call was commented out and so never displayed its skew label.
    ax.legend(loc='upper left', bbox_to_anchor=(0.65, 1.00), shadow=True, ncol=2)
plt.show()
Output from Code

Conclusion

Photo by Bankim Desai on Unsplash

An intraoperative neuromonitor who tinkers with data to see what interesting nuggets he can find.