import matplotlib.pyplot as plt
import numpy as np
# Draw 1000 samples from a Gaussian distribution (mean=10, sigma=2)
randomdata = np.random.normal(loc=10, scale=2, size=1000)
# Bin the samples into 100 bins and draw the histogram
plt.hist(randomdata, bins=100)
plt.show()
The matplotlib `hist` function returns 3 objects: the bin contents, the bin edges and the drawn patches.
Usually the first two are enough for subsequent operations (for the third one, the `_` variable is commonly used to collect the unneeded object).
# Keep the full return value first, then unpack it: contents, edges, patches
hist_output = plt.hist(randomdata, bins=100)
h, bins, _ = hist_output
plt.show()
# Report what each returned object is
print(f'h is a {type(h)}, its shape is {h.shape}')
print(f'bins is a {type(bins)}, its shape is {bins.shape}')
print(f'The 3rd argument (variable _) is a {type(_)} with {len(_)} objects inside of type {type(_[0])}')
Re-plotting a histogram is not straightforward
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(20, 5))
left_ax, middle_ax, right_ax = ax

# Reference: the histogram as matplotlib draws it
h, bins, _ = left_ax.hist(randomdata, bins=10)
left_ax.set_title('Histogram created by matplotlib (10 bins)')

# Naive attempt 1: contents against the left bin edges, drawn as a line
middle_ax.plot(bins[:-1], h)
middle_ax.set_title('Using plt.plot. PROBLEM: it is a line')

# Naive attempt 2: contents against the right bin edges, default bar width
right_ax.bar(bins[1:], h)
right_ax.set_title('Using plt.bar. PROBLEM: default bar width is fixed = 0.8')

plt.show()
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(22, 5))
drawstyles = ['steps-pre', 'steps-mid', 'steps-post']

# Panel 1: same contents drawn as a line at three different x positions
panel = ax[0]
h, bins, _ = panel.hist(randomdata, bins=10, label='original', color='lightgray')
panel.set_title('Using a line')
panel.plot(bins[:-1], h, '-r*', label='Edge start')
panel.plot(bins[1:], h, '-b*', label='Edge stop')
# bin centers = left edge + half a bin width (equally spaced bins assumed)
binw = bins[1] - bins[0]
binc = [left + binw / 2 for left in bins[:-1]]
panel.plot(binc, h, '-g*', label='Edge center')
panel.legend()

# Panel 2: every drawstyle, x taken from the left edges
panel = ax[1]
panel.set_title("Line with different drawstyles (using bins[:-1])")
h, bins, _ = panel.hist(randomdata, bins=10, label='original', color='lightgray')
for style in drawstyles:
    panel.plot(bins[:-1], h, '-', drawstyle=style, label=style)
panel.legend()

# Panel 3: every drawstyle, x taken from the right edges
panel = ax[2]
panel.set_title("Line with different drawstyles (using bins[1:])")
h, bins, _ = panel.hist(randomdata, bins=10, label='original', color='lightgray')
for style in drawstyles:
    panel.plot(bins[1:], h, '-', drawstyle=style, label=style)
panel.legend()

# Panel 4: every drawstyle, x taken from the bin centers computed above
panel = ax[3]
panel.set_title("Line with different drawstyles (using bin centers)")
h, bins, _ = panel.hist(randomdata, bins=10, label='original', color='lightgray')
for style in drawstyles:
    panel.plot(binc, h, '-', drawstyle=style, label=style)
panel.legend()

plt.show()
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(22, 5))

# Panel 1: 10 bins, bars anchored on the left edges with the default width
panel = ax[0]
h, bins, _ = panel.hist(randomdata, bins=10, label='original', color='lightgray')
panel.set_title('Original nbins = 10')
panel.bar(bins[:-1], h, color='red', alpha=0.5, label='Bin start, fixed width')
panel.legend()

# Panel 2: same recipe with 100 bins
panel = ax[1]
h, bins, _ = panel.hist(randomdata, bins=100, label='original', color='lightgray')
panel.set_title('Original nbins = 100')
panel.bar(bins[:-1], h, color='red', alpha=0.5, label='Bin start, fixed width')
panel.legend()

# Panel 3: 100 bins, bar width taken from the actual bin width
panel = ax[2]
h, bins, _ = panel.hist(randomdata, bins=100, label='original', color='lightgray')
panel.set_title('Original nbins = 100')
binw = bins[1] - bins[0]
panel.bar(bins[:-1], h, color='red', alpha=0.5, width=binw, label='Bin start, custom width')
panel.legend()

# Panel 4: 100 bins, bars placed on the bin centers with the actual bin width
panel = ax[3]
h, bins, _ = panel.hist(randomdata, bins=100, label='original', color='lightgray')
panel.set_title('Original nbins = 100')
binw = bins[1] - bins[0]
binc = bins[:-1] + binw / 2
panel.bar(binc, h, color='green', alpha=0.5, width=binw, label='Bin center, custom width')
panel.legend()

plt.show()
# Gaussian along x, uniform along y
xdata = np.random.normal(loc=1, scale=0.2, size=10000)
ydata = np.random.uniform(low=-1, high=1, size=10000)
# hist2d returns the 2D contents, both sets of edges and the drawn image
h, xbins, ybins, _ = plt.hist2d(xdata, ydata, bins=100)
plt.show()
# Report type and shape of each returned array
print(f'h is a {type(h)} with shape {h.shape}')
print(f'xbins is a {type(xbins)} with shape {xbins.shape}')
print(f'ybins is a {type(ybins)} with shape {ybins.shape}')
In order to (re)plot a 2D histogram, the `matshow` function (or the almost equivalent, more image-oriented `imshow`) can be used.
The origin is set at the upper left-hand corner and rows (the first dimension of the array) are displayed horizontally. The aspect ratio of the figure window is that of the array.
Moreover, x and y are swapped because both commands expect (rows, columns) ordering, which is the opposite of the usual (x, y).
Therefore the `extent` parameter has to be given explicitly if the original axis ranges are to be represented, and x and y have to be swapped:
fig, ax = plt.subplots(nrows=1, ncols=4, figsize=(20, 5))
# fig.suptitle('Using matshow to plot a previously created 2D-histogram')

# Axis ranges of the original histogram, in the order extent expects
hist_range = [xbins[0], xbins[-1], ybins[0], ybins[-1]]

# matshow: array indices on the axes, then the original ranges
ax[0].matshow(h)
ax[0].set_title('matshow')
ax[1].matshow(h, extent=hist_range)
ax[1].set_title('matshow (explicit extent)')

# imshow: same two variants
ax[2].imshow(h)
ax[2].set_title('imshow')
ax[3].imshow(h, extent=hist_range)
ax[3].set_title('imshow (explicit extent)')

plt.show()
The difference between `matshow` and `imshow` is small. The latter, being image oriented, has a different convention for the axis origin and a slightly different rendering.
Both of them result in an inconvenient transformation of the image size/proportions, which is not useful when not dealing with images.
The image aspect can be tuned with the `aspect` parameter (which can be `'equal'` or `'auto'`), and the coordinate convention of having (0, 0) in the lower-left corner can be selected through the `origin` parameter:
# hist2d stores x along the first array dimension, while matshow draws the
# first dimension as rows (vertical). Transpose so that the x range given in
# `extent` really labels the x data.
plt.matshow(h.T, extent=[xbins[0], xbins[-1], ybins[0], ybins[-1]], aspect='auto', origin='lower')
plt.show()
The image has square proportions with the `auto` aspect set. If an axes of a specific size has been defined beforehand, `matshow` with the `auto` aspect will adapt to the available space:
# Pre-sized axes: with aspect='auto' the image stretches to fill them
fig, ax = plt.subplots(figsize=(12, 5))
# hist2d stores x along the first array dimension, while matshow draws the
# first dimension as rows (vertical). Transpose so that the x range given in
# `extent` really labels the x data.
ax.matshow(h.T, extent=[xbins[0], xbins[-1], ybins[0], ybins[-1]], aspect='auto', origin='lower')
# The title names the aspect actually used ('auto', not 'equal')
ax.set_title('matshow with explicit extent, auto aspect and lower origin')
plt.show()
Assuming a histogram content + bins have been previously generated:
## using plt.plot
def plothisto1(h, bins, ax=None, **plot_kwargs):
    '''Draw already-binned histogram contents as a step line.

    Parameters
    ----------
    h : sequence
        Bin contents, one value per bin.
    bins : sequence
        Bin edges (one more than the contents). Only the left edges,
        ``bins[:-1]``, are used as x positions.
    ax : axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        are used.
    **plot_kwargs
        Forwarded unchanged to ``ax.plot``.

    Returns
    -------
    The axes the line was drawn on.
    '''
    # Identity test: "== None" would invoke a custom __eq__ on the object
    if ax is None:
        ax = plt.gca()  # get current axes
    # 'steps-post' keeps each value constant up to the next x position; the
    # last left edge has no following point, so the last bin is not spanned.
    ax.plot(bins[:-1], h, drawstyle='steps-post', **plot_kwargs)
    return ax
## using plt.bar
def plothisto2(h, bins, ax=None, **plot_kwargs):
    '''Draw already-binned histogram contents as bars.

    Each bar is centered on its bin and as wide as the bin, so bins that
    are not equally spaced are handled too.

    Parameters
    ----------
    h : sequence
        Bin contents, one value per bin.
    bins : sequence
        Bin edges (one more than the contents).
    ax : axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        are used.
    **plot_kwargs
        Forwarded unchanged to ``ax.bar`` (``width`` is already set here).

    Returns
    -------
    The axes the bars were drawn on.
    '''
    # Identity test: "== None" would invoke a custom __eq__ on the object
    if ax is None:
        ax = plt.gca()  # get current axes
    edges = np.asarray(bins, dtype=float)
    # per-bin width and center, computed from consecutive edges
    widths = np.diff(edges)
    centers = edges[:-1] + widths / 2
    ax.bar(centers, h, width=widths, **plot_kwargs)
    return ax
# One figure, two panels: one per helper function
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
fig.suptitle('Showing histogram with dedicated functions', fontsize=16)
left_ax, right_ax = ax

# Left: matplotlib histogram with the step-line helper drawn on top
h, bins, _ = left_ax.hist(randomdata, bins=20, label='original', color='lightgray')
plothisto1(h, bins, ax=left_ax, color='red', label='plothisto1')
left_ax.set_title('Using plt.line')
left_ax.legend()

# Right: matplotlib histogram with the bar helper drawn on top
h, bins, _ = right_ax.hist(randomdata, bins=20, label='original', color='lightgray')
plothisto2(h, bins, ax=right_ax, color='red', label='plothisto2', alpha=0.2)
right_ax.set_title('Using plt.bar')
right_ax.legend()

plt.show()
# Gaussian along both x and y
xdata = np.random.normal(loc=1, scale=0.2, size=10000)
ydata = np.random.normal(loc=2, scale=0.3, size=10000)
h, xbins, ybins, _ = plt.hist2d(xdata, ydata, bins=100)
plt.show()
Comments:
- Line drawn with `plt.plot` (used by the function in the left plot): the line does not cover all the original "bars"; the last point is missing. `plt.bar` (right) gives a better result.
- `plt.bar` (used by the function in the right plot): the top line cannot be a different color (unless you want to see the vertical lines too).

For the bidimensional case:
def plot2d(h, xbins, ybins, ax=None, **plot_kwargs):
    '''Draw already-binned 2D histogram contents with ``matshow``.

    Parameters
    ----------
    h : 2D array-like
        Histogram contents with x along the first dimension and y along
        the second.
    xbins, ybins : sequence
        Bin edges along x and y; their first and last values set the
        axis ranges.
    ax : axes, optional
        Axes to draw on. When omitted, the current axes (``plt.gca()``)
        are used.
    **plot_kwargs
        Forwarded to ``ax.matshow``. ``aspect`` and ``origin`` default to
        ``'auto'`` and ``'lower'`` and can be overridden here.

    Returns
    -------
    The axes the image was drawn on.
    '''
    # Identity test: "== None" would invoke a custom __eq__ on the object
    if ax is None:
        ax = plt.gca()  # get current axes
    # matshow draws the first dimension as rows (vertical), so transpose to
    # put x on the horizontal axis; asarray lets nested lists through too.
    image = np.asarray(h).T
    options = {'aspect': 'auto', 'origin': 'lower', **plot_kwargs}
    ax.matshow(image, extent=[xbins[0], xbins[-1], ybins[0], ybins[-1]], **options)
    # set the tick positions explicitly: x on the bottom, y on the left
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    return ax
# 2D histogram: matplotlib's own drawing next to the plot2d helper
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
fig.suptitle('Showing 2D histogram with dedicated functions', fontsize=16)
left_ax, right_ax = ax
# Gaussian along x, uniform along y
xdata = np.random.normal(loc=1, scale=0.2, size=10000)
ydata = np.random.uniform(low=-1, high=1, size=10000)
# Left: histogram created and drawn by hist2d
h, xbins, ybins, _ = left_ax.hist2d(xdata, ydata, bins=100)
# Right: the same contents re-drawn by the helper
plot2d(h, xbins, ybins, ax=right_ax)
plt.show()
NumPy has a `histogram` function (or `histogram2d`). It is very basic, and the main difference from the matplotlib one is that it only returns the histogram contents and bins. As a consequence, no plot is created when it is used.
Example:
# Gaussian sample (mean=10, sigma=2)
data = np.random.normal(loc=10, scale=2, size=1000)
# Fill the histogram: only contents and edges come back
h, bins = np.histogram(data, bins=100)
print(f'h shape is {h.shape}, bins shape is {bins.shape}')
The previously created functions can be used as well (because they just need histo content and bins):
# Bin a fresh Gaussian sample with numpy, then draw it with the bar helper
sample = np.random.normal(loc=10, scale=2, size=1000)
h, bins = np.histogram(sample, bins=100)
plothisto2(h, bins, color='cyan')
plt.show()
The Scikit-HEP project provides several useful tools, mplhep is one of them.
This module consists of a set of helpers for matplotlib that make it easier to produce the plots typically needed in HEP, and to style them in a way that is compatible with current collaboration requirements (ROOT-like plots for CMS, ATLAS, LHCb).
Example:
import mplhep as hep
import numpy as np
import matplotlib.pyplot as plt
# Create the histogram with numpy (contents + edges only)
data = np.random.normal(loc=10, scale=2, size=1000)
h, bins = np.histogram(data, bins=30)
# Draw it with mplhep
hep.histplot(h, bins)
plt.show()
Several options are available:
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(8, 8))
fig.suptitle('mplhep histplot function (same histo, different options)', fontsize=16)
(top_left, top_right), (bottom_left, bottom_right) = ax
# default rendering
hep.histplot(h, bins, ax=top_left)
# filled histogram with explicit edge/face colors
hep.histplot(h, bins, ax=top_right, histtype='fill', ec='black', fc='red')
# normalised to unit area
hep.histplot(h, bins, ax=bottom_left, density=True)
# with sqrt(N) error bars
hep.histplot(h, bins, ax=bottom_right, yerr=np.sqrt(h))
plt.show()
2D histograms are also supported:
# Create the 2D histogram with numpy
xdata = np.random.normal(loc=10, scale=2, size=10000)
ydata = np.random.normal(loc=20, scale=3, size=10000)
h, xbins, ybins = np.histogram2d(xdata, ydata, bins=100)
# Draw it with mplhep
hep.hist2dplot(h, xbins, ybins)
plt.show()
# creating a figure with 2 3D axes
fig = plt.figure(figsize=(12, 5))
ax1 = fig.add_subplot(121, projection='3d')
ax2 = fig.add_subplot(122, projection='3d')
# generating 2d data
xdata = np.random.normal(10, 2, 10000)
ydata = np.random.normal(20, 3, 10000)
h, xbins, ybins = np.histogram2d(xdata, ydata, bins=10)
# Grid of the lower-left bar corners. indexing='ij' gives arrays of shape
# (nx, ny), the same layout as h, so flattened positions and heights match
# (the default 'xy' indexing would attach each height to the transposed bin).
_xx, _yy = np.meshgrid(xbins[:-1], ybins[:-1], indexing='ij')
# bars start at zero and are as tall as the bin content
bottom = np.zeros_like(_xx)
# bar width and depth are the bin sizes (equally spaced bins)
width = xbins[1] - xbins[0]
depth = ybins[1] - ybins[0]
# bar3d wants 1d arrays
for axes3d, shaded in ((ax1, True), (ax2, False)):
    axes3d.bar3d(_xx.ravel(), _yy.ravel(), bottom.ravel(), width, depth, h.ravel(), shade=shaded)
    axes3d.set_title(f'shade={shaded}')
plt.show()
Let's write a lego function for a 2D histogram:
from mpl_toolkits.mplot3d.axes3d import Axes3D
# Axes3D constructor signature, for reference:
# Axes3D(fig, rect=None, *args, azim=-60, elev=30, zscale=None, sharez=None, proj_type='persp', **kwargs)
def lego(h, xbins, ybins, ax=None, **plt_kwargs):
    '''Make a lego (3D bar) plot out of a 2D histogram.

    Parameters
    ----------
    h : 2D array-like
        Histogram contents with x along the first dimension and y along
        the second.
    xbins, ybins : sequence
        Bin edges along x and y. Equally spaced bins are assumed: the bar
        footprint is taken from the first bin of each axis.
    ax : 3D axes, optional
        Axes to draw on. When omitted, a 3D axes is added to the current
        figure.
    **plt_kwargs
        Forwarded to ``ax.bar3d``; ``shade`` defaults to True.

    Returns
    -------
    The axes used, or None (after printing an error) when ``ax`` is not a
    3D axes.
    '''
    # Identity test: "== None" would invoke a custom __eq__ on the object
    if ax is None:
        fig = plt.gcf()  # get current figure
        # add_subplot attaches the axes to the figure; a bare Axes3D(fig) is
        # not added automatically on current matplotlib versions.
        ax = fig.add_subplot(111, projection='3d')
        ax.view_init(elev=30, azim=-60)
        ax.set_proj_type('persp')
    # only a 3D axes provides bar3d
    if not hasattr(ax, 'bar3d'):
        print('Error, ax is not 3d')
        return None
    contents = np.asarray(h)
    xedges = np.asarray(xbins)
    yedges = np.asarray(ybins)
    # indexing='ij' yields (nx, ny) grids, the same layout as the contents,
    # so each flattened height stays with its own (x, y) bin.
    _xx, _yy = np.meshgrid(xedges[:-1], yedges[:-1], indexing='ij')
    bottom = np.zeros_like(_xx)
    width = xedges[1] - xedges[0]
    depth = yedges[1] - yedges[0]
    bar_options = {'shade': True, **plt_kwargs}
    ax.bar3d(_xx.ravel(), _yy.ravel(), bottom.ravel(), width, depth, contents.ravel(), **bar_options)
    return ax
... and use it:
# generating 2d data
xdata = np.random.normal(10, 2, 10000)
ydata = np.random.normal(20, 3, 10000)
h, xbins, ybins = np.histogram2d(xdata, ydata, bins=20)
# if you want to create your 3d axes in the current figure (plt.gcf()):
from mpl_toolkits.mplot3d.axes3d import Axes3D  # import kept from the original cell
# add_subplot attaches the axes to the figure; a bare Axes3D(fig=...) is not
# added automatically on current matplotlib versions and would draw nothing.
ax3d = plt.gcf().add_subplot(111, projection='3d')
ax3d.view_init(elev=30, azim=-60)
ax3d.set_proj_type('persp')
# lego plot
lego(h, xbins, ybins, ax=ax3d)
plt.show()
Surf-like example
# indexing='ij' gives (nx, ny) grids, the same layout as h, so every surface
# height sits over its own (x, y) bin.
X, Y = np.meshgrid(xbins[:-1], ybins[:-1], indexing='ij')
fig = plt.figure(figsize=(6, 6))
# add_subplot attaches the axes to the figure; a bare Axes3D(fig=...) is not
# added automatically on current matplotlib versions and would draw nothing.
ax3d = fig.add_subplot(111, projection='3d')
ax3d.view_init(elev=30, azim=-60)
ax3d.set_proj_type('persp')
surf = ax3d.plot_surface(X, Y, h, rstride=1, cstride=1, cmap='viridis')
# name the axes the colorbar takes its space from
fig.colorbar(surf, ax=ax3d, shrink=0.5, aspect=10)
plt.show()