One of the easiest ways to plot data is to use the matplotlib
library:
import matplotlib.pyplot as plt

# Sample data: each y value is the square of the matching x value.
x = list(range(0, 60, 10))
y = [v * v for v in x]

# Called with a single sequence, plot() draws it against the
# indices 0, 1, 2, ... on the horizontal axis.
plt.plot(y)
plt.title('No x axis')
plt.show()

# Called with two sequences, plot() takes the x-values from the first.
plt.plot(x, y)
plt.title('With x axis')
plt.show()
The font size of individual text entries in a matplotlib figure can usually be adjusted using a dedicated fontsize
flag. To set the overall default for all matplotlib text, you can update the rcParams
dict that stores default display information:
import matplotlib.pyplot as plt

# Set the default size for all text matplotlib draws from here on.
plt.rcParams['font.size'] = 16

x = list(range(0, 60, 10))
y = [v * v for v in x]
plt.plot(y)
plt.show()
Use the plt.subplot()
command:
You can use the figsize
flag in plt.figure()
to control the size and aspect ratio of the figure you create. (The figsize tuple is (width, height), nominally in inches.)
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 12})
x = [0, 10, 20, 30, 40, 50]
y = [v*v for v in x]

# A wide, short figure suits two side-by-side panels.
plt.figure(figsize=(8, 3))

# Left panel of a 1-row, 2-column grid. The pyplot calls that follow
# draw into this panel until another one is selected.
plt.subplot(1, 2, 1)
plt.title('No x axis')
plt.plot(y)

# Right panel of the same grid.
plt.subplot(1, 2, 2)
plt.title('With x axis')
plt.plot(x, y)
plt.show()
The optional argument immediately following the y-axis data controls the line/marker style:
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 12})
x = [0, 10, 20, 30, 40, 50]
y = [v*v for v in x]

# One entry per panel, in the order the panels fill the 3x3 grid:
# (format string, panel title, extra keyword arguments for plt.plot).
panels = [
    ('.', 'Dots', {}),
    ('-', 'Solid Line', {}),
    ('--', 'Dashed Line', {}),
    ('o', 'Filled circles', {}),
    ('o', 'Empty circles', {'markerfacecolor': 'none'}),
    ('o-', 'Circles + Solid Line', {}),
    ('x', 'X marks', {}),
    ('X', 'Bold X Marks', {}),
    ('+', 'Crosses', {}),
]

plt.figure(figsize=(6, 6))
for index, (fmt, title, extra) in enumerate(panels, start=1):
    plt.subplot(3, 3, index)
    plt.plot(x, y, fmt, **extra)
    plt.title(title)
plt.tight_layout()
# Export before show(): save the finished figure to disk at 300 dpi.
plt.savefig('test.png', dpi=300)
plt.show()
A quick shortcut is the plt.tight_layout()
command. This should be called before plt.show().
Call the plt.savefig()
command, as illustrated above for export to the file test.png
. (You can export to eps file format just by changing the extension.) The dpi
flag specifies the dots-per-inch. (Increase this number for higher-resolution images.)
Note that plt.savefig()
should be called before calling plt.show().
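The same argument that controls line/marker style also controls color. Default options include 'k' (black), 'b' (blue), 'r' (red), 'g' (green), 'm' (magenta), 'y' (yellow), 'c' (cyan), and 'w' (white).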
You can specify custom colors by adding a separate flag color=[RR, GG, BB]
, where RR
, GG
, and BB
are the RGB values (between 0 and 1) that indicate the contribution of red, green, and blue to the hue.
import matplotlib.pyplot as plt

x = [0, 10, 20, 30, 40, 50]
y = [v*v for v in x]
plt.figure(figsize=(7, 5))

# Panels 1-6: a single-letter color code appended to the '-o' style string.
for index, code in enumerate('kbrgmy', start=1):
    plt.subplot(3, 3, index)
    plt.plot(x, y, '-o' + code)

# Panels 7-9: custom [R, G, B] colors passed through the color keyword.
custom_colors = [
    [0.5, 0.5, 0.5],  # Gray
    [0.5, 0.0, 0.0],  # Dark red
    [0.5, 0.0, 0.5],  # Purple
]
for index, rgb in enumerate(custom_colors, start=7):
    plt.subplot(3, 3, index)
    plt.plot(x, y, '-o', color=rgb)

plt.tight_layout()
plt.show()
Use the linewidth
flag. E.g.
plt.plot(vaxis, spec, linewidth=3)
will plot a line 3 points wide, i.e., twice the default width of 1.5 points.
Use the plt.text
command. The first and second arguments are x
and y
coordinates on the plot. The third argument is the string you want placed in the image. You can use the ha
(horizontal alignment) and va
(vertical alignment) flags to control where the text is placed relative to the (x,y) point specified.
import matplotlib.pyplot as plt

x = [0, 10, 20, 30, 40, 50]
y = [v*v for v in x]

# (horizontal alignment, vertical alignment) used in each of the four panels.
alignments = [
    ('center', 'center'),
    ('right', 'bottom'),
    ('left', 'top'),
    ('left', 'center'),
]

plt.figure()
for index, (horizontal, vertical) in enumerate(alignments, start=1):
    plt.subplot(2, 2, index)
    plt.plot(x, y, 'o-')
    # Red dot marking the anchor point that the label is aligned against.
    plt.plot(30, 900, 'or')
    plt.text(30, 900, 'Label', fontsize=16, ha=horizontal, va=vertical)
plt.tight_layout()
plt.show()
Use the plt.xlim()
and plt.ylim()
commands:
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 16})
x = [0, 10, 20, 30, 40, 50]
y = [v*v for v in x]
plt.figure(figsize=(7, 3))

# Left panel: axis limits are left at their defaults.
plt.subplot(1, 2, 1)
plt.plot(x, y, 'o-')

# Right panel: the same data, viewed through custom limits.
plt.subplot(1, 2, 2)
plt.plot(x, y, 'o-')
plt.xlim(left=25, right=45)
plt.ylim(bottom=0, top=50*50)

plt.tight_layout()
plt.show()
Use the plt.legend()
command, indicating the text for each data series using the label
flag. For quality math formatting, you can use LaTeX expressions enclosed in dollar signs ($
).
import matplotlib.pyplot as plt

# Sample x from 0 to 1.49 in steps of 0.01 and tabulate its powers.
x = [v/100.0 for v in range(0, 150)]
x2 = [v*v for v in x]
x3 = [v*v*v for v in x]
x4 = [v*v*v*v for v in x]

# One entry per curve: (y-values, line style + color, legend text).
series = [
    (x, '-k', '$x$'),
    (x2, '-g', '$x^2$'),
    (x3, '-b', '$x^3$'),
    (x4, '-r', '$x^4$'),
]

plt.figure()
for values, fmt, text in series:
    # The label keyword supplies the text plt.legend() shows for the curve.
    plt.plot(x, values, fmt, label=text)
plt.legend(fontsize=16)
plt.show()
Use the loc
flag to plt.legend()
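. Options include 'best', 'upper left', 'upper right', 'lower left', 'lower right', 'upper center', 'lower center', 'center left', 'center right', 'right', and 'center'.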
As illustrated below, you can also use the ncol
flag to set the number of columns through which the labels are distributed.
import matplotlib.pyplot as plt

# Sample x from 0 to 1.49 in steps of 0.01 and tabulate its powers.
x = [v/100.0 for v in range(0, 150)]
x2 = [v*v for v in x]
x3 = [v*v*v for v in x]
x4 = [v*v*v*v for v in x]

# One entry per curve: (y-values, line style + color, legend text).
series = [
    (x, '-k', '$x$'),
    (x2, '-g', '$x^2$'),
    (x3, '-b', '$x^3$'),
    (x4, '-r', '$x^4$'),
]

plt.figure()
for values, fmt, text in series:
    plt.plot(x, values, fmt, label=text)
# Extend the y range below the data to make room for the legend.
plt.ylim(-1.5, 5)
# A single row of four entries, centered along the bottom of the axes.
plt.legend(fontsize=14, loc='lower center', ncol=4)
plt.show()
Use the plt.xlabel()
and plt.ylabel()
commands. You can use LaTeX in these arguments, but you'll need to use a double backslash (\\
) in place of each single backslash (\
) in LaTeX commands. (The first backslash escapes the second, which prevents the interpreter from reading it as the start of a special character.)
E.g., normal LaTeX syntax produces the Angstrom symbol with the text \AA
; to use this symbol in a Matplotlib label argument, you'll need to enter \\AA
instead.
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 20})
x = [v/100.0 for v in range(-150, 150)]
y = [200*v*v for v in x]

plt.figure()
plt.plot(x, y, 'k')
# The doubled backslash puts a single literal backslash in the string,
# so the label contains the LaTeX command \AA (Angstrom symbol).
plt.xlabel('x ($\\AA$)')
# Only the superscript is in math mode; both parentheses stay in
# ordinary text, matching the x label.
plt.ylabel('V(x) (cm$^{-1}$)')
plt.show()
Use
plt.gca().xaxis.set_label_position("top")
plt.gca().xaxis.tick_top()
to move the x axis to the top of the plot. Use
plt.gca().yaxis.set_label_position("right")
plt.gca().yaxis.tick_right()
to move the y axis to the right-hand side. (The same commands work in reverse and in combination with each other.)
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 20})
x = [v/100.0 for v in range(-150, 150)]
y = [200*v*v for v in x]
plt.figure(figsize=(8, 6))

# Top-left panel: x axis moved to the top of the plot
plt.subplot(2, 2, 1)
plt.plot(x, y, 'k')
plt.xlabel('x ($\\AA$)')
# Only the superscript is in math mode; both parentheses stay in text.
plt.ylabel('V(x) (cm$^{-1}$)')
plt.gca().xaxis.set_label_position("top")
plt.gca().xaxis.tick_top()

# Top-right panel: both axes moved
plt.subplot(2, 2, 2)
plt.plot(x, y, 'k')
plt.xlabel('x ($\\AA$)')
plt.ylabel('V(x) (cm$^{-1}$)')
# Move x axis to top of plot
plt.gca().xaxis.set_label_position("top")
plt.gca().xaxis.tick_top()
# Move y-axis to the right of plot
plt.gca().yaxis.set_label_position("right")
plt.gca().yaxis.tick_right()

# Bottom-left panel: we'll leave this one set to the defaults
plt.subplot(2, 2, 3)
plt.plot(x, y, 'k')
plt.xlabel('x ($\\AA$)')
plt.ylabel('V(x) (cm$^{-1}$)')

# Bottom-right panel: y axis moved to the right of the plot
plt.subplot(2, 2, 4)
plt.plot(x, y, 'k')
plt.xlabel('x ($\\AA$)')
plt.ylabel('V(x) (cm$^{-1}$)')
plt.gca().yaxis.set_label_position("right")
plt.gca().yaxis.tick_right()

plt.tight_layout()
# The subplots_adjust() command controls horizontal
# ("width") space and vertical ("height") space
# between subplots. It's better to call this *after*
# plt.tight_layout() so that the tight_layout function
# first ensures all labels fall within the bounds
# of the figure. (Otherwise, some labels may be
# out of the figure boundaries when exported using
# plt.savefig().)
plt.subplots_adjust(wspace=0.1, hspace=0.1)
plt.savefig('test.png')
plt.show()
Use plt.gca().tick_params(direction='in')
. You can also use out
to make ticks point out or inout
to make ticks that point both in and out.
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 20})

# Parabola sampled from -1.5 to 1.49 in steps of 0.01.
x = [v/100.0 for v in range(-150, 150)]
y = [200*v*v for v in x]

plt.figure(figsize=(4, 4))
plt.plot(x, y, 'k')
# Make the tick marks on the current axes point into the plot area.
axes = plt.gca()
axes.tick_params(direction='in')
plt.show()
Use the plt.fill_between()
. The first two arguments are as in plt.plot()
. The y2=
flag should either be a vector or scalar giving the lower boundary for filling. The default value is y2=0
, which will fill down/up to the x axis.
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(-5, 5, 0.1)
y = x*np.exp(-x*x)

# Light blue fill color, given as [R, G, B].
fill_color = [0.5, 0.5, 1.0]

# Draw the curve itself as a solid black line so the edges are clear,
# then fill the region between the curve and y = 0.
plt.plot(x, y, 'k-')
plt.fill_between(x, y, y2=0, color=fill_color)
plt.show()
Use the alpha
flag, a number between 0 (transparent) and 1 (opaque).
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(-5, 5, 0.1)
yfore = x*np.exp(-x*x)
yback = 0.3*np.exp(-x*x/4)

# Layers in drawing order: (y-values, fill color, alpha).
layers = [
    # An opaque Gaussian in the background
    (yback, [0.5, 0.5, 1.0], 1.0),
    # A half-transparent Gaussian derivative in the foreground
    (yfore, [1.0, 0.5, 0.5], 0.5),
]
for values, rgb, opacity in layers:
    plt.plot(x, values, 'k-')
    plt.fill_between(x, values, y2=0, color=rgb, alpha=opacity)
plt.show()
Use the zorder
flag. Lower values place objects farther toward the back.
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(-5, 5, 0.1)
yfore = x*np.exp(-x*x)
yback = 0.3*np.exp(-x*x/4)

# Layers: (y-values, fill color, alpha, zorder).
layers = [
    # The Gaussian is moved to the front when zorder=1
    (yback, [0.5, 0.5, 1.0], 1.0, 1),
    # The Gaussian derivative is now moved to the background since zorder=0
    (yfore, [1.0, 0.5, 0.5], 0.5, 0),
]
for values, rgb, opacity, depth in layers:
    plt.plot(x, values, 'k-', zorder=depth)
    plt.fill_between(x, values, y2=0, color=rgb, alpha=opacity, zorder=depth)
plt.show()
When comparing many curves (or spectra) over the same x-axis range, it's often useful to "stack" the spectra so that features can be easily compared without too much overlap. There's no one "right" way to do this, but a very simple method is to write a for
loop that plots each curve individually, with an offset shift*n
added to each consecutive curve.
How do you choose the right shift
value to separate the curves? That's art as much as science, but a good start is to set shift
equal to 1/2 of the difference between maximum and minimum values in your set of curves.
import matplotlib.pyplot as plt
import numpy as np

# This will be our x-axis
xaxis = np.arange(0, 300, 1.0)
# This is the number of points on the x-axis
Npts = len(xaxis)
# This will be the number of curves we "stack"
Ncurves = 10
# This will hold the curves, i.e., the y-axis data (one curve per column)
Curves = np.zeros((Npts, Ncurves))
for n in range(Ncurves):
    # sigma sets the width of the curve
    sigma = 20 + n
    # mu will be the x-axis location of the peak
    mu = np.mean(xaxis) + 20*np.sin(2*np.pi*n/Ncurves)
    # Generate some noise to mimic experimental data
    noise = 0.01*np.random.normal(size=(Npts,))
    # Generate the curve: a Gaussian centered at mu, plus noise
    Curves[:, n] = np.exp(-(xaxis-mu)**2/(2*sigma**2)) + noise

# Now let's compare stacked and un-stacked representations
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.plot(xaxis, Curves, 'k')
plt.title("Unstacked")

plt.subplot(1, 2, 2)
plt.title('Stacked')
# The value "shift" controls the spacing between the curves.
# This parameter may be changed from figure to figure, but
# a useful starting point is 1/2 the difference between the max and
# min values of the data set. The parentheses matter: without them
# only the maximum would be halved.
shift = 0.5*(np.max(Curves) - np.min(Curves))
for n in range(Ncurves):
    # We add an offset shift*n to each consecutive curve.
    plt.plot(xaxis, Curves[:, n] + shift*n, 'k')
plt.show()
If you don't specify line colors explicitly, matplotlib will assign them from a predefined default color cycle. To choose your own progression of colors, you can draw them from a "colormap" -- a named sequence of colors that maps numbers between 0 and 1 to colors. The code below illustrates how to extract a list of such colors explicitly from the pre-defined colormap options. (See https://matplotlib.org/stable/gallery/color/colormap_reference.html.) The key line of code is:
Colors = mpl.cm.copper(np.linspace(0, 0.8, Ncurves))
Colors
will now be an Ncurves-by-4 NumPy array; each row can be passed as a color to plt.plot()
. (The fourth value is the alpha
channel indicating transparency.) Thus
plt.plot(xaxis, Curves[:,n], color=Colors[n])
will generate a curve with the color defined by Colors[n]
.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# This will be our x-axis
xaxis = np.arange(0, 300, 1.0)
# This is the number of points on the x-axis
Npts = len(xaxis)
# This will be the number of curves we "stack"
Ncurves = 10
# This will hold the curves, i.e., the y-axis data (one curve per column)
Curves = np.zeros((Npts, Ncurves))
for n in range(Ncurves):
    # sigma sets the width of the curve
    sigma = 20 + n
    # mu will be the x-axis location of the peak
    mu = np.mean(xaxis) + 20*np.sin(2*np.pi*n/Ncurves)
    # Generate some noise to mimic experimental data
    noise = 0.01*np.random.normal(size=(Npts,))
    # Generate the curve: a Gaussian centered at mu, plus noise
    Curves[:, n] = np.exp(-(xaxis-mu)**2/(2*sigma**2)) + noise

# Get an array of colors for each curve.
# The keyword "copper" here can be replaced with any of the options
# listed on this page:
# https://matplotlib.org/stable/gallery/color/colormap_reference.html
# The two arguments set to "0" and "0.8" below can be chosen anywhere
# from 0 to 1. They control which part of the colormap sequence is
# reflected in your curves. In this case, we set the upper bound to
# 0.8 instead of 1 because the top end of the "copper" colormap
# is difficult to see against a white background.
Colors = mpl.cm.copper(np.linspace(0, 0.8, Ncurves))

# Now let's compare stacked and un-stacked representations
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
for n in range(Ncurves):
    plt.plot(xaxis, Curves[:, n], color=Colors[n])
plt.title("Unstacked")

plt.subplot(1, 2, 2)
plt.title('Stacked')
# Spacing between stacked curves: half of the full data range. The
# parentheses matter: without them only the maximum would be halved.
shift = 0.5*(np.max(Curves) - np.min(Curves))
for n in range(Ncurves):
    # We add an offset shift*n to each consecutive curve.
    plt.plot(xaxis, Curves[:, n] + shift*n, color=Colors[n])
plt.show()
Use the plt.gca().set_aspect()
command. plt.gca()
returns the current Axes
object that controls axis labels, frames, legends, aspect ratios, and much more. The set_aspect()
command allows you to control the relative scaling of the x
and y
axes on a given plot.
plt.gca().set_aspect(1)
means that the display step-sizes for the x and y axes will be the same.
plt.gca().set_aspect(2)
means that the display step-size for the y
axis will be twice as large as the display step-size for the x
axis.
plt.gca().set_aspect(0.5)
means that the display step-size for the x
axis will be twice as large as the display step-size for the y
axis.
This is particularly useful if you want to prepare a square plot with the same data limits along both axes. In the example below, we use the NumPy library and the plt.contourf
function to prepare a filled contour plot of a Gaussian function, with equal step sizes along the x
and y
axes.
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams.update({'font.size': 12})
plt.figure(figsize=(9, 3))

#################################################################
## First, we'll create a perfectly square image of a Gaussian. ##
plt.subplot(1, 3, 1)
# These will be the sampling points along x and y axes
x = np.arange(-5, 5, 0.1)
y = np.arange(-5, 5, 0.1)
# np.meshgrid() creates a grid of data points with all
# possible combinations of x-values and y-values.
xx, yy = np.meshgrid(x, y)
# plt.contourf() prepares filled contour plots.
# np.exp() takes the exponential of the argument.
# In this case, this produces a Gaussian function.
plt.contourf(xx, yy, np.exp(-(xx*xx + yy*yy)))
# Setting the aspect ratio equal
plt.gca().set_aspect('equal')

#################################################################
## Now let's see what happens if x and y have different limits ##
plt.subplot(1, 3, 2)
x = np.arange(-5, 5, 0.1)
y = np.arange(-2.5, 2.5, 0.1)
xx, yy = np.meshgrid(x, y)
plt.contourf(xx, yy, np.exp(-(xx*xx + yy*yy)))
plt.gca().set_aspect('equal')
# The displayed step size along x and y axes is still the same, so
# it's obvious that the y axis covers less range.

##################################################################
## We can create a square plot again by "stretching" the y axis ##
plt.subplot(1, 3, 3)
x = np.arange(-5, 5, 0.1)
y = np.arange(-2.5, 2.5, 0.1)
xx, yy = np.meshgrid(x, y)
plt.contourf(xx, yy, np.exp(-(xx*xx + yy*yy)))
# This means that the y-axis display stepsize will be twice as
# big as the x axis. The data limits remain unchanged.
plt.gca().set_aspect(2)
plt.show()
This may sound like a complicated question, but it's a fairly common issue. Many file formats include multiple sets of data in a single file, delimited by various begin/end markers. For example, the tsv
files produced by our NanoDrop UV/Vis instrument include multiple spectra in a single tab-delimited file. A typical block entry might read something like:
WT 0.1M sucrose
4/4/2023 5:34 PM
Wavelength (nm) 10mm Absorbance
//WLCalib: Shift 0.20333
//WLCalib: 261S -0.45
//WLCalib: 362S -0.7
//WLCalib: 484S -0.73
//WLCalib: 529S -0.44
//WLCalib: 823S -0.81
//QSpecEnd:
190 0.599
190.5 0.542
191 0.51
191.5 0.456
192 0.421
.
.
.
848 -0.011
848.5 -0.004
849 -0.002
849.5 -0.007
850 -0.02
(The dots indicate omitted data lines.) And there are many such blocks in a single tsv
file. How do you import such data into NumPy arrays?
The answer is much more complicated than when the file contains a single numerical array and we can just scoop up the data using np.loadtxt()
. For block-delimited files like this, we're going to have to read the file line-by-line and convert the data ourselves.
The code below gives an example of how to do this for a tsv
file that contains several experimental absorption spectra. The function load_between_markers()
steps through the file line-by-line and stores data between the beginning marker QSpecEnd
and the ending marker ''
, i.e., a blank line. Each block of data (between a single pair of begin/end markers) is stored as a list of text lines, and the final output from the function is a list of all such data blocks recorded. (It also returns a list of the interstitial "header" data between these data blocks, in case such information is needed for identifying the different data blocks.)
The second function block2mat()
converts these lines of text data into NumPy arrays, one array for each data block.
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 14})
def load_between_markers(infile, mark1, mark2):
    """Split a text file into data blocks delimited by begin/end markers.

    The file is read line by line. A line containing ``mark1`` starts a
    data block; the block ends at the next line containing ``mark2`` or,
    when ``mark2`` is the empty string, at the next blank line. A block
    that is still open when the file ends is also returned, so the last
    block is not lost when the file lacks a trailing end marker.

    Parameters
    ----------
    infile : path of the text file to read.
    mark1 : text that marks the line immediately before a data block.
    mark2 : text that marks the end of a data block; ``''`` means that
        a blank line ends the block.

    Returns
    -------
    Blocks : list with one entry per data block; each entry is the list
        of stripped, non-blank lines recorded for that block.
    Inters : list with one entry per ``mark1`` line found; each entry is
        the list of stripped, non-blank "interstitial" lines collected
        since the previous block ended, up to and including the
        ``mark1`` line itself. When ``mark2`` is not empty, the line
        that closed the previous block is interstitial text too.
    """
    # This holds data blocks
    Blocks = []
    # This holds blocks of interstitial text. (I.e.,
    # anything that comes between the data blocks.)
    Inters = []

    with open(infile) as fd:
        current_block = []
        current_inters = []
        # True while we are between a mark1 line and the end of its block.
        recording = False

        for raw in fd:
            text = raw.strip()

            # Does this line close a block?
            if mark2:
                at_end = mark2 in raw
            else:
                at_end = not text
            if at_end and recording:
                Blocks.append(current_block)
                recording = False
                # Start fresh lists; the ones already stored in Blocks
                # and Inters must not be modified any further.
                current_block = []
                current_inters = []

            if recording:
                # Data line: keep it unless it is blank.
                if text:
                    current_block.append(text)
            elif text:
                # Interstitial line: keep it unless it is blank.
                current_inters.append(text)

            # If we find a mark1, start recording data on the next line.
            if mark1 in raw:
                recording = True
                current_block = []
                # The interstitial list (which already ends with this
                # mark1 line) becomes the header of the new block.
                Inters.append(current_inters)

        # The file ended while a block was still open: keep that block so
        # Blocks stays aligned with Inters.
        if recording:
            Blocks.append(current_block)

    return Blocks, Inters
def block2mat(Blocks, delim=None):
    """Convert blocks of delimited text lines into NumPy arrays.

    Parameters
    ----------
    Blocks : list of blocks; each block is a list of text lines, and each
        line holds numbers separated by ``delim``.
    delim : separator passed to ``str.split()``. The default ``None``
        splits on any run of whitespace.

    Returns
    -------
    Mats : list of NumPy arrays, one per block, with one row per line and
        one column per delimited field.

    Raises
    ------
    ValueError : if a field cannot be converted to ``float``.
    """
    # For each block, convert every line to a list of floats, then turn
    # the resulting list of rows into a NumPy array.
    return [
        np.array([[float(term) for term in line.split(delim)] for line in block])
        for block in Blocks
    ]
# Read the example file: each data block follows a line containing
# 'QSpecEnd:' and is closed by a blank line.
blist, ilist = load_between_markers('data_block_example.tsv', 'QSpecEnd:', '')
# Fields are tab-separated; each block becomes one NumPy array.
mlist = block2mat(blist, '\t')
# Plot column 0 against column 1 for every block, labeling each curve with
# the first interstitial line recorded before it (the sample name, in
# the example block shown above).
for mat, header in zip(mlist, ilist):
    plt.plot(mat[:, 0], mat[:, 1], label=header[0])
plt.legend(fontsize=12)
plt.show()