#
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import laUtilities as ut
import slideUtilities as sl
import demoUtilities as dm
from matplotlib import animation
from importlib import reload
from datetime import datetime
from IPython.display import Image, display_html, display, Math, HTML;
qr_setting = None
mp.rcParams['animation.html'] = 'jshtml';
Homework 5 due today at 8pm
Homework 6 is out, due next Friday 3/24 at 8pm
Office hours
Weekly reading and viewing assignments
#
fig = ut.three_d_figure((0,0), fig_desc = 'H = Span{a1, a2, a3}',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
a1 = [-8.0, 8.0, 5.0]
a2 = [3.0, 2.0, -2.0]
a3 = 2.5 * np.array(a2)
fig.text(a1[0]+.5, a1[1]+.5, a1[2]+.5, r'$\bf a_1$', 'a_1', size=18)
fig.text(a3[0]+.5, a3[1]+.5, a3[2]+.5, r'$\bf a_3$', 'a_3', size=18)
fig.text(a2[0]+.5, a2[1]+.5, a2[2]+.5, r'$\bf a_2$', 'a_2', size=18)
fig.plotSpan(a1, a2,'Green')
fig.plotPoint(a1[0], a1[1], a1[2],'r')
fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.plotPoint(a2[0], a2[1], a2[2],'r')
fig.plotLine([[0, 0, 0], a3], 'r', '--')
fig.plotLine([[0, 0, 0], a1], 'r', '--')
# fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.text(0.1, 0.1, -3, r'$\bf 0$', '0', size=12)
fig.plotPoint(0, 0, 0, 'b')
# fig.set_title(r'$H$ = Span$\{{\bf a}_1, {\bf a}_2, {\bf a}_3\}$')
fig.text(9, -9, -7, r'H', 'H', size = 16)
img = fig.dont_save()
[This lecture is based on Prof. Crovella's CS 132 lecture notes.]
Invertible Matrix Theorem. Let $A$ be a square $n \times n$ matrix.
Then the following statements are equivalent; that is, they are either all true or all false:
1. $A$ is an invertible matrix.
2. $A$ is row equivalent to the $n \times n$ identity matrix.
3. $A$ has $n$ pivot positions.
4. The equation $A\mathbf{x} = \mathbf{0}$ has only the trivial solution.
5. The columns of $A$ form a linearly independent set.
6. The linear transformation $\mathbf{x} \mapsto A\mathbf{x}$ is one-to-one.
7. The equation $A\mathbf{x} = \mathbf{b}$ has at least one solution for each $\mathbf{b}$ in $\mathbb{R}^n$.
8. The columns of $A$ span $\mathbb{R}^n$.
9. The linear transformation $\mathbf{x} \mapsto A\mathbf{x}$ maps $\mathbb{R}^n$ onto $\mathbb{R}^n$.
10. There is an $n \times n$ matrix $C$ such that $CA = I$.
11. There is an $n \times n$ matrix $D$ such that $AD = I$.
12. $A^T$ is an invertible matrix.
So far we have been working with vector spaces like $\mathbb{R}^2$, $\mathbb{R}^3$, and so on. But there are more vector spaces!
Today we'll define a subspace and show how the concept helps us understand the nature of matrices and their linear transformations.
Definition. A subspace of $\mathbb{R}^n$ is any set $H$ in $\mathbb{R}^n$ that has three properties:
1. The zero vector is in $H$.
2. For each $\mathbf{u}$ and $\mathbf{v}$ in $H$, the sum $\mathbf{u} + \mathbf{v}$ is in $H$.
3. For each $\mathbf{u}$ in $H$ and each scalar $c$, the vector $c\mathbf{u}$ is in $H$.
Another way of stating properties 2 and 3 is that $H$ is closed under addition and scalar multiplication.
The first thing to note is that there is a close connection between Span and Subspace:
Every Span is a Subspace.
To see this, let's take a specific example.
For example, take $\mathbf{v}_1$ and $\mathbf{v}_2$ in $\mathbb{R}^n$, and let $H$ = Span$\{\mathbf{v}_1, \mathbf{v}_2\}$.
Then $H$ is a subspace of $\mathbb{R}^n$.
#
fig = ut.three_d_figure((20, 1), fig_desc = 'Span{v1, v2}, a subspace.',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
a1 = [-8.0, 8.0, 5.0]
a2 = [3.0, 2.0, -2.0]
a3 = 2.5 * np.array(a2)
fig.text(a1[0]+.5, a1[1]+.5, a1[2]+.5, r'$\bf v_1$', 'v_1', size=20)
fig.text(a3[0]+.5, a3[1]+.5, a3[2]+.5, r'$\bf v_2$', 'v_2', size=20)
fig.plotSpan(a1, a2,'Green')
fig.plotPoint(a1[0], a1[1], a1[2],'r')
fig.plotPoint(a3[0], a3[1], a3[2],'r')
# fig.plotPoint(a3[0], a3[1], a3[2],'r')
#fig.text(a1[0], a1[1], a1[2], r'$\bf v_1$', 'a_1', size=20)
fig.text(0.1, 0.1, -3, r'$\bf 0$', '0', size=12)
fig.plotPoint(0, 0, 0, 'b')
fig.set_title(r'Span{$v_1, v_2$}, a subspace.')
fig.text(9, -9, -7, r'$\bf H$', 'H', size = 20)
img = fig.save()
Let's check this:
1. The zero vector is in $H$.
($\mathbf{0} = 0\mathbf{v}_1 + 0\mathbf{v}_2$, so $\mathbf{0}$ is in Span$\{\mathbf{v}_1, \mathbf{v}_2\}$.)
2. The sum of any two vectors in $H$ is in $H$.
(If $\mathbf{u} = s_1\mathbf{v}_1 + s_2\mathbf{v}_2$ and $\mathbf{w} = t_1\mathbf{v}_1 + t_2\mathbf{v}_2$, then their sum $\mathbf{u} + \mathbf{w} = (s_1 + t_1)\mathbf{v}_1 + (s_2 + t_2)\mathbf{v}_2$ is again in Span$\{\mathbf{v}_1, \mathbf{v}_2\}$.)
3. For any scalar $c$, $c\mathbf{u} = (cs_1)\mathbf{v}_1 + (cs_2)\mathbf{v}_2$ is in $H$.
Because of this, we refer to Span$\{\mathbf{v}_1, \mathbf{v}_2\}$ as the subspace spanned by $\mathbf{v}_1$ and $\mathbf{v}_2$.
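Before moving on, here is a quick numerical sanity check of the closure properties above, a minimal sketch using the two vectors from the figure (the weights are chosen arbitrarily for illustration):
#
# numerical check: combinations of vectors in Span{v1, v2} stay in Span{v1, v2}
import numpy as np
v1 = np.array([-8.0, 8.0, 5.0])
v2 = np.array([3.0, 2.0, -2.0])
# two arbitrary elements of the span
u = 2.0 * v1 + 3.0 * v2
w = -1.0 * v1 + 0.5 * v2
# closed under addition: the weights simply add
print(np.allclose(u + w, 1.0 * v1 + 3.5 * v2))               # True
# closed under scalar multiplication: the weights are scaled
c = -4.0
print(np.allclose(c * u, (c * 2.0) * v1 + (c * 3.0) * v2))   # True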
OK, here is another subspace -- a line:
#
fig = ut.three_d_figure((20, 2), fig_desc = 'Span{v1}, another subspace.',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
v = [ 4.0, 4.0, 2.0]
fig.text(v[0]+.5, v[1]+.5, v[2]+.5, r'$\bf v_1$', 'v1', size=20)
fig.text(-9, -7, -9, r'Span{$\bf v_1$}', 'Span{v1}', size=16)
fig.text(0.2, 0.2, -4, r'$\bf 0$', '0', size=12)
# plotting the span of v
# this is based on the reduced echelon matrix that expresses the system whose solution is v
fig.plotIntersection([1, 0, -v[0]/v[2], 0], [0, 1, -v[1]/v[2], 0], '-', 'Red')
fig.plotPoint(v[0], v[1], v[2], 'r')
fig.plotPoint(0, 0, 0, 'b')
# plotting the axes
fig.plotIntersection([0, 0, 1, 0], [0, 1, 0, 0])
fig.plotIntersection([0, 0, 1, 0], [1, 0, 0, 0])
fig.plotIntersection([0, 1, 0, 0], [1, 0, 0, 0])
fig.set_title(r'Span {$v_1$}, another subspace.')
img = fig.save()
Next question: is any line a subspace?
What about a line that is not through the origin?
How about this line:
#
ax = ut.plotSetup()
ut.centerAxes(ax)
ax.plot([-6,6],[4.33,0.33],'b-')
ax.text(-4,3,'$L$',size=20);
In fact, a line $L$ not through the origin fails all three requirements for a subspace:
1. $L$ does not contain the zero vector.
2. $L$ is not closed under addition (see the first figure below: $\mathbf{u} + \mathbf{v}$ falls off the line).
3. $L$ is not closed under scalar multiplication (see the second figure below: $\alpha\mathbf{w}$ falls off the line).
#
ax = ut.plotSetup()
ut.centerAxes(ax)
ax.arrow(0,0,1,2,head_width=0.2, head_length=0.2,length_includes_head = True)
ax.arrow(0,0,4,1,head_width=0.2, head_length=0.2,length_includes_head = True)
ax.plot([4,5],[1,3],'--')
ax.plot([1,5],[2,3],'--')
ax.plot([-6,6],[4.33,0.33],'b-')
ax.text(5.25,3.25,r'${\bf u}$+${\bf v}$',size=16)
ax.text(1.5,2.5,r'${\bf u}$',size=16)
ax.text(4.75,1,r'${\bf v}$',size=16)
ax.text(-4,3,'$L$',size=20)
ut.plotPoint(ax,5,3)
ax.plot(0,0,'');
#
ax = ut.plotSetup()
ut.centerAxes(ax)
line = np.array([[-6,6],[4.33,0.33]])
w = .3 * line[:,0] + .7 * line[:,1]
ut.plotPoint(ax,w[0],w[1],'k')
ax.arrow(0,0,w[0],w[1],head_width=0.2, head_length=0.2,length_includes_head = True)
w_ext = np.array([w, 1.75*w]).T
ax.plot(w_ext[0], w_ext[1], '--')
ax.plot(line[0], line[1],'b-')
ax.text(w[0],w[1]-0.5,r'${\bf w}$',size=16)
ax.text(w_ext[0,1], w_ext[1,1]-0.5, r'$\alpha{\bf w}$', size=16)
ut.plotPoint(ax,w_ext[0,1],w_ext[1,1],'r')
ax.text(-4,3,'$L$',size=20);
On the other hand, any line, plane, or hyperplane through the origin is a subspace.
Make sure you can see why (or prove it to yourself).
Now let's start to use the subspace concept to characterize matrices.
We are thinking of these matrices as linear operators.
Every matrix $A$ has associated with it two subspaces: its column space and its null space.
Definition. The column space of a matrix $A$, written Col $A$, is the set of all linear combinations of the columns of $A$.
If $A = [\mathbf{a}_1 \;\cdots\; \mathbf{a}_n]$, with columns in $\mathbb{R}^m$, then Col $A$ is the same as Span$\{\mathbf{a}_1, \dots, \mathbf{a}_n\}$, and hence it is a subspace.
The column space of an $m \times n$ matrix $A$ is a subspace of $\mathbb{R}^m$.
In particular, note that Col $A$ equals $\mathbb{R}^m$ only when the columns of $A$ span $\mathbb{R}^m$. Otherwise, Col $A$ is only part of $\mathbb{R}^m$.
How should we interpret Col $A$?
When a system of linear equations is written in the form $A\mathbf{x} = \mathbf{b}$, the column space of $A$ is the set of all $\mathbf{b}$ for which the system has a solution.
Equivalently, when we consider the linear operator $\mathbf{x} \mapsto A\mathbf{x}$ implemented by the matrix $A$, the column space of $A$ is the range of that operator.
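This gives a practical membership test: $\mathbf{b}$ is in Col $A$ exactly when $A\mathbf{x} = \mathbf{b}$ is consistent, i.e., when appending $\mathbf{b}$ to $A$ does not increase the rank. A small numerical sketch, using the vectors $\mathbf{a}_1$, $\mathbf{a}_2$, $\mathbf{a}_3$ from the running example (the test vectors here are arbitrary):
#
# b is in Col A exactly when rank([A | b]) == rank(A)
import numpy as np
A = np.column_stack([[-8.0, 8.0, 5.0],      # a1
                     [ 3.0, 2.0, -2.0],     # a2
                     [ 7.5, 5.0, -5.0]])    # a3 = 2.5 * a2
b_in  = A @ np.array([1.0, 2.0, 0.0])       # built from A's columns, so in Col A
b_out = np.array([1.0, 0.0, 0.0])           # not in the plane Col A
for b in (b_in, b_out):
    same_rank = np.linalg.matrix_rank(np.column_stack([A, b])) == np.linalg.matrix_rank(A)
    print(same_rank)                        # True for b_in, False for b_out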
#
fig = ut.three_d_figure((20, 3), fig_desc = 'Column Space of A = [a1, a2, a3].',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
a1 = [-8.0, 8.0, 5.0]
a2 = [3.0, 2.0, -2.0]
a3 = 2.5 * np.array(a2)
fig.text(a1[0]+.5, a1[1]+.5, a1[2]+.5, r'$\bf a_1$', 'a_1', size=18)
fig.text(a3[0]+.5, a3[1]+.5, a3[2]+.5, r'$\bf a_3$', 'a_3', size=18)
fig.text(a2[0]+.5, a2[1]+.5, a2[2]+.5, r'$\bf a_2$', 'a_2', size=18)
fig.plotSpan(a1, a2,'Green')
fig.plotPoint(a1[0], a1[1], a1[2],'r')
fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.plotPoint(a2[0], a2[1], a2[2],'r')
# fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.text(0.1, 0.1, -3, r'$\bf 0$', '0', size=12)
fig.plotPoint(0, 0, 0, 'b')
fig.set_title(r'Column Space of $A = [{\bf a}_1, {\bf a}_2, {\bf a}_3]$')
fig.text(9, -9, -7, r'Col $A$', 'H', size = 16)
img = fig.save()
Definition. The null space of a matrix $A$, written Nul $A$, is the set of all solutions of the homogeneous equation $A\mathbf{x} = \mathbf{0}$.
In other words: the null space of $A$ is the set of all vectors that are mapped to the zero vector by $A$. It is sometimes called the kernel of $A$.
When $A$ has $n$ columns, a solution of $A\mathbf{x} = \mathbf{0}$ is a vector in $\mathbb{R}^n$. So the null space of $A$ is a subset of $\mathbb{R}^n$.
In fact, Nul $A$ is a subspace of $\mathbb{R}^n$.
Theorem. The null space of an $m \times n$ matrix $A$ is a subspace of $\mathbb{R}^n$.
Equivalently, the set of all solutions of a system of $m$ homogeneous linear equations in $n$ unknowns is a subspace of $\mathbb{R}^n$.
Proof.
First, the zero vector is in Nul $A$, since $A\mathbf{0} = \mathbf{0}$.
The sum of two vectors in Nul $A$ is in Nul $A$:
Take two vectors $\mathbf{u}$ and $\mathbf{v}$ that are in Nul $A$. By definition, $A\mathbf{u} = \mathbf{0}$ and $A\mathbf{v} = \mathbf{0}$.
Then $\mathbf{u} + \mathbf{v}$ is in Nul $A$, because $A(\mathbf{u} + \mathbf{v}) = A\mathbf{u} + A\mathbf{v} = \mathbf{0} + \mathbf{0} = \mathbf{0}$.
Any scalar multiple of a vector in Nul $A$ is in Nul $A$:
Take a vector $\mathbf{v}$ that is in Nul $A$. Then $A(c\mathbf{v}) = cA\mathbf{v} = c\mathbf{0} = \mathbf{0}$, so $c\mathbf{v}$ is in Nul $A$.
Testing whether a given vector $\mathbf{v}$ is in Nul $A$ is easy: simply compute $A\mathbf{v}$ and see if the result is zero.
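For instance, here is a minimal sketch with an arbitrary small matrix:
#
# testing membership in Nul A: multiply and compare with zero
import numpy as np
A = np.array([[1.0, 2.0, 3.0],
              [2.0, 4.0, 6.0]])
v = np.array([3.0, 0.0, -1.0])
u = np.array([1.0, 1.0, 1.0])
print(np.allclose(A @ v, 0))   # True:  A v = 0, so v is in Nul A
print(np.allclose(A @ u, 0))   # False: A u != 0, so u is not in Nul A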
Remark. Note that the two subspaces Col $A$ and Nul $A$ live in different "universes."
For an $m \times n$ matrix $A$, Col $A$ is a subspace of $\mathbb{R}^m$, while Nul $A$ is a subspace of $\mathbb{R}^n$.
Let's say you have a subspace $H$.
For example, perhaps it is $H$ = Span$\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_3\}$.
We would like to find the simplest way of describing this space.
For example, consider this subspace, in which $\mathbf{a}_3 = 2.5\,\mathbf{a}_2$:
#
fig = ut.three_d_figure((20, 4), fig_desc = 'H = Span{a1, a2, a3}',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
a1 = [-8.0, 8.0, 5.0]
a2 = [3.0, 2.0, -2.0]
a3 = 2.5 * np.array(a2)
fig.text(a1[0]+.5, a1[1]+.5, a1[2]+.5, r'$\bf a_1$', 'a_1', size=18)
fig.text(a3[0]+.5, a3[1]+.5, a3[2]+.5, r'$\bf a_3$', 'a_3', size=18)
fig.text(a2[0]+.5, a2[1]+.5, a2[2]+.5, r'$\bf a_2$', 'a_2', size=18)
fig.plotSpan(a1, a2,'Green')
fig.plotPoint(a1[0], a1[1], a1[2],'r')
fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.plotPoint(a2[0], a2[1], a2[2],'r')
# fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.text(0.1, 0.1, -3, r'$\bf 0$', '0', size=12)
fig.plotPoint(0, 0, 0, 'b')
fig.set_title(r'$H$ = Span$\{{\bf a}_1, {\bf a}_2, {\bf a}_3\}$')
fig.text(9, -9, -7, r'H', 'H', size = 16)
img = fig.save()
Note that $\mathbf{a}_3$ is a scalar multiple of $\mathbf{a}_2$. Thus:
Span$\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_3\}$
is the same subspace as
Span$\{\mathbf{a}_1, \mathbf{a}_2\}$.
Making this idea more general: if one vector in a spanning set is a linear combination of the others, it can be removed without changing the span.
So in our example above, we would prefer to say that the subspace is
$H$ = Span$\{\mathbf{a}_1, \mathbf{a}_2\}$
rather than
$H$ = Span$\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_3\}$.
In other words, the more "concisely" we can describe the subspace, the better.
Now, given some subspace, how small a spanning set can we find?
Here is the key idea we will use to answer that question:
It can be shown that the smallest possible spanning set must be linearly independent.
We will call such a minimal spanning set a basis for the space.
Definition. A basis for a subspace $H$ of $\mathbb{R}^n$ is a linearly independent set in $H$ that spans $H$.
#
fig = ut.three_d_figure((20, 5), fig_desc = 'Bases of H are {a1, a2} or {a1, a3}.',
xmin = -10, xmax = 10, ymin = -10, ymax = 10, zmin = -10, zmax = 10, qr = qr_setting)
a1 = [-8.0, 8.0, 5.0]
a2 = [3.0, 2.0, -2.0]
a3 = 2.5 * np.array(a2)
fig.text(a1[0]+.5, a1[1]+.5, a1[2]+.5, r'$\bf a_1$', 'a_1', size=18)
fig.text(a3[0]+.5, a3[1]+.5, a3[2]+.5, r'$\bf a_3$', 'a_3', size=18)
fig.text(a2[0]+.5, a2[1]+.5, a2[2]+.5, r'$\bf a_2$', 'a_2', size=18)
fig.plotSpan(a1, a2,'Green')
fig.plotPoint(a1[0], a1[1], a1[2],'r')
fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.plotPoint(a2[0], a2[1], a2[2],'r')
fig.plotLine([[0, 0, 0], a3], 'r', '--')
fig.plotLine([[0, 0, 0], a1], 'r', '--')
# fig.plotPoint(a3[0], a3[1], a3[2],'r')
fig.text(0.1, 0.1, -3, r'$\bf 0$', '0', size=12)
fig.plotPoint(0, 0, 0, 'b')
fig.set_title(r'Bases of $H$ are $\{{\bf a}_1, {\bf a}_2\}$ or $\{{\bf a}_1, {\bf a}_3\}$')
fig.text(9, -9, -7, r'H', 'H', size = 16)
img = fig.save()
So in the example above, a basis for $H$ could be
$\{\mathbf{a}_1, \mathbf{a}_2\}$
or
$\{\mathbf{a}_1, \mathbf{a}_3\}$.
However,
$\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_3\}$
is not a basis for $H$.
That is because $\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_3\}$ is not linearly independent.
(Conceptually, there are "too many vectors" in this set.)
And furthermore,
$\{\mathbf{a}_1\}$
is not a basis for $H$.
That is because $\{\mathbf{a}_1\}$ does not span $H$.
(Conceptually, there are "not enough vectors" in this set.)
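These claims are easy to confirm numerically for the running example: the rank of a matrix whose columns are the candidate vectors tells us how many independent directions they contribute.
#
# checking the candidate bases for H numerically
import numpy as np
a1 = np.array([-8.0, 8.0, 5.0])
a2 = np.array([3.0, 2.0, -2.0])
a3 = 2.5 * a2
print(np.linalg.matrix_rank(np.column_stack([a1, a2])))      # 2: independent, spans the plane H
print(np.linalg.matrix_rank(np.column_stack([a1, a2, a3])))  # 2: a3 adds no new direction
print(np.linalg.matrix_rank(np.column_stack([a1])))          # 1: {a1} spans only a line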
Example. The columns of any invertible $n \times n$ matrix form a basis for $\mathbb{R}^n$.
This is because, by the Invertible Matrix Theorem, they are linearly independent, and they span $\mathbb{R}^n$.
So, for example, we could use the identity matrix, $I$. Its columns are $\mathbf{e}_1, \dots, \mathbf{e}_n$.
The set $\{\mathbf{e}_1, \dots, \mathbf{e}_n\}$ is called the standard basis for $\mathbb{R}^n$.
(We have already used the term "standard basis vector" to refer to any of the vectors $\mathbf{e}_i$. Now we know where the term comes from!)
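A quick numerical illustration (the invertible matrix $M$ below is an arbitrary example):
#
# columns of the identity give the standard basis; any invertible
# matrix's columns also form a basis for R^3
import numpy as np
I = np.eye(3)
print(I)                                  # columns are e1, e2, e3
M = np.array([[2.0, 0.0, 1.0],
              [1.0, 3.0, 0.0],
              [0.0, 1.0, 1.0]])
print(np.linalg.det(M) != 0)              # True: M is invertible
print(np.linalg.matrix_rank(M) == 3)      # True: columns independent and span R^3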
#
fig = ut.three_d_figure((20, 6), fig_desc = 'The Standard Basis for R3',
xmin = -1.2, xmax = 1.2, ymin = -1.2, ymax = 1.2, zmin = -1.2, zmax = 1.2, qr = qr_setting)
e1 = [1, 0, 0]
e2 = [0, 1, 0]
e3 = [0, 0, 1]
origin = [0, 0, 0]
# draw the three standard basis vectors as arrows from the origin
fig.ax.quiver(origin, origin, origin, e1, e2, e3, length=1.0, color='Red')
fig.plotLine([[0, 0, 0], [0, 1, 0]], color='Red')
fig.plotLine([[0, 0, 0], [0, 0, 1]], color='Red')
fig.plotLine([[0, 0, 0], [1, 0, 0]], color='Red')
fig.text(1, 0, -0.5, r'${\bf e_1}$', 'e1', size=16)
fig.text(0.2, 1, 0, r'${\bf e_2}$', 'e2', size=16)
fig.text(0, 0.2, 1, r'${\bf e_3}$', 'e3', size=16)
# plotting the axes
fig.plotIntersection([0, 0, 1, 0], [0, 1, 0, 0])
fig.plotIntersection([0, 0, 1, 0], [1, 0, 0, 0])
fig.plotIntersection([0, 1, 0, 0], [1, 0, 0, 0])
fig.set_title(r'The Standard Basis for $\mathbb{R}^3$',size=14);
fig.save();
Being able to express a subspace in terms of a basis is very powerful.
Hence, we will often want to be able to describe subspaces like Col $A$ or Nul $A$ in terms of their bases.
We'll start with finding a basis for the null space of a matrix.
Example. Find a basis for the null space of a matrix $A$.
We would like to describe the set of all solutions of $A\mathbf{x} = \mathbf{0}$.
We start by writing the solution of $A\mathbf{x} = \mathbf{0}$ in parametric form.
In this example, $x_1$ and $x_3$ are basic, and $x_2$, $x_4$, and $x_5$ are free.
So the general solution is determined by the values of the three free variables.
Now, what we want to do is write the solution set as a weighted combination of vectors.
This is a neat trick -- we are creating a vector equation.
The key idea is that the free variables will become the weights.
Now what we have is an expression that describes the entire solution set of $A\mathbf{x} = \mathbf{0}$: every solution has the form $\mathbf{x} = x_2\mathbf{u} + x_4\mathbf{v} + x_5\mathbf{w}$ for certain vectors $\mathbf{u}$, $\mathbf{v}$, and $\mathbf{w}$. So Nul $A$ is the set of all linear combinations of $\mathbf{u}$, $\mathbf{v}$, and $\mathbf{w}$. That is, Nul $A$ is the subspace spanned by $\{\mathbf{u}, \mathbf{v}, \mathbf{w}\}$.
Furthermore, this construction automatically makes $\mathbf{u}$, $\mathbf{v}$, and $\mathbf{w}$ linearly independent. Since each weight appears by itself in one coordinate position ($x_2$ is the only term in the second coordinate, and so on), the only way for the whole weighted sum to be zero is if every weight is zero -- which is the definition of linear independence.
So $\{\mathbf{u}, \mathbf{v}, \mathbf{w}\}$ is a basis for Nul $A$.
Conclusion: by finding a parametric description of the solution of the equation $A\mathbf{x} = \mathbf{0}$, we can construct a basis for the null space of $A$.
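If SymPy is available, this whole construction can be carried out exactly: `Matrix.nullspace()` returns one basis vector per free variable, just as the parametric method does. (The matrix below is an illustrative $3 \times 5$ example with two basic and three free variables, not necessarily the one shown in lecture.)
#
# constructing a basis for Nul A exactly (assumes SymPy is installed)
from sympy import Matrix
A = Matrix([[-3, 6, -1, 1, -7],
            [ 1, -2, 2, 3, -1],
            [ 2, -4, 5, 8, -4]])
basis = A.nullspace()      # one basis vector per free variable
for vec in basis:
    print(vec.T)
print(len(basis))          # 3 free variables, so dim Nul A = 3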
It can be shown that if a subspace $H$ has a basis of $p$ vectors, then every basis of $H$ must consist of exactly $p$ vectors.
That is, for a given $H$, the number $p$ is a special number.
Definition. The dimension of a nonzero subspace $H$, denoted by $\dim H$, is the number of vectors in any basis for $H$.
The dimension of the zero subspace $\{\mathbf{0}\}$ is defined to be zero.
So now we can say with precision things we've previously said informally.
For example, a plane through $\mathbf{0}$ is two-dimensional, and a line through $\mathbf{0}$ is one-dimensional.
Remark: What is the dimension of a line not through the origin? It is undefined, because a line not through the origin is not a subspace, so it cannot have a basis, and so it does not have a dimension.
Remember that associated with any matrix $A$ are two special subspaces: the null space and the column space of $A$. Let's determine the dimension of each subspace.
Earlier we showed the following: one can construct a basis for Nul $A$ from the vectors that correspond to the free variables in the parametric solution of $A\mathbf{x} = \mathbf{0}$.
In the example we looked at, we constructed an explicit description of the null space of the matrix, as Nul $A$ = Span$\{\mathbf{u}, \mathbf{v}, \mathbf{w}\}$, where $\mathbf{u}$, $\mathbf{v}$, and $\mathbf{w}$ are linearly independent.
Each basis vector corresponds to a free variable in the equation $A\mathbf{x} = \mathbf{0}$. Since this basis contains 3 vectors, the dimension of $A$'s null space (i.e., $\dim \operatorname{Nul} A$) is 3.
In general, to find the dimension of Nul $A$, simply identify and count the number of free variables in $A\mathbf{x} = \mathbf{0}$.
To find a basis for the column space, we have an easier starting point.
We know that the column space is the span of the matrix columns.
So, we can choose matrix columns to make up the basis.
The question is: which columns should we choose?
Warmup.
We start with a warmup example.
Suppose we have a matrix $B$ that happens to be in reduced echelon form.
Denote the columns of $B$ by $\mathbf{b}_1, \dots, \mathbf{b}_5$.
Note that $\mathbf{b}_3$ and $\mathbf{b}_4$ are each linear combinations of $\mathbf{b}_1$ and $\mathbf{b}_2$.
So any combination of $\mathbf{b}_1, \dots, \mathbf{b}_5$ is actually just a combination of $\mathbf{b}_1$, $\mathbf{b}_2$, and $\mathbf{b}_5$.
So $\{\mathbf{b}_1, \mathbf{b}_2, \mathbf{b}_5\}$ spans Col $B$.
Also, $\mathbf{b}_1$, $\mathbf{b}_2$, and $\mathbf{b}_5$ are linearly independent, because they are columns from an identity matrix.
So: the pivot columns of $B$ form a basis for Col $B$.
Note that this means: there is no combination of columns 1, 2, and 5 that yields the zero vector (other than the trivial combination, of course).
So, for matrices in reduced row echelon form, we have a simple rule for the basis of the column space:
Choose the columns that hold the pivots.
The general case.
Now I'll show that the pivot columns of $A$ form a basis for Col $A$, for any matrix $A$.
Consider the case where $A\mathbf{x} = \mathbf{0}$ for some nonzero $\mathbf{x}$.
This says that there is a linear dependence relation between some of the columns of $A$.
If any of the entries in $\mathbf{x}$ are zero, then the corresponding columns do not participate in the linear dependence relation.
When we row-reduce $A$ to its reduced echelon form $B$, the columns are changed, but the equations $A\mathbf{x} = \mathbf{0}$ and $B\mathbf{x} = \mathbf{0}$ have the same solution set.
This means that the columns of $A$ have exactly the same dependence relationships as the columns of $B$. Specifically:
If some column of $B$ can be written as a combination of other columns of $B$, then the same is true of the corresponding columns of $A$.
If no combination of certain columns of $B$ yields the zero vector, then no combination of the corresponding columns of $A$ yields the zero vector.
In other words:
If some set of columns of $B$ spans the column space of $B$, then the same set of columns of $A$ spans the column space of $A$.
If some set of columns of $B$ is linearly independent, then the same set of columns of $A$ is linearly independent.
So, if some columns of $B$ are a basis for Col $B$, then the corresponding columns of $A$ are a basis for Col $A$.
Example. Consider the matrix $A$:
To find a basis for Col $A$, we simply need to look at a basis for the column space of its reduced row echelon form.
It turns out that $A$ is row equivalent to the matrix $B$ that we considered above. For matrix $B$, recall that the pivots lie in columns 1, 2, and 5.
Therefore we can immediately conclude that a basis for Col $A$ is formed by $A$'s columns 1, 2, and 5.
So a basis for Col $A$ is $\{\mathbf{a}_1, \mathbf{a}_2, \mathbf{a}_5\}$.
Theorem. The pivot columns of a matrix $A$ form a basis for the column space of $A$.
Be careful here -- note that it is the pivot columns of $A$ itself that form the basis. The columns of the echelon form $B$ are generally not in the column space of $A$, so they cannot serve as a basis for it.
Definition. The rank of a matrix $A$, denoted by $\operatorname{Rank} A$, is the dimension of the column space of $A$.
Since the pivot columns of $A$ form a basis for Col $A$, the rank of $A$ is just the number of pivot columns in $A$. This is also equal to the number of basic variables in $A\mathbf{x} = \mathbf{b}$ (hopefully the term "basic" now makes sense!).
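SymPy's `rref()` makes this recipe concrete: it returns both the reduced echelon form and the indices of the pivot columns, and we then take those columns from $A$ itself. (A sketch with an arbitrary matrix; assumes SymPy is installed.)
#
# basis for Col A = the pivot columns of A itself
import numpy as np
from sympy import Matrix
A = np.array([[1, 2, 0],
              [2, 4, 1],
              [3, 6, 1]])
_, pivot_cols = Matrix(A).rref()   # rref() returns (echelon form, pivot column indices)
print(pivot_cols)                  # (0, 2): columns 1 and 3 hold pivots
basis = A[:, list(pivot_cols)]     # use columns of A, not of the echelon form!
print(basis)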
Example. Determine the rank of the matrix $A$.
Solution. Reduce $A$ to an echelon form:
The matrix has 3 pivot columns, so $\operatorname{Rank} A = 3$.
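For a numerical check, NumPy computes the rank in one call (via the SVD). Since the lecture's matrix is not reproduced above, the sketch below uses an arbitrary $4 \times 5$ example that also has rank 3:
#
# numerical rank via numpy (uses the SVD under the hood)
import numpy as np
A = np.array([[2.0,  5.0, -3.0, -4.0,  8.0],
              [4.0,  7.0, -4.0, -3.0,  9.0],
              [6.0,  9.0, -5.0,  2.0,  4.0],
              [0.0, -9.0,  6.0,  5.0, -6.0]])
print(np.linalg.matrix_rank(A))    # 3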
Consider an $m \times n$ matrix $A$. We know that:
$\dim \operatorname{Nul} A$ = the number of free variables in $A\mathbf{x} = \mathbf{0}$, which is the number of non-pivot columns in $A$.
$\operatorname{Rank} A = \dim \operatorname{Col} A$ = the number of basic variables in $A\mathbf{x} = \mathbf{0}$, which is the number of pivot columns in $A$.
Since every column is either a pivot column or a non-pivot column, we can now make this important connection:
$$\operatorname{Rank} A + \dim \operatorname{Nul} A = n.$$
This relationship involving dimension is true even though the two subspaces themselves live in different spaces: Col $A$ lies in $\mathbb{R}^m$ while Nul $A$ lies in $\mathbb{R}^n$.
This leads to the following theorem:
Theorem. If a matrix $A$ has $n$ columns, then $\operatorname{Rank} A + \dim \operatorname{Nul} A = n$.
This is a terrifically important fact! Here is an intuitive way to understand it. Let's think about a matrix $A$ and the associated linear transformation $T(\mathbf{x}) = A\mathbf{x}$.
If the matrix $A$ has $n$ columns, then $A$'s column space could have dimension as high as $n$. In other words, $T$'s range could have dimension as high as $n$.
However, if $T$ "throws away" a null space of dimension $\dim \operatorname{Nul} A$, then the dimension of the column space of $A$ is reduced to $n - \dim \operatorname{Nul} A$. Meaning, the dimension of $T$'s range is reduced to $n - \dim \operatorname{Nul} A$.
The above arguments show that when $A$ has $n$ columns, the "larger" the column space is, the "smaller" the null space is.
(Where "larger" means "has more dimensions.")
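This trade-off is easy to verify numerically: NumPy gives the rank, and SciPy's `null_space` gives an (orthonormal) basis for Nul $A$, whose number of columns is the nullity. (The matrix below is an arbitrary example.)
#
# rank A + dim Nul A == n (the number of columns)
import numpy as np
from scipy.linalg import null_space
A = np.array([[1.0, 2.0, 3.0, 4.0],
              [2.0, 4.0, 6.0, 8.0],
              [1.0, 0.0, 1.0, 0.0]])
rank = np.linalg.matrix_rank(A)        # dim Col A
nullity = null_space(A).shape[1]       # dim Nul A
print(rank, nullity, rank + nullity == A.shape[1])   # 2 2 True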
Let's consider what this means when $A$ is square ($n \times n$). At one "extreme," the column space of $A$ could have maximum dimension -- i.e., $\dim \operatorname{Col} A = n$ (and so $\dim \operatorname{Nul} A = 0$).
Recall that the IMT said that an $n \times n$ matrix $A$ is invertible if and only if its columns are linearly independent, and also if and only if its columns span $\mathbb{R}^n$.
Hence we now can see that an $n \times n$ matrix $A$ is invertible if and only if the columns of $A$ form a basis for $\mathbb{R}^n$.
This leads to the following facts, which further extend the IMT:
Theorem. Let $A$ be a square $n \times n$ matrix. Then the following statements are each equivalent to the statement that $A$ is an invertible matrix:
1. The columns of $A$ form a basis for $\mathbb{R}^n$.
2. $\operatorname{Col} A = \mathbb{R}^n$.
3. $\dim \operatorname{Col} A = n$.
4. $\operatorname{Rank} A = n$.
5. $\operatorname{Nul} A = \{\mathbf{0}\}$.
6. $\dim \operatorname{Nul} A = 0$.
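As a closing sanity check, these conditions can be verified at once for any particular matrix (the matrix below is an arbitrary invertible example; `null_space` is from SciPy):
#
# for an invertible matrix: rank n, trivial null space
import numpy as np
from scipy.linalg import null_space
A = np.array([[2.0, 0.0, 1.0],
              [1.0, 3.0, 0.0],
              [0.0, 1.0, 1.0]])
n = A.shape[1]
print(np.linalg.matrix_rank(A) == n)   # True: Rank A = n
print(null_space(A).shape[1] == 0)     # True: dim Nul A = 0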