#
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import laUtilities as ut
import slideUtilities as sl
import demoUtilities as dm
from matplotlib import animation
from importlib import reload
from datetime import datetime
from IPython.display import Image, display_html, display, Math, HTML;
qr_setting = None
mp.rcParams['animation.html'] = 'jshtml';
Homework 4 is out, due 3/3
Upcoming office hours
Aggarwal Section 2.4-2.5
#
B = np.array(
[[ 1, 0],
[ 0,-1]])
A = np.array(
[[-1, 0],
[ 0, 1]])
#print("A ="); print(A)
#print("B ="); print(B)
note = dm.mnote()   # define the demo shape before plotting it
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(B @ note,'g')
dm.plotShape(A @ (B @ note),'r')
[This lecture is based on Prof. Crovella's CS 132 lecture notes.]
Last time, we discussed a geometric interpretation of matrix-vector multiplication $A\mathbf{x}$.
We think of $A$ as a linear transformation that maps the vector $\mathbf{x}$ in the domain into a new vector $A\mathbf{x}$ in the codomain. The resulting vector is called the image of $\mathbf{x}$ under the transformation $T$.
#
display(Image("images/05-3dtransform.jpg", width=1000))
We saw some example transformations, like the horizontal shear matrix $\begin{bmatrix} 1 & 1.5 \\ 0 & 1 \end{bmatrix}$.
To understand how a matrix transforms space, it suffices to understand how the matrix transforms the standard basis vectors $\mathbf{e}_1$ and $\mathbf{e}_2$, along with the unit square containing them.
#
ax = dm.plotSetup()
square = np.array([[0, 1, 1, 0],
[1, 1, 0, 0]])
dm.plotSquare(square)
A = np.array([[1.0, 1.5],[0.0,1.0]])
ssquare = np.zeros(np.shape(square))
for i in range(4):
    ssquare[:,i] = dm.AxVS(A,square[:,i])
dm.plotSquare(ssquare,'r')
ax.plot([0],[1],'ro',markersize=8)
ax.arrow(0, 0, 1, 0, head_width=0.2, head_length=0.2, length_includes_head = True)
ax.arrow(0, 0, 0, 1, head_width=0.2, head_length=0.2, length_includes_head = True)
ax.text(-0.5,0.35,'$e_2$',size=20)
ax.plot([1],[0],'ro',markersize=8)
ax.text(0.35,-0.35,'$e_1$',size=20);
Then, we can understand how the matrix applies to all of $\mathbb{R}^2$ using the principle of linearity.
Definition. A transformation $T$ is linear if:
$$T(\mathbf{u} + \mathbf{v}) = T(\mathbf{u}) + T(\mathbf{v}) \quad\text{and}\quad T(c\mathbf{u}) = c\,T(\mathbf{u})$$
for all $\mathbf{u}, \mathbf{v}$ in the domain of $T$ and for all scalars $c$.
We proved last time that the ideas of matrix multiplication and linear transformation of vector spaces are equivalent.
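As a quick numerical sanity check of this equivalence (a sketch only; the matrix and the vectors and scalar below are arbitrary choices), we can verify that matrix-vector multiplication satisfies both linearity properties:
#
# Check the two linearity properties numerically for one matrix A.
A = np.array([[1.0, 1.5],
              [0.0, 1.0]])
u = np.array([2.0, -1.0])
v = np.array([0.5, 3.0])
c = 4.0
print(np.allclose(A @ (u + v), A @ u + A @ v))   # True: T(u + v) = T(u) + T(v)
print(np.allclose(A @ (c * u), c * (A @ u)))     # True: T(cu) = c T(u)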
Then, we found the matrix corresponding to several linear transformations of 2-dimensional space. For example, the act of rotating counterclockwise by $\theta$ corresponds to the matrix $\begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}$.
#
note = np.array(
[[193,47],
[140,204],
[123,193],
[99,189],
[74,196],
[58,213],
[49,237],
[52,261],
[65,279],
[86,292],
[113,295],
[135,282],
[152,258],
[201,95],
[212,127],
[218,150],
[213,168],
[201,185],
[192,200],
[203,214],
[219,205],
[233,191],
[242,170],
[244,149],
[242,131],
[233,111]])
note = note.T/150.0
dm.plotSetup()
note = dm.mnote()
dm.plotShape(note)
angle = 90
theta = (angle/180) * np.pi
A = np.array(
[[np.cos(theta), -np.sin(theta)],
[np.sin(theta), np.cos(theta)]])
rnote = A @ note
dm.plotShape(rnote,'r')
Let's examine several linear transformations from $\mathbb{R}^2$ to $\mathbb{R}^2$, both geometrically and computationally.
Let $T: \mathbb{R}^2 \to \mathbb{R}^2$ be the transformation that rotates each point in $\mathbb{R}^2$ about the origin through an angle $\theta$, with counterclockwise rotation for a positive angle.
Let's find the standard matrix $A$ of this transformation.
Question. The columns of $I$ are $\mathbf{e}_1 = \begin{bmatrix} 1 \\ 0 \end{bmatrix}$ and $\mathbf{e}_2 = \begin{bmatrix} 0 \\ 1 \end{bmatrix}$. Where does $T$ map the standard basis vectors?
Referring to the diagram below, we can see that $\mathbf{e}_1$ rotates into $\begin{bmatrix} \cos\theta \\ \sin\theta \end{bmatrix}$ and $\mathbf{e}_2$ rotates into $\begin{bmatrix} -\sin\theta \\ \cos\theta \end{bmatrix}$.
#
import matplotlib.patches as patches
ax = dm.plotSetup(-1.2, 1.2, -0.5, 1.2)
# red circle portion
arc = patches.Arc([0., 0.], 2., 2., 0., 340., 200.,
linewidth = 2, color = 'r',
linestyle = '-.')
ax.add_patch(arc)
#
# labels
ax.text(1.1, 0.1, r'$\mathbf{e}_1 = (1, 0)$', size = 20)
ax.text(0.1, 1.1, r'$\mathbf{e}_2 = (0, 1)$', size = 20)
#
# angle of rotation and rotated points
theta = np.pi / 6
e1t = [np.cos(theta), np.sin(theta)]
e2t = [-np.sin(theta), np.cos(theta)]
#
# theta labels
ax.text(0.5, 0.08, r'$\theta$', size = 20)
ax.text(-0.25, 0.5, r'$\theta$', size = 20)
#
# arrows from origin
ax.arrow(0, 0, e1t[0], e1t[1],
length_includes_head = True,
width = .02)
ax.arrow(0, 0, e2t[0], e2t[1],
length_includes_head = True,
width = .02)
#
# new point labels
ax.text(e1t[0]+.05, e1t[1]+.05, r'$[\cos\; \theta, \sin \;\theta]$', size = 20)
ax.text(e2t[0]-1.1, e2t[1]+.05, r'$[-\sin\; \theta, \cos \;\theta]$', size = 20)
#
# curved arrows showing rotation
ax.annotate("",
xytext=(0.7, 0), xycoords='data',
xy=(0.7*e1t[0], 0.7*e1t[1]), textcoords='data',
size=10, va="center", ha="center",
arrowprops=dict(arrowstyle="simple",
connectionstyle="arc3,rad=0.3"),
)
ax.annotate("",
xytext=(0, 0.7), xycoords='data',
xy=(0.7*e2t[0], 0.7*e2t[1]), textcoords='data',
size=10, va="center", ha="center",
arrowprops=dict(arrowstyle="simple",
connectionstyle="arc3,rad=0.3"),
)
#
# new points
plt.plot([e1t[0], e2t[0]], [e1t[1], e2t[1]], 'bo', markersize = 10)
plt.plot([0, 1], [1, 0], 'go', markersize = 10);
So by the Theorem above,
$$A = \begin{bmatrix} T(\mathbf{e}_1) & T(\mathbf{e}_2) \end{bmatrix} = \begin{bmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{bmatrix}.$$
In particular, a 90 degree counterclockwise rotation corresponds to the matrix
$$\begin{bmatrix} 0 & -1 \\ 1 & 0 \end{bmatrix}.$$
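As a quick check (a small sketch, not part of the original demos), we can evaluate the rotation matrix at $\theta = 90$ degrees and confirm it matches, up to floating-point roundoff:
#
# Evaluate the rotation matrix at theta = 90 degrees; rounding the tiny
# floating-point residue should give [[0, -1], [1, 0]].
theta = np.pi / 2
R90 = np.array([[np.cos(theta), -np.sin(theta)],
                [np.sin(theta),  np.cos(theta)]])
print(np.round(R90))           # [[ 0. -1.]
                               #  [ 1.  0.]]
print(R90 @ np.array([1, 0]))  # e1 maps to (approximately) [0, 1]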
First, let's recall the linear transformation $T(\mathbf{x}) = r\mathbf{x}$.
With $r > 1$, this is a dilation. It moves every vector further from the origin.
Let's say the dilation is by a factor of 2.5.
To construct the matrix $A$ that implements this transformation, we ask: where do $\mathbf{e}_1$ and $\mathbf{e}_2$ go?
#
ax = dm.plotSetup()
ax.plot([0],[1],'ro',markersize=8)
ax.text(0.25,1,'(0,1)',size=20)
ax.plot([1],[0],'ro',markersize=8)
ax.text(1.25,0.25,'(1,0)',size=20);
Under the action of $T$, $\mathbf{e}_1$ goes to $\begin{bmatrix} 2.5 \\ 0 \end{bmatrix}$ and $\mathbf{e}_2$ goes to $\begin{bmatrix} 0 \\ 2.5 \end{bmatrix}$.
So the matrix must be $A = \begin{bmatrix} 2.5 & 0 \\ 0 & 2.5 \end{bmatrix}$. Let's see this visually.
#
square = np.array(
[[0,1,1,0],
[1,1,0,0]])
A = np.array(
[[2.5, 0],
[0, 2.5]])
# display(Latex(rf"$A = {ltx_array_fmt(A, '{:1.1f}')}$"))
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
#
dm.plotSetup(-7,7,-7, 7)
dm.plotShape(note)
dm.plotShape(A @ note,'r')
OK, now let's reflect through the $x_1$ axis. Where do $\mathbf{e}_1$ and $\mathbf{e}_2$ go?
#
A = np.array(
[[1, 0],
[0, -1]])
print("A = "); print(A)
#display(Latex(rf"$A = {ltx_array_fmt(A, '{:d}')}$"))
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
#plt.title(r'Reflection through the $x_1$ axis', size = 20);
A = 
[[ 1  0]
 [ 0 -1]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
What about reflection through the $x_2$ axis?
#
A = np.array(
[[-1,0],
[0, 1]])
print("A = "); print(A)
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
A = 
[[-1  0]
 [ 0  1]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
What about reflection through the line $x_2 = x_1$?
#
A = np.array(
[[0,1],
[1,0]])
print("A = "); print(A)
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
plt.plot([-2,2],[-2,2],'b--');
A = 
[[0 1]
 [1 0]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
What about reflection through the line $x_2 = -x_1$?
#
A = np.array(
[[ 0,-1],
[-1, 0]])
print("A = "); print(A)
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
plt.plot([-2,2],[2,-2],'b--');
A = 
[[ 0 -1]
 [-1  0]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
What about a 180-degree rotation about the origin? Note that this is different from reflection through the line $x_2 = -x_1$ (how?).
We could use our rotation formula to compute this matrix, but let's instead just think about how this transformation affects $\mathbf{e}_1$ and $\mathbf{e}_2$.
#
A = np.array(
[[-1, 0],
[ 0,-1]])
print("A = "); print(A)
ax = dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r');
A = 
[[-1  0]
 [ 0 -1]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
We can expand or shrink one dimension independently of the other. Suppose we want to contract the $x_1$ dimension by a ratio of 0.45, but leave the $x_2$ dimension unchanged. What matrix performs this operation?
#
A = np.array(
[[0.45, 0],
[0, 1]])
print("A = "); print(A)
ax = dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
ax.arrow(1.0,1.5,-1.0,0,head_width=0.15, head_length=0.1, length_includes_head=True);
A = 
[[0.45 0.  ]
 [0.   1.  ]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
We can similarly construct a horizontal expansion by a ratio of 2.5.
#
A = np.array(
[[2.5,0],
[0, 1]])
print("A = "); print(A)
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
A = 
[[2.5 0. ]
 [0.  1. ]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
Matrices of the form $\begin{bmatrix} 1 & k \\ 0 & 1 \end{bmatrix}$ or $\begin{bmatrix} 1 & 0 \\ k & 1 \end{bmatrix}$ are called shears.
We have already seen a horizontal shear. Here is a vertical shear.
#
A = np.array(
[[ 1, 0],
[-1.5, 1]])
print("A = "); print(A)
dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
A = 
[[ 1.   0. ]
 [-1.5  1. ]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
Now let's look at a particular kind of transformation called a projection.
Imagine we took any given point and 'dropped' it onto the $x_1$-axis.
#
A = np.array(
[[1,0],
[0,0]])
print("A = "); print(A)
ax = dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
ax.arrow(1.0,1.0,0,-0.9,head_width=0.15, head_length=0.1, length_includes_head=True);
ax.arrow(0.0,1.0,0,-0.9,head_width=0.15, head_length=0.1, length_includes_head=True);
A = 
[[1 0]
 [0 0]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
Question. What happens to the shape of the point set? What about its area?
Here is a projection onto the $x_2$ axis.
#
A = np.array(
[[0,0],
[0,1]])
print("A = "); print(A)
ax = dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square)
ax.arrow(1.0,1.0,-0.9,0,head_width=0.15, head_length=0.1, length_includes_head=True);
ax.arrow(1.0,0.0,-0.9,0,head_width=0.15, head_length=0.1, length_includes_head=True);
A = 
[[0 0]
 [0 1]]
#
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
Notice that some of these transformations map multiple inputs to the same output, and some are incapable of generating certain outputs.
For example, the projections above can send multiple different points to the same point.
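Here is a tiny numerical illustration (the two input points are arbitrary choices): the projection onto the $x_1$-axis sends distinct points to the same image.
#
# Two different input points with the same first coordinate have the same
# image under projection onto the x1-axis, so this map is not one-to-one.
A = np.array([[1, 0],
              [0, 0]])
u = np.array([2, 1])
v = np.array([2, 5])
print(A @ u)   # [2 0]
print(A @ v)   # [2 0] -- same image, different inputs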
We need some terminology to understand these properties of linear transformations.
Definition. A mapping $T: \mathbb{R}^n \to \mathbb{R}^m$ is said to be onto $\mathbb{R}^m$ if each $\mathbf{b}$ in $\mathbb{R}^m$ is the image of at least one $\mathbf{x}$ in $\mathbb{R}^n$.
In other words, $T$ is onto if every element of its codomain is in its range.
Another (important) way of thinking about this is that $T$ is onto if there is a solution $\mathbf{x}$ of
$$T(\mathbf{x}) = \mathbf{b}$$
for all possible $\mathbf{b}$.
This is asking an existence question about a solution of the equation $T(\mathbf{x}) = \mathbf{b}$ for all $\mathbf{b}$.
Question. For the transformation $T$ pictured below, is $T$ onto?
#
display(Image("images/05-image.jpg", width=1000))
Here, we see that $T$ maps points in $\mathbb{R}^2$ to a plane lying within $\mathbb{R}^3$. That is, the range of $T$ is a strict subset of the codomain of $T$.
So $T$ is not onto $\mathbb{R}^3$.
Consider the following two transformations (a reflection and a projection). In each case, the red points are the images of the blue points. Is each transformation onto $\mathbb{R}^2$?
#
A = np.array(
[[ 0,-1],
[-1, 0]])
print("A = "); print(A)
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
A = 
[[ 0 -1]
 [-1  0]]
#
A = np.array(
[[1,0],
[0,0]])
print("A = "); print(A)
dm.plotSetup()
dm.plotShape(note)
dm.plotShape(A @ note,'r')
A = 
[[1 0]
 [0 0]]
Definition. A mapping $T: \mathbb{R}^n \to \mathbb{R}^m$ is said to be one-to-one if each $\mathbf{b}$ in $\mathbb{R}^m$ is the image of at most one $\mathbf{x}$ in $\mathbb{R}^n$.
If $T$ is one-to-one, then for each $\mathbf{b}$ the equation $T(\mathbf{x}) = \mathbf{b}$ has either a unique solution, or none at all. This is asking a uniqueness question about the solutions of the equation $T(\mathbf{x}) = \mathbf{b}$ for all $\mathbf{b}$.
#
display(Image("images/06-onto.jpeg", width=1000))
Let's examine the relationship between these ideas and some previous definitions.
Question. If the equation $A\mathbf{x} = \mathbf{b}$ is consistent (i.e., has at least one solution) for all $\mathbf{b}$, is $T(\mathbf{x}) = A\mathbf{x}$ onto? Is it one-to-one?
Question. If $A\mathbf{x} = \mathbf{b}$ is consistent and has a unique solution for all $\mathbf{b}$, is $T$ onto? Is it one-to-one?
Question. If $A\mathbf{x} = \mathbf{b}$ is not consistent for all $\mathbf{b}$, is $T$ onto? Is it one-to-one?
Questions. (Think about these on your own.)
If $T$ is onto, is $A\mathbf{x} = \mathbf{b}$ consistent for all $\mathbf{b}$? Is the solution unique for all $\mathbf{b}$?
If $T$ is one-to-one, is $A\mathbf{x} = \mathbf{b}$ consistent for all $\mathbf{b}$? Is the solution unique for all $\mathbf{b}$?
Notice that in some of the transformations above, the "size" of a shape grows or shrinks.
Let's look at how area (or volume) of a shape is affected by a linear transformation.
Thanks to linearity, it suffices to ask what happens to the unit square (or cube, or hypercube). All other areas or volumes will scale similarly.
#
A = np.array([[2, 1],[0.75, 1.5]])
print("A = "); print(A)
ax = dm.plotSetup(-1, 4, -1, 4)
dm.plotSquare(square, 'r')
dm.plotSquare(A @ square);
A = 
[[2.   1.  ]
 [0.75 1.5 ]]
Let's denote the matrix of a general 2-dimensional linear transformation as:
$$A = \begin{bmatrix} a & b \\ c & d \end{bmatrix}.$$
Then, here is what happens to the unit square:
#
v1 = np.array([2, 0.75])
v2 = np.array([1, 1.5])
A = np.column_stack([v1, v2])
ax = dm.plotSetup(-1, 4, -1, 4)
dm.plotSquare(square, 'r')
dm.plotSquare(A @ square)
plt.text(1.1, 0.1, '(1, 0)', size = 12)
plt.text(1.05, 1.05, '(1, 1)', size = 12)
plt.text(-0.6, 1.2, '(0, 1)', size = 12)
plt.text(v1[0]+0.2, v1[1]+0.1, '(a, c)', size = 12)
plt.text((v1+v2)[0]+0.1, (v1+v2)[1]+0.1, '(a+b, c+d)', size = 12)
plt.text(v2[0]-0.5, v2[1]+0.1, '(b, d)', size = 12);
Now, let's determine the area of the blue diamond in terms of $a$, $b$, $c$, and $d$.
To do that, we'll use this diagram:
#
a = 2
b = 1
c = 0.75
d = 1.5
v1 = np.array([a, c])
v2 = np.array([b, d])
A = np.column_stack([v1, v2])
ax = dm.plotSetup(-1, 4, -1, 4)
# red triangles
dm.plotShape(np.array([[0, 0], [0, d], [b, d]]).T, 'r')
plt.text(0.2, 1, r'$\frac{1}{2}bd$', size = 12)
dm.plotShape(np.array([[a, c], [a+b, c+d], [a+b, c]]).T, 'r')
plt.text(2.5, 1, r'$\frac{1}{2}bd$', size = 12)
# gray triangles
dm.plotShape(np.array([[b, d], [b, c+d], [a+b, c+d]]).T, 'k')
plt.text(1.2, 1.9, r'$\frac{1}{2}ac$', size = 12)
dm.plotShape(np.array([[0, 0], [a, c], [a, 0]]).T, 'k')
plt.text(1.2, 0.15, r'$\frac{1}{2}ac$', size = 12)
# green squares
dm.plotShape(np.array([[a, 0], [a, c], [a+b, c], [a+b, 0]]).T, 'g')
plt.text(0.2, 1.9, r'$bc$', size = 12)
dm.plotShape(np.array([[0, d], [0, c+d], [b, c+d], [b, d]]).T, 'g')
plt.text(2.5, 0.15, r'$bc$', size = 12)
#
dm.plotSquare(A @ square)
plt.text(v1[0]-0.5, v1[1]+0.05, '(a, c)', size = 12)
plt.text((v1+v2)[0]+0.1, (v1+v2)[1]+0.1, '(a+b, c+d)', size = 12)
plt.text(v2[0]+0.1, v2[1]-0.15, '(b, d)', size = 12);
Each of the triangles and rectangles has an area we can determine in terms of $a$, $b$, $c$, and $d$.
So the area of the blue diamond is:
$$(a+b)(c+d) - 2\cdot\tfrac{1}{2}ac - 2\cdot\tfrac{1}{2}bd - 2\,bc = ad - bc.$$
So we conclude that when we use a linear transformation
$$A = \begin{bmatrix} a & b \\ c & d \end{bmatrix},$$
the area of a unit square (or any shape) is scaled by a factor of $ad - bc$.
This quantity is a fundamental property of the matrix $A$.
So, we give it a name: it is the determinant of $A$.
We denote it as $\det A$.
So, for a $2 \times 2$ matrix $A = \begin{bmatrix} a & b \\ c & d \end{bmatrix}$,
$$\det A = ad - bc.$$
The determinant can be defined for any square matrix. For a square matrix $A$ larger than $2 \times 2$, the determinant tells us how the volume of a unit (hyper)cube is scaled when it is linearly transformed by $A$.
We will see how to compute these determinants in a later lecture.
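For $2 \times 2$ examples we can already check the area-scaling interpretation numerically. Here is a small sketch using np.linalg.det; the matrices are the expansion and shear examples from above.
#
# Check the area-scaling interpretation with np.linalg.det.
expansion = np.array([[2.5, 0],
                      [0,   1]])     # horizontal expansion by 2.5
shear = np.array([[ 1,   0],
                  [-1.5, 1]])        # vertical shear
print(np.linalg.det(expansion))   # approximately 2.5: areas grow by a factor of 2.5
print(np.linalg.det(shear))       # approximately 1.0: a shear preserves area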
For non-square matrices, the determinant is not defined because we aren't scaling the area of a shape; we are moving it to a different space entirely.
#
display(Image("images/05-3dtransform.jpg", width=1000))
A special case to consider is when the determinant of a matrix is zero.
Question. When does it happen that $\det A = 0$?
Consider when $A$ is the matrix of a projection, such as the projection onto the $x_1$-axis shown below.
The unit square has been collapsed onto the $x_1$-axis, resulting in a shape with area of zero.
This is confirmed by the determinant, which is
$$\det \begin{bmatrix} 1 & 0 \\ 0 & 0 \end{bmatrix} = (1)(0) - (0)(0) = 0.$$
#
A = np.array(
[[1,0],
[0,0]])
print("A = "); print(A)
ax = dm.plotSetup()
dm.plotSquare(square)
dm.plotSquare(A @ square,'r')
ax.arrow(1.0,1.0,0,-0.9,head_width=0.15, head_length=0.1, length_includes_head=True);
ax.arrow(0.0,1.0,0,-0.9,head_width=0.15, head_length=0.1, length_includes_head=True);
A = 
[[1 0]
 [0 0]]
Next, we investigate the idea of the "inverse" of a matrix. It is analogous to inverses of real numbers, where each non-zero number $a$ has a multiplicative inverse $\frac{1}{a}$ such that $a \cdot \frac{1}{a} = 1$.
This inverse does not exist for all matrices.
#
display(Image("images/06-inverse.png", width=1000))
Definition. A square matrix $A$ is called invertible if there exists a matrix $C$ such that
$$CA = I \quad\text{and}\quad AC = I.$$
In that case, $C$ is called the inverse of $A$.
Clearly, $C$ must also be square and the same size as $A$.
The inverse of $A$ is denoted $A^{-1}$.
A matrix that is not invertible is called a singular matrix. A matrix that is invertible is called nonsingular.
Example. As in the previous example, let $A = \begin{bmatrix} 0 & -1 \\ 1 & 0 \end{bmatrix}$ (rotation by 90 degrees) and $B = \begin{bmatrix} 0 & 1 \\ -1 & 0 \end{bmatrix}$ (rotation by 270 degrees). Show that $B$ is the inverse of $A$.
We know that $AB$ corresponds to performing a 270 degree rotation followed by a 90 degree rotation. The result is a 360 degree rotation -- that is, every vector ends up back where it started, which is the identity transformation. Hence, $AB = I$.
Similarly, $BA$ corresponds to performing a 90 degree rotation followed by a 270 degree rotation, so once again the result is the identity matrix: $BA = I$. Hence $B = A^{-1}$.
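We can confirm this numerically (a quick check, using the 90- and 270-degree rotation matrices from the example above):
#
# The 90-degree and 270-degree rotation matrices multiply to the identity
# in either order, so each is the inverse of the other.
A = np.array([[0, -1],
              [1,  0]])    # rotation by 90 degrees
B = np.array([[ 0, 1],
              [-1, 0]])    # rotation by 270 degrees
print(A @ B)   # [[1 0]
               #  [0 1]]
print(B @ A)   # [[1 0]
               #  [0 1]]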
Here are some nice properties of matrix inversion.
Theorem. If $A$ and $B$ are invertible matrices of the same size, then:
$A^{-1}$ is invertible, and $(A^{-1})^{-1} = A$;
$AB$ is invertible, and $(AB)^{-1} = B^{-1}A^{-1}$;
$A^T$ is invertible, and $(A^T)^{-1} = (A^{-1})^T$.
(I will let you verify these properties on your own.)
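Here is one way to spot-check these properties numerically (a sketch only, not a proof; the matrices are random):
#
# Numerical spot-check of the inverse properties on random matrices.
rng = np.random.default_rng(0)
A = rng.random((2, 2)) + np.eye(2)   # adding I keeps the determinant positive, so A is invertible
B = rng.random((2, 2)) + np.eye(2)
print(np.allclose(np.linalg.inv(np.linalg.inv(A)), A))        # (A^-1)^-1 = A
print(np.allclose(np.linalg.inv(A @ B),
                  np.linalg.inv(B) @ np.linalg.inv(A)))       # (AB)^-1 = B^-1 A^-1
print(np.allclose(np.linalg.inv(A.T), np.linalg.inv(A).T))    # (A^T)^-1 = (A^-1)^T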
Example.
If $A = \begin{bmatrix} 2 & 5 \\ -3 & -7 \end{bmatrix}$ and $C = \begin{bmatrix} -7 & -5 \\ 3 & 2 \end{bmatrix}$, then:
$$AC = \begin{bmatrix} 2 & 5 \\ -3 & -7 \end{bmatrix}\begin{bmatrix} -7 & -5 \\ 3 & 2 \end{bmatrix} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix}$$
and:
$$CA = \begin{bmatrix} -7 & -5 \\ 3 & 2 \end{bmatrix}\begin{bmatrix} 2 & 5 \\ -3 & -7 \end{bmatrix} = \begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix},$$
so we conclude that $C = A^{-1}$.
How do we find $A^{-1}$? Let's first look at the special case of $2 \times 2$ matrices and then consider the general case of any square matrix.
Theorem. Let $A = \begin{bmatrix} a & b \\ c & d \end{bmatrix}$.
If $ad - bc \neq 0$, then $A$ is invertible and
$$A^{-1} = \frac{1}{ad - bc}\begin{bmatrix} d & -b \\ -c & a \end{bmatrix}.$$
If $ad - bc = 0$, then $A$ is not invertible.
Notice that this theorem tells us, for $2 \times 2$ matrices, exactly which ones are invertible... namely, those that have nonzero determinant.
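As an illustration (a minimal sketch; inverse_2x2 is a hypothetical helper, not part of the course utilities), we can code up the $2 \times 2$ formula and compare it with np.linalg.inv:
#
def inverse_2x2(M):
    """Invert a 2x2 matrix using the (ad - bc) formula from the theorem."""
    a, b = M[0]
    c, d = M[1]
    det = a * d - b * c
    if det == 0:
        # exact-zero check, which is fine for this illustration
        raise ValueError("matrix is not invertible (determinant is zero)")
    return (1 / det) * np.array([[ d, -b],
                                 [-c,  a]])

A = np.array([[ 2.0,  5.0],
              [-3.0, -7.0]])
print(inverse_2x2(A))      # [[-7. -5.]
                           #  [ 3.  2.]]
print(np.linalg.inv(A))    # same result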
Example. Given a $2 \times 2$ matrix $A$, if the columns of $A$ are linearly dependent, is $A$ invertible?
Solution. If the columns of $A$ are linearly dependent, then at least one of the columns is a multiple of the other.
Let the multiplier be $k$.
Then we can express $A$ as:
$$A = \begin{bmatrix} a & ka \\ c & kc \end{bmatrix}.$$
The determinant of $A$ is
$$\det A = a(kc) - (ka)c = 0.$$
So a $2 \times 2$ matrix with linearly dependent columns is not invertible.
Now let's look at a general method for computing the inverse of $A$. Recall our definition of matrix multiplication: $AB$ is the matrix formed by multiplying $A$ times each column of $B$.
Let's look at the equation
$$AA^{-1} = I.$$
Let's call the columns of $A^{-1}$: $\mathbf{x}_1, \mathbf{x}_2, \ldots, \mathbf{x}_n$, so that $A^{-1} = \begin{bmatrix} \mathbf{x}_1 & \mathbf{x}_2 & \cdots & \mathbf{x}_n \end{bmatrix}$.
We know what the columns of $I$ are: $I = \begin{bmatrix} \mathbf{e}_1 & \mathbf{e}_2 & \cdots & \mathbf{e}_n \end{bmatrix}$.
So:
$$A\begin{bmatrix} \mathbf{x}_1 & \mathbf{x}_2 & \cdots & \mathbf{x}_n \end{bmatrix} = \begin{bmatrix} \mathbf{e}_1 & \mathbf{e}_2 & \cdots & \mathbf{e}_n \end{bmatrix}.$$
Notice that we can break this up into $n$ separate problems:
$$A\mathbf{x}_1 = \mathbf{e}_1, \quad A\mathbf{x}_2 = \mathbf{e}_2, \quad \ldots, \quad A\mathbf{x}_n = \mathbf{e}_n.$$
(This is a common trick ... make sure you understand why it works!)
So here is a general way to compute the inverse of $A$: solve each of the $n$ systems $A\mathbf{x}_i = \mathbf{e}_i$; the solutions $\mathbf{x}_1, \ldots, \mathbf{x}_n$ are the columns of $A^{-1}$.
If any of the systems is inconsistent or has an infinite solution set, then $A^{-1}$ does not exist.
This general strategy leads to an algorithm for inverting any matrix: we can use Gaussian elimination on the larger augmented matrix $[A \;\; I]$. The result in reduced row echelon form will be $[I \;\; A^{-1}]$.
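Here is a small sketch of the column-by-column view in numpy: each column of $A^{-1}$ is found by solving $A\mathbf{x}_i = \mathbf{e}_i$ with np.linalg.solve, and the result matches np.linalg.inv.
#
# Build the inverse one column at a time by solving A x_i = e_i.
A = np.array([[ 2.0,  5.0],
              [-3.0, -7.0]])
I = np.eye(2)
cols = [np.linalg.solve(A, I[:, i]) for i in range(2)]
A_inv = np.column_stack(cols)
print(A_inv)                                    # [[-7. -5.]
                                                #  [ 3.  2.]]
print(np.allclose(A_inv, np.linalg.inv(A)))     # True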
But solving all of these linear systems is complicated.
Any time you need to invert a matrix larger than $2 \times 2$, you may need to use a calculator or computer.
To invert a matrix in Python/numpy,
use the function np.linalg.inv().
For example:
import numpy as np
A = np.array(
[[ 2.0, 5.0],
[-3.0,-7.0]])
print('A =\n',A)
B = np.linalg.inv(A)
print('B = \n',B)
A =
 [[ 2.  5.]
 [-3. -7.]]
B = 
 [[-7. -5.]
 [ 3.  2.]]
What do you think happens if you call np.linalg.inv()
on a matrix that is not invertible?
A = np.array([[2.,4.],[2.,4.]])
np.linalg.inv(A)  # this raises an error, as shown below
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-10-55215ad851ba> in <module>
      1 A = np.array([[2.,4.],[2.,4.]])
----> 2 np.linalg.inv(A)
~/.local/lib/python3.8/site-packages/numpy/linalg/linalg.py in inv(a)
    543     signature = 'D->D' if isComplexType(t) else 'd->d'
    544     extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 545     ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
~/.local/lib/python3.8/site-packages/numpy/linalg/linalg.py in _raise_linalgerror_singular(err, flag)
     87 def _raise_linalgerror_singular(err, flag):
---> 88     raise LinAlgError("Singular matrix")
LinAlgError: Singular matrix
The right way to handle this is:
A = np.array([[2.,4.],[2.,4.]])
try:
    np.linalg.inv(A)
except np.linalg.LinAlgError:
    print('Oops, looks like A is singular!')
Oops, looks like A is singular!