In [1]:
#
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import laUtilities as ut
import slideUtilities as sl
import demoUtilities as dm
from matplotlib import animation
from importlib import reload
from datetime import datetime
from IPython.display import Image, display_html, display, Math, HTML;
qr_setting = None

mp.rcParams['animation.html'] = 'jshtml';
In [2]:
#
def centerAxes(ax):
    ax.spines['left'].set_position('zero')
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('zero')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    bounds = np.array([ax.axes.get_xlim(), ax.axes.get_ylim()])
    ax.plot(bounds[0][0],bounds[1][0],'')
    ax.plot(bounds[0][1],bounds[1][1],'')

Announcements¶

  • Homework:
    • Optional bonus homework out, due Friday
      • Will replace your lowest homework score
  • Final exam: Monday, May 8 from 12-2pm
  • Upcoming office hours:
    • Today: Prof McDonald from 4:30-6pm in CCDS 1341
    • Tomorrow: Peer tutor Rohan Anand from 1:30-3pm in CCDS 16th floor
  • Reading
    • Aggarwal Sections 8.1-8.2

Lecture 33: SVD and Principal Component Analysis¶

[This lecture is based on Prof. Crovella's CS 132 and CS 506 lecture notes, and Lior Pachter's blog.]

Recap from last lecture¶

Last week, we began our investigation into singular value decomposition, the last and most powerful matrix factorization we will consider in this course.

$$A = U\Sigma V^T$$

Geometrically, SVD says that the linear transformation corresponding to $A$ can be decomposed into a sequence of three steps.

  1. Rotation within $\mathbb{R}^n$, without scaling (i.e., rotating from the standard basis to another orthonormal basis).

  2. Scaling each dimension $i$ by a constant $\sigma_i$, called a singular value.

    • If $m \neq n$, then the space is also transformed from $\mathbb{R}^n$ to $\mathbb{R}^m$ at the same time. Some dimensions might be projected away.
    • Some dimensions might also have such a small value of $\sigma_i$ that they might as well be projected away. That is, we might choose to remove them with little impact on the accuracy of the result.
  3. Rotation within $\mathbb{R}^m$.

    Note that this is not the inverse of step 1: the rotation might be by a different angle, across a different axis, and in a space of different dimension.
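
To make these three steps concrete, here is a minimal sketch (using a small hypothetical 2 × 2 matrix) that applies $V^T$ (rotate), then $\Sigma$ (scale), then $U$ (rotate) to a vector, and checks that the result agrees with multiplying by $A$ directly.

In [ ]:
import numpy as np

A = np.array([[3., 1.],
              [1., 2.]])           # hypothetical example matrix
x = np.array([1., 1.])             # an arbitrary input vector

U, s, Vt = np.linalg.svd(A)

step1 = Vt @ x                     # 1. rotate into the right singular basis
step2 = np.diag(s) @ step1         # 2. scale coordinate i by sigma_i
step3 = U @ step2                  # 3. rotate into the left singular basis

print(step3, A @ x)                # the two results agree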

In [27]:
# Source: Wikipedia
display(Image("images/18-svd-geometric.png", width=350))

We talked about constructing the singular value decomposition using the properties of $A^TA$.

Definition. The singular values of $A$ are the square roots of the eigenvalues of $A^TA$. They are denoted by $\sigma_1 = \sqrt{\lambda_1}, \ldots, \sigma_n = \sqrt{\lambda_n}$, and they are arranged in decreasing order $\sigma_1 \geq \sigma_2 \geq \cdots \geq \sigma_n \geq 0$.

The right singular vectors of $A$ are the corresponding eigenvectors $v_1, \ldots, v_n$ of $A^TA$, normalized to be of unit length.

The left singular vectors of $A$ are the vectors $u_i = \frac{1}{\sigma_i}Av_i$: that is, take $A$ times each of the right singular vectors and normalize them. (If $\sigma_i = 0$, then pick any vector $u_i$ that is orthogonal to all of the previous left singular vectors.)
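
As a sanity check on this construction, here is a small sketch (with a hypothetical 2 × 3 matrix) that builds the singular values and vectors from the eigendecomposition of $A^TA$ and compares the singular values with those returned by np.linalg.svd. Individual singular vectors may differ by a sign, since each is only determined up to sign.

In [ ]:
import numpy as np

A = np.array([[4., 11., 14.],
              [8.,  7., -2.]])               # hypothetical example

# eigendecomposition of the symmetric matrix A^T A
lam, V = np.linalg.eigh(A.T @ A)
idx = np.argsort(lam)[::-1]                  # sort eigenvalues in decreasing order
lam, V = lam[idx], V[:, idx]

sigma = np.sqrt(np.maximum(lam, 0))          # singular values
r = np.sum(sigma > 1e-10)                    # numerical rank

# left singular vectors u_i = (1/sigma_i) A v_i, for sigma_i > 0
U = (A @ V[:, :r]) / sigma[:r]

print(np.round(sigma, 4))
print(np.round(np.linalg.svd(A)[1], 4))      # same singular values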

Theorem. Let $A$ be an $m \times n$ matrix with rank $r$. Then there exists an $m \times n$ matrix $\Sigma$ whose diagonal entries are the first $r$ singular values of $A$, $\sigma_1 \geq \sigma_2 \geq \cdots \geq \sigma_r > 0$, and there exists an $m \times m$ orthogonal matrix $U$ and an $n \times n$ orthogonal matrix $V$ such that

$$A = U\Sigma V^T$$

This creates a full decomposition:

m×n⎡⎢ ⎢ ⎢ ⎢⎣⋱⋱A⋱⋱⎤⎥ ⎥ ⎥ ⎥⎦=m×m⎡⎢ ⎢ ⎢⎣⋮⋮0u1ur0⋮⋮0⎤⎥ ⎥ ⎥⎦m×n⎡⎢⎣σ100000σr00000000⎤⎥⎦n×n⎡⎢ ⎢ ⎢ ⎢ ⎢ ⎢⎣⋯⋯v1⋯⋯⋯⋯vr⋯⋯000000000000000⎤⎥ ⎥ ⎥ ⎥ ⎥ ⎥⎦[⋱⋱A⋱⋱]⏞m×n=[⋮⋮0u1ur0⋮⋮0]⏞m×m[σ100000σr00000000]⏞m×n[⋯⋯v1⋯⋯⋯⋯vr⋯⋯000000000000000]⏞n×n

But we often work with the reduced SVD of AA:

m×n⎡⎢ ⎢ ⎢ ⎢⎣⋱⋱A⋱⋱⎤⎥ ⎥ ⎥ ⎥⎦=m×r⎡⎢ ⎢ ⎢⎣⋮⋮u1ur⋮⋮⎤⎥ ⎥ ⎥⎦r×r[σ100σr]r×n[⋯⋯v1⋯⋯⋯⋯vr⋯⋯][⋱⋱A⋱⋱]⏞m×n=[⋮⋮u1ur⋮⋮]⏞m×r[σ100σr]⏞r×r[⋯⋯v1⋯⋯⋯⋯vr⋯⋯]⏞r×n

Dimensionality Reduction¶

Let's define the rank-$k$ approximation to $A$:

When $k < \operatorname{Rank} A$, the rank-$k$ approximation to $A$ is the closest rank-$k$ matrix to $A$, i.e.,

$$A^{(k)} = \arg\min_{\operatorname{Rank} B = k} \|A - B\|_F,$$

where the Frobenius norm of a matrix, $\|M\|_F = \sqrt{\sum_{i=1}^m \sum_{j=1}^n |m_{i,j}|^2}$, is the matrix equivalent to the Euclidean norm of a vector.
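
As a quick check of this definition on a hypothetical matrix, numpy computes the same quantity with np.linalg.norm:

In [ ]:
import numpy as np

M = np.array([[1., 2.],
              [3., 4.]])

print(np.sqrt(np.sum(M**2)))        # entrywise definition
print(np.linalg.norm(M, 'fro'))     # numpy's Frobenius norm -- same value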

A rank-$k$ matrix may take up much less space than the original $A$.

$$
m\left\{\;\overbrace{\begin{bmatrix} \vdots & \vdots & & \vdots \\ a_1 & a_2 & \cdots & a_n \\ \vdots & \vdots & & \vdots \end{bmatrix}}^{n}\right.
=
\overbrace{\begin{bmatrix} \vdots & & \vdots \\ \sigma_1 u_1 & \cdots & \sigma_k u_k \\ \vdots & & \vdots \end{bmatrix}}^{k}
\times
\begin{bmatrix} \cdots & v_1 & \cdots \\ & \vdots & \\ \cdots & v_k & \cdots \end{bmatrix}
$$

The rank-$k$ approximation takes up space $(m+n)k$ while $A$ itself takes space $mn$.

For example, if $k = 10$ and $m = n = 1000$, then the rank-$k$ approximation takes space $20000/1000000 = 2\%$ of $A$.

In terms of the singular value decomposition,

the best rank-$k$ approximation to $A$ is formed by taking

  • $U' =$ the $k$ leftmost columns of $U$,
  • $\Sigma' =$ the $k \times k$ upper left submatrix of $\Sigma$, and
  • $(V')^T =$ the $k$ upper rows of $V^T$,

and constructing

$$A^{(k)} = U'\Sigma'(V')^T.$$

Furthermore, the distance (in Frobenius norm) of the best rank-$k$ approximation $A^{(k)}$ from $A$ is equal to $\sqrt{\sum_{i=k+1}^r \sigma_i^2}$.

That is, if you construct $A^{(k)}$ as shown above, then:

$$\|A - A^{(k)}\|_F^2 = \sum_{i=k+1}^r \sigma_i^2$$
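
Here is a minimal sketch (random hypothetical matrix) that builds $A^{(k)}$ from the truncated SVD and confirms that the Frobenius error matches $\sqrt{\sum_{i=k+1}^r \sigma_i^2}$:

In [ ]:
import numpy as np

rng = np.random.default_rng(1)
A = rng.normal(size=(8, 6))                     # hypothetical full-rank matrix
k = 2

U, s, Vt = np.linalg.svd(A, full_matrices=False)
Ak = U[:, :k] @ np.diag(s[:k]) @ Vt[:k, :]      # best rank-k approximation

print(np.linalg.norm(A - Ak, 'fro'))            # direct computation
print(np.sqrt(np.sum(s[k:]**2)))                # from the singular value tail -- same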

33.1 Empirical Examples of SVD¶

Let's see how low-rank approximations via SVD can be used in practice, and investigate some real data.

Image compression¶

Image data often shows low effective rank, meaning that it can be simplified to a low-rank approximation with only a small loss.

For example, here is an original photo:

In [3]:
boat = np.loadtxt('data/boat.dat')
import matplotlib.cm as cm
plt.figure()
plt.imshow(boat,cmap = cm.Greys_r)
plt.axis('off');

We can think of this as a $512 \times 512$ matrix $A$ whose entries are 1-byte grayscale values (numbers between 0 and 255).

In [22]:
print(boat)
[[127. 123. 125. ... 165. 169. 166.]
 [128. 126. 128. ... 169. 163. 167.]
 [128. 124. 128. ... 178. 160. 175.]
 ...
 [112. 112. 115. ... 101.  97. 104.]
 [110. 112. 117. ... 104.  93. 105.]
 [113. 115. 121. ... 102.  95.  97.]]

This image is 512 × 512 pixels in size. As a matrix, it has rank 512.

But its effective rank is low. To see why, let's look at the spectrum of this image matrix:

In [4]:
u, s, vt = np.linalg.svd(boat, full_matrices = False)
plt.plot(s)
plt.xlabel('$k$', size = 16)
plt.ylabel(r'$\sigma_k$', size = 16)
plt.title('Singular Values of Boat Image', size = 16);

Based on the plot above, its effective rank is perhaps 40. In fact, the error when we approximate $A$ by a rank-40 matrix is only around 10%.
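
To see where the "around 10%" figure comes from, the relative error can be read directly off the tail of the singular values (a quick check, reusing the boat matrix loaded above):

In [ ]:
# relative Frobenius error of the rank-40 approximation of the boat image,
# computed from the singular value tail; should come out around 0.1
u, s, vt = np.linalg.svd(boat, full_matrices=False)
k = 40
print(np.sqrt(np.sum(s[k:]**2)) / np.sqrt(np.sum(s**2)))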

Let's find the closest rank-40 matrix to $A$ and view it.

We can do this quite easily using the SVD.

We simply construct our approximation of $A$ using only the first 40 columns of $U$ and the top 40 rows of $V^T$.

In [5]:
#
u, s, vt = np.linalg.svd(boat, full_matrices = False)
s[40:] = 0
boatApprox = u @ np.diag(s) @ vt
plt.figure(figsize=(9,6))
plt.subplot(1,2,1)
plt.imshow(boatApprox,cmap = cm.Greys_r)
plt.axis('off')
plt.title('Rank 40 Boat')
plt.subplot(1,2,2)
plt.imshow(boat,cmap = cm.Greys_r)
plt.axis('off')
plt.title('Rank 512 Boat');
# plt.subplots_adjust(wspace=0.5)

Note that the rank-40 boat takes up only $40 \cdot (512 + 512)/(512 \cdot 512) = 80/512 \approx 16\%$ of the space of the original image!
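
That figure is just the $(m+n)k$ versus $mn$ storage count from earlier:

In [ ]:
k, m, n = 40, 512, 512
print(k * (m + n) / (m * n))    # 0.15625, i.e. about 16% of the original storage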

This general principle is what makes image, video, and sound compression effective.

When you

  • watch HDTV, or
  • listen to an MP3, or
  • look at a JPEG image,

these signals have been compressed using the fact that they are effectively low-rank matrices.

One interesting side effect of dimensionality reduction is that it often reduces the amount of noise in the data. Noise often shows up in the lower-order components of SVD, and dimensionality reduction sometimes removes it.
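
Here is a small hedged illustration of that effect: start from an exactly rank-5 hypothetical matrix, add noise, and observe that truncating the SVD back to rank 5 gives something closer to the clean matrix than the noisy one is.

In [ ]:
import numpy as np

rng = np.random.default_rng(2)
m, n, r = 100, 80, 5

clean = rng.normal(size=(m, r)) @ rng.normal(size=(r, n))   # exactly rank 5
noisy = clean + 0.1 * rng.normal(size=(m, n))               # add a little noise

U, s, Vt = np.linalg.svd(noisy, full_matrices=False)
denoised = U[:, :r] @ np.diag(s[:r]) @ Vt[:r, :]            # keep only the top 5 components

print(np.linalg.norm(noisy - clean, 'fro'))       # error of the noisy matrix
print(np.linalg.norm(denoised - clean, 'fro'))    # smaller after truncation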

Networks¶

We'll look at data traffic on the Abilene network:

In [6]:
#
display(Image("images/19-Abilene-map.png", width=500))

Source: Internet2, circa 2005

In [8]:
with open('data/net-traffic/odnames','r') as f:
    odnames = [line.strip() for line in f]
dates = pd.date_range('9/1/2003', freq = '10min', periods = 1008)
Atraf = pd.read_table('data/net-traffic/X', sep='  ', header=None, names=odnames, engine='python')
Atraf.index = dates
Atraf
Out[8]:
ATLA-ATLA ATLA-CHIN ATLA-DNVR ATLA-HSTN ATLA-IPLS ATLA-KSCY ATLA-LOSA ATLA-NYCM ATLA-SNVA ATLA-STTL ... WASH-CHIN WASH-DNVR WASH-HSTN WASH-IPLS WASH-KSCY WASH-LOSA WASH-NYCM WASH-SNVA WASH-STTL WASH-WASH
2003-09-01 00:00:00 8466132.0 29346537.0 15792104.0 3646187.0 21756443.0 10792818.0 14220940.0 25014340.0 13677284.0 10591345.0 ... 53296727.0 18724766.0 12238893.0 52782009.0 12836459.0 31460190.0 105796930.0 13756184.0 13582945.0 120384980.0
2003-09-01 00:10:00 20524567.0 28726106.0 8030109.0 4175817.0 24497174.0 8623734.0 15695839.0 36788680.0 5607086.0 10714795.0 ... 68413060.0 28522606.0 11377094.0 60006620.0 12556471.0 32450393.0 70665497.0 13968786.0 16144471.0 135679630.0
2003-09-01 00:20:00 12864863.0 27630217.0 7417228.0 5337471.0 23254392.0 7882377.0 16176022.0 31682355.0 6354657.0 12205515.0 ... 67969461.0 37073856.0 15680615.0 61484233.0 16318506.0 33768245.0 71577084.0 13938533.0 14959708.0 126175780.0
2003-09-01 00:30:00 10856263.0 32243146.0 7136130.0 3695059.0 28747761.0 9102603.0 16200072.0 27472465.0 9402609.0 10934084.0 ... 66616097.0 43019246.0 12726958.0 64027333.0 16394673.0 33440318.0 79682647.0 16212806.0 16425845.0 112891500.0
2003-09-01 00:40:00 10068533.0 30164311.0 8061482.0 2922271.0 35642229.0 9104036.0 12279530.0 29171205.0 7624924.0 11327807.0 ... 66797282.0 40408580.0 11733121.0 54541962.0 16769259.0 33927515.0 81480788.0 16757707.0 15158825.0 123140310.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2003-09-07 23:10:00 8849096.0 33461807.0 5866138.0 3786793.0 19097140.0 10561532.0 26092040.0 28640962.0 8343867.0 8820650.0 ... 65925313.0 21751316.0 11058944.0 58591021.0 17137907.0 24297674.0 83293655.0 17329425.0 20865535.0 123125390.0
2003-09-07 23:20:00 9776675.0 31474607.0 5874654.0 11277465.0 14314837.0 9106198.0 26412752.0 26168288.0 8638782.0 9193717.0 ... 70075490.0 29126443.0 12667321.0 54571764.0 15383038.0 25238842.0 70015955.0 16526455.0 16881206.0 142106800.0
2003-09-07 23:30:00 9144621.0 32117262.0 5762691.0 7154577.0 17771350.0 10149256.0 29501669.0 25998158.0 11343171.0 9423042.0 ... 68544458.0 27817836.0 15892668.0 50326213.0 12098328.0 27689197.0 73553203.0 18022288.0 18471915.0 127918530.0
2003-09-07 23:40:00 8802106.0 29932510.0 5279285.0 5950898.0 20222187.0 10636832.0 19613671.0 26124024.0 8732768.0 8217873.0 ... 65087776.0 28836922.0 11075541.0 52574692.0 11933512.0 31632344.0 81693475.0 16677568.0 16766967.0 138180630.0
2003-09-07 23:50:00 8716795.6 22660870.0 6240626.4 5657380.6 17406086.0 8808588.5 15962917.0 18367639.0 7767967.3 7470650.1 ... 65599891.0 25862152.0 11673804.0 60086953.0 11851656.0 30979811.0 73577193.0 19167646.0 19402758.0 137288810.0

1008 rows × 121 columns

In [10]:
Atraf.shape
Out[10]:
(1008, 121)

As we would expect, our traffic matrix has rank 121:

In [11]:
np.linalg.matrix_rank(Atraf)
Out[11]:
121

However -- perhaps it has low effective rank.

The numpy routine for computing SVD is np.linalg.svd:

In [12]:
u, s, vt = np.linalg.svd(Atraf)

Now let's look at the singular values of Atraf to see if it can be usefully approximated as a low-rank matrix:

In [13]:
#
fig = plt.figure(figsize=(6,4))
plt.plot(range(1,1+len(s)),s)
plt.xlabel(r'$k$',size=20)
plt.ylabel(r'$\sigma_k$',size=20)
plt.ylim(ymin = 0)
plt.xlim(xmin = -1)
plt.title(r'Singular Values of $A$',size=20);

This classic sharp-elbow pattern tells us that a few singular values are very large, and most singular values are quite small.

Zooming in on just the small $k$ values, we can see that the elbow is around 4 to 6 singular values:

In [14]:
#
fig = plt.figure(figsize = (6, 4))
Anorm = np.linalg.norm(Atraf)
plt.plot(range(1, 21), s[0:20]/Anorm, '.-')
plt.xlim([0.5, 20])
plt.ylim([0, 1])
plt.xlabel(r'$k$', size=20)
plt.xticks(range(1, 21))
plt.ylabel(r'$\sigma_k$', size=20);
plt.title(r'Singular Values of $A$',size=20);

This pattern of singular values suggests low effective rank.

Let's compute the relative error of a rank-$k$ approximation to $A$ by measuring the Frobenius norm $\|A - A^{(k)}\|_F$ (which basically treats the matrix as a vector):

In [15]:
#
fig = plt.figure(figsize = (6, 4))
Anorm = np.linalg.norm(Atraf)
err = np.cumsum(s[::-1]**2)
err = np.sqrt(err[::-1])
plt.plot(range(0, 20), err[:20]/Anorm, '.-')
plt.xlim([0, 20])
plt.ylim([0, 1])
plt.xticks(range(1, 21))
plt.xlabel(r'$k$', size = 16)
plt.ylabel(r'relative F-norm error', size=16)
plt.title(r'Relative Error of rank-$k$ approximation to $A$', size=16);

Remarkably, we are down to 9% relative error using only a rank-20 approximation to $A$.

So instead of storing

  • $mn =$ (1008 $\cdot$ 121) = 121,968 values,

we only need to store

  • $k(m+n) =$ 20 $\cdot$ (1008 + 121) = 22,580 values,

which is an 81% reduction in size.
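
As a sanity check, we can build the rank-20 approximation of Atraf explicitly and measure its relative error directly (reusing the data loaded above):

In [ ]:
k = 20
u, s, vt = np.linalg.svd(Atraf, full_matrices=False)
Atraf_k = u[:, :k] @ np.diag(s[:k]) @ vt[:k, :]

# relative Frobenius error -- should be close to the ~9% quoted above
print(np.linalg.norm(Atraf.values - Atraf_k) / np.linalg.norm(Atraf.values))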

Low Effective Rank is Common¶

In practice many datasets have low effective rank.

Here are some more examples.

User preferences over items.

Example: the Netflix prize worked with partially-observed matrices like this:

$$
\begin{bmatrix}
 & & & \vdots & & \\
 & 3 & & & 2 & \\
1 & & 1 & & & 1 \\
\cdots & & 2 & & 4 & \cdots \\
 & 5 & & 5 & & 4 \\
 & 1 & & & 1 & 5 \\
 & & & \vdots & &
\end{bmatrix}
$$

Here, the rows correspond to users, the columns to movies, and the entries are ratings.

Although the problem matrix was of size 500,000 × 18,000, the winning approach modeled the matrix as having rank 20 to 40.

Source: [Koren et al, IEEE Computer, 2009]

Likes on Facebook.

Here, the matrices are

  1. Number of likes: Timebins × Users
  2. Number of likes: Users × Page Categories
  3. Entropy of likes across categories: Timebins × Users

Source: [Viswanath et al., Usenix Security, 2014]

In [16]:
#
display(Image("images/18-facebook.png", width=700))

Social Media Activity.

Here, the matrices are

  1. Number of Yelp reviews: Timebins × Users
  2. Number of Yelp reviews: Users × Yelp Categories
  3. Number of Tweets: Users × Topic Categories

Source: [Viswanath et al., Usenix Security, 2014]

In [17]:
#
display(Image("images/18-yelp-twitter.png", width=700))

Summary¶

The reduced singular value decomposition of a rank-$r$ matrix $A$ has the form:

$$A = U\Sigma V^T$$

where

  1. $U$ is $m \times r$.
  2. The columns of $U$ are mutually orthogonal and unit length, i.e., $U^TU = I$.
  3. $V$ is $n \times r$.
  4. The columns of $V$ are mutually orthogonal and unit length, i.e., $V^TV = I$.
  5. The matrix $\Sigma$ is an $r \times r$ diagonal matrix, whose diagonal values are $\sigma_1 \geq \sigma_2 \geq \cdots \geq \sigma_r > 0$.

33.2 Interpretations of Low Effective Rank¶

How can we understand the low-effective-rank phenomenon in general?

There are two helpful interpretations:

  1. Common Patterns
  2. Latent Factors

Low Rank Implies Common Patterns¶

The first interpretation of low-rank behavior is in answering the question:

"What is the strongest pattern in the data?"

Using the SVD, we form the low-rank approximation by taking

  • $U' =$ the $k$ leftmost columns of $U$,
  • $\Sigma' =$ the $k \times k$ upper left submatrix of $\Sigma$, and
  • $V' =$ the $k$ leftmost columns of $V$,

and constructing

$$A \approx U'\Sigma'(V')^T$$

In this interpretation, we think of each column of $A$ as a combination of the columns of $U'$.

How can this be helpful?

Consider the set of network traffic traces. There are clearly some common patterns. How can we find them?

In [13]:
#
with open('data/net-traffic/AbileneFlows/odnames','r') as f:
    odnames = [line.strip() for line in f]
dates = pd.date_range('9/1/2003',freq='10min',periods=1008)
Atraf = pd.read_table('data/net-traffic/AbileneFlows/X',sep='  ',header=None,names=odnames,engine='python')
Atraf.index = dates
plt.figure(figsize=(10,8))
for i in range(1,13):
    ax = plt.subplot(4,3,i)
    Atraf.iloc[:,i-1].plot()
    plt.title(odnames[i-1])   # label matches the column actually plotted
plt.subplots_adjust(hspace=1)
plt.suptitle('Twelve Example Traffic Traces', size=20);

Let's use as our example $a_1$, the first column of $A$.

This happens to be the ATLA-CHIN flow.

The equation above tells us that

$$a_1 \approx v_{11}\sigma_1 u_1 + v_{12}\sigma_2 u_2 + \cdots + v_{1k}\sigma_k u_k.$$

In other words, $u_1$ (the first column of $U$) is the "strongest" pattern occurring in $A$, and its strength is measured by $\sigma_1$.
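
We can check this directly: rebuild $a_1$ from just the first few $\sigma_i u_i$ patterns, weighted by the first row of $V$, and compare with the actual column (a sketch reusing Atraf; the choice $k = 5$ is just an illustrative value near the elbow).

In [ ]:
u, s, vt = np.linalg.svd(Atraf, full_matrices=False)

k = 5                                        # a few of the strongest patterns
a1 = Atraf.values[:, 0]                      # the actual first column of A
a1_approx = u[:, :k] @ (s[:k] * vt[:k, 0])   # v_11 sigma_1 u_1 + ... + v_1k sigma_k u_k

print(np.linalg.norm(a1 - a1_approx) / np.linalg.norm(a1))   # small relative error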

Here is a view of the first 2 columns of $U\Sigma$ for the traffic matrix data.

These are the strongest patterns occurring across all of the 121 traces.

In [12]:
#
u, s, vt = np.linalg.svd(Atraf, full_matrices = False)
uframe = pd.DataFrame(u @ np.diag(s), index=pd.date_range('9/1/2003', freq = '10min', periods = 1008))
uframe[0].plot()
uframe[1].plot()
plt.title(r'First Two Columns of $U\Sigma$');

Low Rank Defines Latent Factors¶

The next interpretation of low-rank behavior is that it exposes "latent factors" that describe the data.

Returning to the low-rank decomposition:

$$A \approx U'\Sigma'(V')^T$$

In this interpretation, we think of each element of $A$ as the inner product of a row of $U'\Sigma'$ and a row of $V'$.

Let's say we are working with a matrix of users and items.

In particular, let the items be movies and matrix entries be ratings, as in the Netflix prize.

Recall the structure from a previous slide:

$$
\text{users}\left\{\;\overbrace{\begin{bmatrix} \vdots & \vdots & & \vdots \\ a_1 & a_2 & \cdots & a_n \\ \vdots & \vdots & & \vdots \end{bmatrix}}^{\text{movies}}\right.
=
\overbrace{\begin{bmatrix} \vdots & & \vdots \\ \sigma_1 u_1 & \cdots & \sigma_k u_k \\ \vdots & & \vdots \end{bmatrix}}^{k}
\times
\begin{bmatrix} \cdots & v_1 & \cdots \\ & \vdots & \\ \cdots & v_k & \cdots \end{bmatrix}
$$

Then the rating that a user gives a movie is the inner product of a $k$-element vector that corresponds to the user, and a $k$-element vector that corresponds to the movie.

In other words:

$$a_{ij} = u_i^T v_j$$

We can therefore think of user $i$'s preferences as being captured by $u_i$, i.e., a point in $\mathbb{R}^k$.

We have described everything we need to know to predict user $i$'s ratings via a $k$-element vector.

The $k$-element vector is called a latent factor.

Likewise, we can think of $v_j$ as a "description" of movie $j$ (another latent factor).
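
A small sketch of this inner-product view, using a tiny hypothetical ratings matrix rather than the real Netflix data: each rating is approximated by the dot product of the user's latent vector (a row of $U'\Sigma'$) and the movie's latent vector (a row of $V'$).

In [ ]:
import numpy as np

# tiny hypothetical ratings matrix: 4 users x 5 movies
A = np.array([[5., 4., 1., 1., 2.],
              [4., 5., 1., 2., 1.],
              [1., 1., 5., 4., 5.],
              [2., 1., 4., 5., 4.]])

k = 2
U, s, Vt = np.linalg.svd(A, full_matrices=False)
user_factors  = U[:, :k] * s[:k]      # rows of U' Sigma': one k-vector per user
movie_factors = Vt[:k, :].T           # rows of V': one k-vector per movie

i, j = 0, 2                           # user 0, movie 2
print(user_factors[i] @ movie_factors[j])    # predicted rating a_ij
print(A[i, j])                               # actual rating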

The value in using latent factors comes from the summarization of user preferences, and the predictive power one obtains.

For example, the winning entry in the Netflix prize competition modeled user preferences with a 20-element latent factor.

The remarkable thing is that a person's preferences for all 18,000 movies can be reasonably well captured in a 20-element vector!

Here is a figure from the paper that described the winning strategy in the Netflix prize.

It shows a hypothetical latent space in which each user, and each movie, is represented by a latent vector.

In [19]:
#
display(Image("images/19-movie.png", width=500))

Source: Koren et al, IEEE Computer, 2009

In practice, this is perhaps a 20- or 40-dimensional space.

Here are some representations of movies in that space (reduced to 2-D).

Notice how the space seems to capture similarity among movies!

In [20]:
#
display(Image("images/19-netflix.png", width=500))

Source: Koren et al, IEEE Computer, 2009

In summary:

  • When we are working with data matrices, it is valuable to consider the effective rank.
  • Many (many) datasets in real life show low effective rank.
  • This property can be explored precisely using the Singular Value Decomposition of the matrix.
  • When low effective rank is present,
    • the matrix can be compressed with only small loss of accuracy
    • we can extract the "strongest" patterns in the data
    • we can describe each data item in terms of the inner product of latent factors.