# for QR codes use inline
%matplotlib inline
qr_setting = 'url'
#
# for lecture use notebook
# %matplotlib notebook
# qr_setting = None
#
%config InlineBackend.figure_format='retina'
# import libraries
import numpy as np
import matplotlib as mp
import pandas as pd
import matplotlib.pyplot as plt
import laUtilities as ut
import slideUtilities as sl
import demoUtilities as dm
from importlib import reload
from datetime import datetime
from matplotlib import animation
from IPython.display import Image
from IPython.display import display_html
from IPython.display import display
from IPython.display import Math
from IPython.display import Latex
from IPython.display import HTML;
from collections import Counter
import string
import random
import itertools
Of all of the linear transformations associated with a square matrix, scaling is special because:
If a matrix $A$ scales $\mathbf{x}$, then that transformation could also have been expressed without a matrix-vector multiplication, i.e., as $\lambda\mathbf{x}$ for some scalar value $\lambda$.
An eigenvector of a matrix $A$ is a special vector that does not change its direction when multiplied by $A$.
The eigenvalues of a matrix $A$ are all of the scalars $\lambda$ that an eigenvector of $A$ can be scaled by.
#
ax = ut.plotSetup(size=(12,8))
ut.centerAxes(ax)
A = np.array([[3,-2],[1,0]])
u = np.array([-1,1])
v = np.array([2,1])
#
ut.plotArrowVec(ax, v, [0,0], color='Red')
ut.plotArrowVec(ax, A.dot(v), [0,0], color='Red')
ax.text(v[0],v[1]+0.2,r'${\bf v}$',size=20)
ax.text(A.dot(v)[0],A.dot(v)[1]+0.2,r'$A{\bf v}$',size=20)
#
ut.plotArrowVec(ax, u, [0,0], color='Blue')
ut.plotArrowVec(ax, A.dot(u), [0,0], color='Blue')
ax.text(u[0]-0.5,u[1]+0.1,r'${\bf u}$',size=20)
ax.text(A.dot(u)[0]-0.7,A.dot(u)[1]+0.3,r'$A{\bf u}$',size=20);
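The matrix and vectors below are the same ones used in the plot above; this quick numerical check (a sketch, not part of the original lecture code) confirms that $\mathbf{v}$ is an eigenvector of $A$ while $\mathbf{u}$ is not:
#
A = np.array([[3, -2], [1, 0]])
u = np.array([-1, 1])
v = np.array([2, 1])
print(A @ v, 2 * v)          # [4 2] [4 2] -- Av points in the same direction as v, scaled by 2
print(A @ u)                 # [-5 -1] -- Au is not a scalar multiple of u
print(np.linalg.eig(A)[0])   # the eigenvalues of A are 2 and 1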
To find an eigenvalue: we saw that $\lambda$ is an eigenvalue of an $n \times n$ matrix $A$ if and only if the equation
$$(A - \lambda I)\mathbf{x} = \mathbf{0}$$
has a nontrivial solution.
Some special cases: the equation $(A - \lambda I)\mathbf{x} = \mathbf{0}$ has a nontrivial solution exactly when $A - \lambda I$ is not invertible, and a square matrix is not invertible exactly when its determinant is zero.
Combining these properties leads to the characteristic equation: a scalar $\lambda$ is an eigenvalue of an $n \times n$ matrix $A$ if and only if $\lambda$ satisfies
$$\det(A - \lambda I) = 0.$$
This equation yields a polynomial in $\lambda$ of degree $n$, meaning there are at most $n$ solutions.
To find all eigenvectors corresponding to an eigenvalue $\lambda$: compute the null space of the matrix $A - \lambda I$.
(Remember that this forms a subspace, which we call the eigenspace of $A$ corresponding to $\lambda$.)
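Here is a minimal sketch of this recipe, applied to the $2 \times 2$ matrix from the plot above (the SVD is used as a convenient numerical way to get a basis for a 1-dimensional null space):
#
A = np.array([[3, -2], [1, 0]])
# characteristic polynomial: det(A - lambda I) = lambda^2 - 3 lambda + 2
lam = np.roots([1, -3, 2])
print(lam)                           # eigenvalues: 2 and 1
for l in lam:
    M = A - l * np.eye(2)
    _, s, Vt = np.linalg.svd(M)
    print(l, Vt[-1])                 # last right-singular vector spans the null space of M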
A difference equation (also known as a dynamical system or recurrence relation) contains two parts: a rule that maps the current state to the next state, $\mathbf{x}_{k+1} = A\mathbf{x}_k$, and an initial condition $\mathbf{x}_0$.
A Markov chain is a dynamical system whose state is a probability vector and whose state transitions are determined by a stochastic matrix $P$, meaning that
$$\mathbf{x}_{k+1} = P\mathbf{x}_k, \qquad k = 0, 1, 2, \dots$$
Here: each state $\mathbf{x}_k$ is a probability vector (its entries are nonnegative and sum to 1), and each column of $P$ is itself a probability vector (so each column of $P$ sums to 1).
An important question about a Markov Chain is: what will happen in the distant future?
If $P$ is a stochastic matrix, then a steady-state vector (or equilibrium vector) for $P$ is a probability vector $\mathbf{q}$ such that:
$$P\mathbf{q} = \mathbf{q}.$$
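The cell below plots the three components of the state vector over 15 steps for a small example chain, but the cell that builds the array xs is not shown here. The following is a minimal sketch that produces such an array; the 3-state stochastic matrix P_example and the initial vector are illustrative assumptions, not the values used in the original lecture.
#
# hypothetical 3-state stochastic matrix (each column sums to 1) -- an assumption for illustration
P_example = np.array([[0.5, 0.2, 0.3],
                      [0.3, 0.8, 0.3],
                      [0.2, 0.0, 0.4]])
# iterate x_{k+1} = P x_k for 15 steps, storing each state as a row of xs
xs = np.zeros((15, 3))
xs[0] = np.array([1.0, 0.0, 0.0])    # assumed initial probability vector
for k in range(1, 15):
    xs[k] = P_example @ xs[k-1]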
ax = plt.subplot(131)
plt.plot(range(15),xs.T[0],'o-')
ax.set_ylim([0,1])
plt.title(r'$x_1$',size=24)
ax = plt.subplot(132)
plt.plot(range(15),xs.T[1],'o-')
ax.set_ylim([0,1])
plt.title(r'$x_2$',size=24)
ax = plt.subplot(133)
plt.plot(range(15),xs.T[2],'o-')
ax.set_ylim([0,1])
plt.title(r'$x_3$',size=24)
plt.tight_layout()
We can find a steady state vector by solving the equation $P\mathbf{q} = \mathbf{q}$, or equivalently
$$(P - I)\mathbf{q} = \mathbf{0},$$
and restricting to vectors that are probability vectors.
To solve a Markov Chain for its steady state: find the null space of $P - I$, then rescale a basis vector of that null space so that its entries sum to 1 (a numerical sketch of this recipe is shown below).
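Here is a minimal numerical sketch of that recipe, applied to the 2-by-2 population-movement matrix used later in this lecture (np.linalg.eig is used as a shortcut for finding a vector in the null space of $P - I$):
#
P = np.array([[0.95, 0.03],
              [0.05, 0.97]])
eigvals, eigvecs = np.linalg.eig(P)
q = eigvecs[:, np.argmin(np.abs(eigvals - 1.0))]   # eigenvector for the eigenvalue 1
q = q / q.sum()                                    # rescale so the entries sum to 1
print(q)                                           # [0.375 0.625]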
We can place the theory of Markov Chains into the broader context of eigenvalues and eigenvectors.
Theorem. The largest eigenvalue of a Markov Chain is 1.
Proof. First of all, 1 is an eigenvalue of a Markov chain, and its corresponding eigenvector is the steady state vector $\mathbf{q}$. This is because
$$P\mathbf{q} = \mathbf{q} = 1 \cdot \mathbf{q}.$$
To prove that 1 is the largest eigenvalue, recall that each column of a Markov Chain's matrix $P$ sums to 1.
Then, consider the sum of the entries in the vector $P\mathbf{x}$, for a probability vector $\mathbf{x}$.
Let's just sum the first terms in each component of $P\mathbf{x}$: these are $P_{11}x_1, P_{21}x_1, \dots, P_{n1}x_1$, and they add up to $x_1$ because the first column of $P$ sums to 1. Likewise the second terms add up to $x_2$, and so on.
So we can see that the sum of all terms in $P\mathbf{x}$ is equal to $x_1 + x_2 + \cdots + x_n$ -- i.e., the sum of all terms in $\mathbf{x}$.
So there can be no $\lambda > 1$ and probability vector $\mathbf{x}$ such that
$$P\mathbf{x} = \lambda\mathbf{x},$$
since the entries of $P\mathbf{x}$ must sum to the same value as the entries of $\mathbf{x}$, while the entries of $\lambda\mathbf{x}$ would sum to $\lambda > 1$.
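A quick numerical illustration of this column-sum argument (a sketch, using the population-movement matrix that appears in the example below):
#
P = np.array([[0.95, 0.03],
              [0.05, 0.97]])
x = np.array([0.6, 0.4])         # a probability vector
print(x.sum(), (P @ x).sum())    # multiplying by P preserves the sum of the entries (both are 1.0)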
Previously, we were only able to ask about the "eventual" steady state of a Markov Chain.
But a crucial question is: how long does it take for a particular Markov Chain to reach steady state from some initial starting condition?
Example. Let's use our running example of population movement, defined by
$$\mathbf{x}_{k+1} = A\mathbf{x}_k, \qquad A = \begin{bmatrix} 0.95 & 0.03 \\ 0.05 & 0.97 \end{bmatrix}.$$
Let's ask how long until it reaches steady state, from the starting point defined as
$$\mathbf{x}_0 = \begin{bmatrix} 0.6 \\ 0.4 \end{bmatrix}.$$
Remember that $\mathbf{x}_0$ is a probability vector -- its entries are nonnegative and sum to 1.
#
ax = ut.plotSetup(-0.1,1.7,-0.1,1.2)
ut.centerAxes(ax)
A = np.array([[0.95,0.03],[0.05,0.97]])
v1 = np.array([0.375,0.625])
v2 = np.array([0.225,-0.225])
x0 = v1 + v2
#
ax.plot([1,0],[0,1],'b--')
ax.plot(x0[0],x0[1],'bo')
ax.text(x0[0]+0.02,x0[1]+0.02,r'${\bf x_0}$',size=16)
#ax.text(A.dot(x0)[0]+0.2,A.dot(x0)[1]+0.2,r'$A{\bf x_0}$',size=16)
# ax.plot([-10,10],[5*10/6.0,-5*10/6.0],'b-')
#
ax.annotate('Initial State', xy=(x0[0], x0[1]), xycoords='data',
xytext=(0.4, 0.8), textcoords='data',
size=15,
#bbox=dict(boxstyle="round", fc="0.8"),
arrowprops={'arrowstyle': 'simple',
'fc': '0.5',
'ec': 'none',
'connectionstyle' : 'arc3,rad=-0.3'},
);
The matrix $A$ has the characteristic equation:
$$\det(A - \lambda I) = (0.95 - \lambda)(0.97 - \lambda) - (0.03)(0.05) = \lambda^2 - 1.92\lambda + 0.92 = 0.$$
Using the quadratic formula, we find the roots of this equation to be 1 and 0.92. (Note that, as expected, 1 is the largest eigenvalue.)
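A one-line numerical check of those roots:
print(np.roots([1, -1.92, 0.92]))   # the roots of lambda^2 - 1.92 lambda + 0.92 are 1 and 0.92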
Next, we can find a basis for each eigenspace of $A$ (each null space of $A - \lambda I$).
For $\lambda_1 = 1$, the null space of $A - I$ is spanned by $\begin{bmatrix} 3 \\ 5 \end{bmatrix}$; scaling this to a probability vector, we take $\mathbf{v}_1 = \begin{bmatrix} 0.375 \\ 0.625 \end{bmatrix}$. For $\lambda_2 = 0.92$, the null space of $A - 0.92I$ is spanned by $\mathbf{v}_2 = \begin{bmatrix} 1 \\ -1 \end{bmatrix}$.
Next, we write the initial state $\mathbf{x}_0$ as a linear combination of $\mathbf{v}_1$ and $\mathbf{v}_2$. This can be done because $\{\mathbf{v}_1, \mathbf{v}_2\}$ is obviously a basis for $\mathbb{R}^2$.
To write $\mathbf{x}_0$ this way, we want to solve the vector equation
$$c_1\mathbf{v}_1 + c_2\mathbf{v}_2 = \mathbf{x}_0.$$
In other words:
$$\begin{bmatrix} \mathbf{v}_1 & \mathbf{v}_2 \end{bmatrix} \begin{bmatrix} c_1 \\ c_2 \end{bmatrix} = \mathbf{x}_0.$$
The matrix $\begin{bmatrix} \mathbf{v}_1 & \mathbf{v}_2 \end{bmatrix}$ is invertible, so
$$\begin{bmatrix} c_1 \\ c_2 \end{bmatrix} = \begin{bmatrix} \mathbf{v}_1 & \mathbf{v}_2 \end{bmatrix}^{-1} \mathbf{x}_0 = \begin{bmatrix} 0.375 & 1 \\ 0.625 & -1 \end{bmatrix}^{-1} \begin{bmatrix} 0.6 \\ 0.4 \end{bmatrix} = \begin{bmatrix} 1 \\ 0.225 \end{bmatrix}.$$
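A minimal check of this computation in numpy:
#
v1 = np.array([0.375, 0.625])
v2 = np.array([1, -1])
x0 = np.array([0.6, 0.4])
c = np.linalg.solve(np.column_stack([v1, v2]), x0)   # solve [v1 v2] c = x0
print(c)                                             # [1.    0.225]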
So, now we can put it all together.
We know that
$$\mathbf{x}_0 = c_1\mathbf{v}_1 + c_2\mathbf{v}_2,$$
and we know the values of $c_1$ and $c_2$ that make this true.
And then we can use the eigendecomposition to compute subsequent terms $\mathbf{x}_1, \mathbf{x}_2, \dots$:
$$\mathbf{x}_1 = A\mathbf{x}_0 = c_1 A\mathbf{v}_1 + c_2 A\mathbf{v}_2 = c_1\lambda_1\mathbf{v}_1 + c_2\lambda_2\mathbf{v}_2.$$
Now note the power of the eigenvalue approach: each further multiplication by $A$ simply multiplies each coefficient by the corresponding eigenvalue -- no matrix-vector multiplication is needed.
And so in general:
$$\mathbf{x}_k = c_1\lambda_1^k\mathbf{v}_1 + c_2\lambda_2^k\mathbf{v}_2, \qquad k = 0, 1, 2, \dots$$
And using the $\lambda$s, $\mathbf{v}$s, and $c$s we computed above:
$$\mathbf{x}_k = \begin{bmatrix} 0.375 \\ 0.625 \end{bmatrix} + 0.225\,(0.92)^k \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \qquad k = 0, 1, 2, \dots$$
This explicit formula for $\mathbf{x}_k$ gives the solution of the Markov Chain starting from the initial state $\mathbf{x}_0$.
In other words: we can jump directly to any future state, without stepping through the states in between.
As $k \to \infty$, $(0.92)^k \to 0$. Thus the Markov chain converges to the steady state
$$\mathbf{x}_k \to \mathbf{v}_1 = \begin{bmatrix} 0.375 \\ 0.625 \end{bmatrix}.$$
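As a sanity check, we can compare the explicit formula with straightforward repeated multiplication by $A$ (this sketch uses $k = 10$):
#
A = np.array([[0.95, 0.03],
              [0.05, 0.97]])
x0 = np.array([0.6, 0.4])
k = 10
via_iteration = np.linalg.matrix_power(A, k) @ x0
via_formula = np.array([0.375, 0.625]) + 0.225 * (0.92 ** k) * np.array([1, -1])
print(via_iteration, via_formula)    # the two computations agree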
#
ax = ut.plotSetup(-0.1,1.7,-0.1,1.2)
ut.centerAxes(ax)
A = np.array([[0.95,0.03],[0.05,0.97]])
v1 = np.array([0.375,0.625])
v2 = np.array([0.225,-0.225])
x0 = v1 + v2
#
ax.plot([1,0],[0,1],'b--')
ax.text(v1[0]+0.02,v1[1]+0.02,r'${\bf v_1}$',size=16)
ax.plot(x0[0],x0[1],'bo')
v = np.zeros((40,2))
for i in range(40):
v[i] = v1+(0.92**i)*v2
ax.plot(v[i,0],v[i,1],'o')
ax.text(v[4][0]+0.02,v[4][1]+0.02,r'${\bf x_4}$',size=12)
ax.text(v[10][0]+0.02,v[10][1]+0.02,r'${\bf x_{10}}$',size=12)
ax.text(x0[0]+0.02,x0[1]+0.02,r'${\bf x_0}$',size=16)
ax.plot(v1[0],v1[1],'ro')
#ax.text(A.dot(x0)[0]+0.2,A.dot(x0)[1]+0.2,r'$A{\bf x_0}$',size=16)
# ax.plot([-10,10],[5*10/6.0,-5*10/6.0],'b-')
#
ax.annotate('Steady State', xy=(v1[0], v1[1]), xycoords='data',
xytext=(0.1, 0.2), textcoords='data',
size=15,
#bbox=dict(boxstyle="round", fc="0.8"),
arrowprops={'arrowstyle': 'simple',
'fc': '0.5',
'ec': 'none',
'connectionstyle' : 'arc3,rad=-0.3'},
)
ax.annotate('Initial State', xy=(v[0,0], v[0,1]), xycoords='data',
xytext=(0.4, 0.8), textcoords='data',
size=15,
#bbox=dict(boxstyle="round", fc="0.8"),
arrowprops={'arrowstyle': 'simple',
'fc': '0.5',
'ec': 'none',
'connectionstyle' : 'arc3,rad=-0.3'},
)
print('')
Markov Chains have been deployed in many different applications. The earliest domain in which they proved useful was the analysis of written language.
We'll walk through some of the early experiments in this domain.
Our goal: Make our computer write us a sentence.
Generate text by randomly selecting letters of the alphabet. Letters are equiprobable.
alphabet = list(string.ascii_lowercase + " ")
sentence0 = ''
for _ in range(100):
sentence0 += random.choice(alphabet)
print(sentence0)
ael piqomlhtlkyashc sgbahadtremjvahuxuqilvtudxgglotfoyqekpepktmaknvyowdjhdoavwmlpllzlfxeqvkgqxmrgdz
This looks more like a password generator than a text generator.
Fortunately, we know some things about English! First, not every letter occurs with the same frequency -- 's' and 'e' will be more common than 'q' and 'z'.
# From https://en.wikipedia.org/wiki/Letter_frequency
eng_freq = {'e': 0.09992, 't': 0.07424, 'a': 0.06432, 'o': 0.06112, 'i': 0.06056, 'n': 0.05784, 's': 0.05208, 'r': 0.05024, 'h': 0.04040, 'l': 0.03256, 'd': 0.03056, 'c': 0.02672, 'u': 0.02184, 'm': 0.02008, 'f': 0.0192, 'p': 0.01712, 'g': 0.01496, 'w': 0.01344, 'y': 0.01328, 'b': 0.01184, 'v': 0.0084, 'k': 0.00432, 'x': 0.00184, 'j': 0.00128, 'q': 0.00096, 'z': 0.00072, ' ': 0.2}
fig = plt.figure()
ax = fig.add_subplot(111)
i = np.arange(27)
ax.bar(i, [eng_freq[l] * 100 for l in sorted(eng_freq.keys())], width=0.8,
color='b', alpha=0.5, align='center')
ax.set_xticks(range(len(alphabet)))
ax.set_xticklabels(sorted(alphabet))
ax.set_xlabel("Letters")
ax.set_ylabel("Frequency (%)")
plt.show()
Generate text that reflects the relative frequencies of each letter in English.
sentence = random.choices(population=list(eng_freq.keys()), weights=eng_freq.values(), k=100)
print(''.join(sentence))
eloetmn enutle onsrl ulnbh nc h stnree roanevaiiee ansuamleand afio uatrsitr ynlkhfde oienmaadodt
This is slightly more comprehensible. But we can do better.
Letter frequency analysis is what Andrey Markov conducted in his initial paper on Markov Chains. He took Alexander Pushkin's Eugene Onegin and sampled, by hand, the first 20,000 letters of the novel.
He counted the number of vowels, the number of consonants, and the number of vowel-vowel, consonant-consonant, and vowel-consonant pairs.
He found that the first 20,000 letters were 8,638 (43%) vowels and 11,362 (57%) consonants.
If each letter were independent, we would expect the chance of two sequential vowels to be $0.43 \times 0.43 \approx 0.18$. This equates to 3,698 vowel-vowel pairs in 20,000 characters.
But Markov observed 1,104 -- nearly 1/3 the expected number!
Thus, we can conclude what we know intuitively -- that there is a dependence relationship between letters.
Thirty years later, another mathematician began investigating this dependence relationship between letters.
Claude Shannon was a mathematician and early computer scientist. He is considered by many to be the father of information theory, and had a significant impact on the direction of modern computing (including that we store information in bits!). During WWII, Shannon worked as a codebreaker, which led to his later work on English text analysis, information entropy, and redundancy.
Shannon observed that "anyone speaking a language possesses, implicitly, an enormous knowledge of the statistics of the language." He devised multiple experiments to demonstrate this.
For example, imagine the following game: take a sentence of English text, hide it from a friend, and reveal it one letter at a time, asking the friend to guess each next letter before it is revealed. People guess correctly far more often than the 1-in-27 rate that random chance would give.
One observation from this: even beyond the likelihood of moving between consonants and vowels, there are many common combinations of letters. And the preceding letters tell us a lot about which letter is most likely to come next!
Let's analyze some real text and find out which combinations of letters are most likely. In other words, let's estimate the probability of each letter transitioning to another.
# Check out Project Gutenberg for texts in the public domain
# https://www.gutenberg.org/
with open('data/moby.txt', 'r') as file:
text1 = file.read().rstrip()
print(f'Moby Dick has {len(text1)} characters')
with open('data/wonderland.txt', 'r') as file:
text2 = file.read().rstrip()
print(f'Alice in Wonderland has {len(text2)} characters')
with open('data/pride.txt', 'r') as file:
text3 = file.read().rstrip()
print(f'Pride & Prejudice has {len(text3)} characters')
text = text1 + text2 + text3
counts = Counter(text) # Counter({'l': 2, 'H': 1, 'e': 1, 'o': 1})
#print(counts)
letter_prob = {}
for i in counts.most_common():
letter_prob[i[0]] = round(i[1]/len(text),4)
print(i)
Moby Dick has 1147741 characters
Alice in Wonderland has 134814 characters
Pride & Prejudice has 664175 characters
(' ', 362026) ('e', 198344) ('t', 144438) ('a', 127336) ('o', 116660) ('i', 110303) ('n', 109561) ('h', 103390) ('s', 102978) ('r', 89242) ('l', 68835) ('d', 64969) ('u', 44900) ('m', 39815) ('c', 38116) ('w', 36802) ('f', 34533) ('g', 33177) ('y', 31703) ('b', 27233) ('p', 26799) ('v', 15056) ('k', 12315) ('q', 2385) ('j', 2079) ('x', 2076) ('z', 1659)
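Before building the full transition matrix, we can repeat Markov's vowel-counting experiment on our own corpus. This is a small sketch using the text and counts computed above; the exact numbers will depend on the texts loaded.
#
vowels = set('aeiou')
p_vowel = sum(counts[c] for c in vowels) / len(text)
vv_pairs = sum(1 for a, b in zip(text, text[1:]) if a in vowels and b in vowels)
print(f'P(vowel) = {p_vowel:.2f}')
print(f'observed vowel-vowel pairs: {vv_pairs}')
print(f'expected if letters were independent: {p_vowel**2 * (len(text) - 1):.0f}')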
stochastic_matrix = np.zeros(shape=(27,27))
# gram maps each character to the list of characters that immediately follow it in the text
gram = {}
for i in range(len(text)-1):
    seq = text[i:i+1]
    if seq not in gram:
        gram[seq] = []
    gram[seq].append(text[i+1])
# stochastic_matrix[i][j] = fraction of the time that alphabet[j] follows alphabet[i]
# (built this way, each row of the matrix sums to 1)
for i in range(27):
    for j in range(27):
        if alphabet[j] in gram[alphabet[i]]:
            letter_count = Counter(gram[alphabet[i]])
            stochastic_matrix[i][j] = letter_count[alphabet[j]]/sum(letter_count.values())
print(np.round(stochastic_matrix[0:10,0:10],2))
[[0.   0.03 0.03 0.05 0.   0.01 0.02 0.01 0.04 0.  ]
 [0.05 0.02 0.   0.   0.32 0.   0.   0.   0.04 0.01]
 [0.13 0.   0.02 0.   0.17 0.   0.   0.17 0.04 0.  ]
 [0.03 0.   0.   0.01 0.12 0.   0.   0.   0.07 0.  ]
 [0.05 0.   0.02 0.08 0.04 0.01 0.01 0.   0.01 0.  ]
 [0.06 0.   0.   0.   0.08 0.04 0.   0.   0.08 0.  ]
 [0.06 0.   0.   0.   0.12 0.   0.01 0.14 0.04 0.  ]
 [0.18 0.   0.   0.   0.43 0.   0.   0.   0.14 0.  ]
 [0.02 0.01 0.04 0.04 0.03 0.02 0.03 0.   0.   0.  ]
 [0.25 0.   0.   0.   0.24 0.   0.   0.   0.01 0.  ]]
Let's write a function that takes our training text, the number of letters to "look back," and the number of characters in our new sentence.
def ngram(text, n, k):
    # build a dictionary mapping each n-character sequence to the list of characters that follow it
    gram = {}
    for i in range(len(text)-n):
        seq = text[i:i+n]
        if seq not in gram:
            gram[seq] = []
        gram[seq].append(text[i+n])
    # start from a random n-gram, then repeatedly sample a next character given the last n characters
    sentence = random.choice(list(gram.keys()))
    for i in range(k):
        sentence += random.choice(gram[sentence[i:i+n]])
    return sentence
# One preceding letter (e.g. 't' --> 'e').
# This is a first-order Markov Chain
print(ngram(text,1,100))
veyonit e s ly whend mof uotaturof thif way ari liveyindeer os s f ats hokecheand ncar otaraithinggan
# Two preceding letters (e.g. 'th' --> 'e')
# This is a second-order Markov Chain
print(ngram(text,2,100))
eing but the he boady mr alat peribly re the of itteeptand trantty fough wougholl ritin boust aft land
# Three preceding letters (e.g. 'the' --> 'r')
# This is a third-order Markov Chain
print(ngram(text,3,100))
kness darchings prove body chapper reforgot to whilation or stubb wantages de boats sting alica feedily
# and so on...
print(ngram(text,4,1000))
uccordaged share of it know do compactolus fancy i supportablissed whatever have as into tell be thats supperson his shoe captain you seems may boy visit in his cetold on bonely could varies art then say but is comrades to the try of sing i see now him so drink when witness so should and white angely sat downwards impetuously replied overtebr and while and and the to became transportion aged thin cupied inted to though forms one and arms an endlingley were sailors and dear one of the cast but ever forming how home to that he reless of radney and go found and she air father whale wrong thing or with the personable and elizarrol gouge gryphony by the turtle lowly because in have no broken into ther engration a sorts starbuckling out that in queequences we fly in tell your owned out one or throughout one degreenlandles some reached with that man floor lover manufactual the cannot a place and whale suppressioner and be remained and more from mortance her father complimentation me is took that
At this point, as we are essentially memorizing whole words, it might be more expedient to switch to a word-based model.
ttext = text.split()
wcounts = Counter(ttext) # Counter({'l': 2, 'H': 1, 'e': 1, 'o': 1})
print(wcounts.most_common(5))
word_prob = {}
for i in wcounts.most_common():
word_prob[i[0]] = round(i[1]/len(ttext),4)
[('the', 20182), ('and', 10790), ('of', 10614), ('to', 9449), ('a', 7222)]
Randomly select words from our corpus.
sentence = ''
for _ in range(100):
sentence += random.choice(ttext) + ' '
print(sentence)
minded the her i the but lydia life from folded we a failed triangles by when so of him having boys by whalemen suggesting did aye opening elizabeth looks at weeks at morcar with favour apprehension my a first this my she the nothing board been and might suspected you breakfast ordered eye seaport the could i embark proved by my the were deck same ball talk how of and are atmosphere end wholly while a in keeps the glimpse out should that a previous nearer he on that my go with times is which meant you to him his
Choose the next word based on the preceding word.
def ngram_words(text, n, k):
    ttext = text.split()
    # build a dictionary mapping each n-word sequence to the list of words that follow it
    gram = {}
    for i in range(len(ttext)-n):
        seq = ' '.join(ttext[i:i+n])
        if seq not in gram:
            gram[seq] = []
        gram[seq].append(ttext[i+n])
    # start from a random n-word sequence, then repeatedly sample a next word given the last n words
    sentence = list(random.choice(list(gram.keys())).split())
    for i in range(k):
        sentence.append(random.choice(gram[' '.join(sentence[i:i+n])]))
    return ' '.join(sentence)
print(ngram_words(text,1,100))
fangs of kings in talking over it was palpable wonders and thin parallel and unentered forests whose eagerness for the wharf towards the clear of which deferred miss bingleys continued in such is all the kelson so honestly blind wall leaving queequeg salamed before i am not even tide of the world of himself always good bye dont fancy long to the like yonder roof bear a laughs the ship is in overcoming it is nothing seemed to produce one all out to a settled they mostly the table but a mildly employed in this motion and wickham and an activity
We're getting close to readable text, but you can imagine there is still a long way to go before we reach grammatically correct or even sensible text.
Modern text generation has moved to more complex models, but still operates on the same intuition -- based on the surrounding text, make a good prediction for what letter or word should come next.
(If you want to try some of this yourself, and see examples of Markov Chains deployed for amusement in the wild, check out https://github.com/jsvine/markovify).
Markov chains and their variants are still used in many real applications today.