import numpy as np
x = np.array([[2.5, 3.2], [0, 1], [2, -3]], dtype=np.float32)
print(x)
print(f"Shape {x.shape}")  # the shape is...
print(f"Number of dimensions: {x.ndim}")  # is a matrix (2 axis)
print(f"Number of elements: {x.size}")  # with 6 elements

v = np.array([2.5, 3.2])  # used later

[[ 2.5  3.2]
 [ 0.   1. ]
 [ 2.  -3. ]]
Shape (3, 2)
Number of dimensions: 2
Number of elements: 6

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

v = np.array([2.5, 3.2])
# all the X first, then all the Y
#        [X1   X2]    [Y1  Y2]
plt.plot([0, v[0]], [0, v[1]], 
         marker='x', color='red', lw=4,
         markersize=6)


import matplotlib
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
plt.style.use('seaborn-v0_8-white')
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 22}
matplotlib.rc('font', **font)

# Plotting
plt.figure(figsize=(10, 10))
plt.plot(v[0], v[1], marker='x', color='red', lw=4, markersize=6)
# all the X first, then all the Y
plt.plot([0, v[0]], [0, v[1]], marker='x', color='red', lw=4, markersize=6)
# Eyecandy
plt.axis('equal')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.title("Vector")
plt.annotate('(0, 0)', xy=(0, 0), xytext=(.1, -.3))
plt.annotate(f'({v[0]},{v[1]})', xy=(v[0], v[1]), xytext=(v[0], v[1]))
plt.axis([-5, 5, -5, 5])
plt.show()


#help(np.diag)
# This cell only creates the length-3 vector of ones [1., 1., 1.] and prints it.
# No diagonal matrix is built here: the vector would have to be passed to
# np.diag to obtain a matrix with these values on its main diagonal.
A = np.ones((3))
print(A)

[1. 1. 1.]


# Is ID symmetric? A matrix is symmetric when it equals its own transpose.
ID = np.diag(np.ones(3))  # 3x3 matrix with ones on the main diagonal
np.all(ID == ID.T)  # == compares element-wise; np.all reduces the result to one bool

True


# Generate array from 0 to 8 of int64
# reshape it in a 3x3 matrix
A = np.arange(9).reshape(3, 3)
print(f'A is \n {A}', end='\n'*2)
print(f'The transpose of A is \n {A.T}', end='\n'*2)
print(f'A is if type {A.dtype}')

A is 
 [[0 1 2]
 [3 4 5]
 [6 7 8]]

The transpose of A is 
 [[0 3 6]
 [1 4 7]
 [2 5 8]]

A is if type int64


print(A,end='\n\n')
# Possible to do a reduction on the matrix (sum along rows)
A_c = A.sum(axis=0, keepdims=False)  # 3 (rows are canceled out)
A_c.shape
print(A_c)

[[0 1 2]
 [3 4 5]
 [6 7 8]]

[ 9 12 15]


# Works for other operations too like mean (average)
A.mean(axis=0, keepdims=False)  # 3 (rows are canceled out)

array([3., 4., 5.])


# Element-wise addition of two matrices with the same shape.
A = np.arange(9).reshape(3, 3)
B = np.ones_like(A)  # matrix of ones with the same shape and dtype as A
C = A + B  # B is all ones, so A + 1 gives exactly the same result
print('C', C, 'A', A, 'B', B, sep='\n\n')
np.allclose(C, A+1)  # matrix + scalar works: numpy broadcasts the scalar to every element

C

[[1 2 3]
 [4 5 6]
 [7 8 9]]

A

[[0 1 2]
 [3 4 5]
 [6 7 8]]

B

[[1 1 1]
 [1 1 1]
 [1 1 1]]

True


A = np.arange(9).reshape(3,3)
B = np.zeros_like(A)  # alternative to try: np.ones_like(A)*1.5
C = A * B  # Hadamard product (element-wise multiplication), not a matrix product
print('C',C,'A',A,'B',B,sep='\n\n')
np.allclose(C, A*0)  # B is all zeros, so A * B equals A * 0

C

[[0 0 0]
 [0 0 0]
 [0 0 0]]

A

[[0 1 2]
 [3 4 5]
 [6 7 8]]

B

[[0 0 0]
 [0 0 0]
 [0 0 0]]

True


# NOTE(review): the original note described a sum across columns (axis=1),
# but the call below passes axis=(0, 1), which reduces over BOTH axes.
# With keepdims=False both axes are dropped, so the result is a single scalar:
# the sum of every element of A. Pass axis=1 instead to get one sum per row.
print(A)
A.sum(axis=(0,1), keepdims=False)  # scalar: rows and columns are both cancelled out

[[0 1 2]
 [3 4 5]
 [6 7 8]]

36


# Works for other operations too like mean (average)
A.mean(axis=0, keepdims=True) #1x3 (rows are cancelled out but row axis is NOT dropped)

array([[3., 4., 5.]])


x = np.array([1, 2, 3])
y = np.array([1, 0, 1])
np.dot(x, y) == np.sum(x*y)

True


def angle(v, w):
    """Return the angle between the vectors ``v`` and ``w``, in radians.

    The angle is computed as ``arccos(v . w / (||v|| * ||w||))`` where
    ``||.||`` is the Euclidean norm.

    Parameters
    ----------
    v, w : np.ndarray
        Vectors of the same length. ``v`` must provide a ``.dot`` method.

    Returns
    -------
    The angle in the range ``[0, pi]``. The result is NaN when either
    vector has zero norm, because the cosine is then ``0 / 0``.
    """
    cosine = v.dot(w) / (np.linalg.norm(v) * np.linalg.norm(w))
    # Floating-point rounding can push the cosine marginally outside [-1, 1]
    # (e.g. for parallel vectors), which would make arccos return NaN.
    # Clamping keeps the value inside the domain of arccos; NaN passes through.
    return np.arccos(np.clip(cosine, -1.0, 1.0))

angle(np.array([0, 1, 2]), np.array([2, 3, 4])) # the result is in radians

0.4189900840328574


x = np.array([1, 2, 3])
y = np.array([1, 0, 1, -1])
np.outer(x,y)

array([[ 1,  0,  1, -1],
       [ 2,  0,  2, -2],
       [ 3,  0,  3, -3]])


A = np.arange(27).reshape(3, 9)
x = np.ones((9, 1))
b = A @ x  # 3x9 @ 9x1 = 3x1
bb = np.matmul(A, x)
bbb = np.dot(A,x)
print('A', A, 'x', x, 'b', b, 'bb', bb,'bbb', bbb, sep='\n\n')

# Question for you: A*B does element-wise multiplication -- will it work here?

A

[[ 0  1  2  3  4  5  6  7  8]
 [ 9 10 11 12 13 14 15 16 17]
 [18 19 20 21 22 23 24 25 26]]

x

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]

b

[[ 36.]
 [117.]
 [198.]]

bb

[[ 36.]
 [117.]
 [198.]]

bbb

[[ 36.]
 [117.]
 [198.]]


A = np.random.rand(3, 5)
B = np.random.rand(5, 2)
# 3x2 = 3x5 @ 5x2
C = A @ B
print('A', A, 'B', B, 'C', C, sep='\n\n')

A

[[0.12512166 0.42309604 0.64512834 0.55354724 0.18669087]
 [0.43766488 0.14894529 0.18135714 0.61585767 0.11636141]
 [0.9903951  0.74446411 0.35439661 0.34547079 0.97590836]]

B

[[0.86924393 0.09699908]
 [0.656436   0.56980211]
 [0.46228478 0.77846505]
 [0.91648796 0.51868198]
 [0.48784766 0.98239172]]

C

[[1.28312581 1.22594611]
 [1.18324202 0.70224939]
 [2.30613455 1.93406377]]

u = np.array([3, -4])
np.linalg.norm(u)

np.abs(u).sum()
np.linalg.norm(u,1)


# L1 norm
x = np.array([1, 2, 3, 4])
n1 = np.linalg.norm(x, ord=1)
n1b = np.abs(x).sum()
assert n1 == n1b


import matplotlib.pyplot as plt

def plot_grid(Xs, Ys, axs=None):
    """Scatter-plot a grid of points and mark the origin with a red star.

    Parameters
    ----------
    Xs, Ys : np.ndarray
        x and y coordinates of the points to draw (e.g. ``np.meshgrid`` output).
    axs : object providing ``plot``, ``scatter`` and ``axis``, optional
        Where to draw. When omitted (``None``), the ``plt`` interface is used.

    Returns
    -------
    None. The function only draws.
    """
    # One integer per point, in storage order, used to index the colormap.
    t = np.arange(Xs.size)
    # Explicit None check: the drawing target is chosen by whether the caller
    # supplied one, not by the truthiness of the object that was passed.
    target = plt if axs is None else axs
    target.plot(0, 0, marker='*', markersize=7, color='r', linestyle='none')  # origin
    target.scatter(Xs, Ys, c=t, cmap='jet', marker='o')  # x vs y, coloured by index
    target.axis('scaled')  # same scale on both axes

# let's see it with numpy
nX, nY, res = 10, 10, 21  # half-extent along x, half-extent along y, points per axis
X = np.linspace(-nX, +nX, res)  # res evenly spaced points in [-nX, +nX]
Y = np.linspace(-nY, +nY, res)  # res evenly spaced points in [-nY, +nY] (uses nY, not nX)
# meshgrid is very useful to evaluate functions on a grid
# z = f(X,Y)
# please see https://numpy.org/doc/stable/reference/generated/numpy.meshgrid.html
Xs, Ys = np.meshgrid(X, Y)  # two res x res arrays: x and y coordinate of every grid point
plot_grid(Xs, Ys)
#plt.imshow(Ys, cmap='jet')


# Transformation
# 2x2
A = np.array([[0,1],
              [1,0]])
# axis         0 1 2
# [NxN,NxN] -> NxNx2 # add 3-rd axis, like adding another layer
src = np.stack((Xs,Ys), axis=2)
# flatten first two dimension
# (NN)x2
src_r = src.reshape(-1,src.shape[-1]) #ask reshape to keep last dimension and adjust the rest
# 2x2 @ 2x(NN)
dst = A @ src_r.T # 2xNN
#(NN)x2 and then reshape as NxNx2
dst = (dst.T).reshape(src.shape)
# Access X and Y
Xd, Yd = dst[:,:,0], dst[:,:,1]
plot_grid(Xd, Yd) # plot


# Try to see what happens if you change A

# Try with identity matrix and then change values in the diagonal; then change other values
# A = np.array([[1.5,0], 
#               [0,0.5]])

# which kind of map does  A = np.array([[-1, 0], [0, -1]]) ?


def linear_map(A, Xs, Ys):
    """Apply the linear map ``A`` to every point ``(x, y)`` taken from ``Xs``, ``Ys``.

    Parameters
    ----------
    A : np.ndarray of shape (2, 2)
        Matrix of the linear map.
    Xs, Ys : array-like
        x and y coordinates of the source points. Both must have the same
        shape; any number of dimensions is accepted (e.g. the N x N arrays
        returned by ``np.meshgrid``, or plain 1-D coordinate vectors).

    Returns
    -------
    tuple
        ``(Xd, Yd)``: mapped x and y coordinates, each with the shape of ``Xs``.
    """
    # Pair the coordinates along a new trailing axis: (...) and (...) -> (..., 2)
    src = np.stack((Xs, Ys), axis=-1)
    # Flatten every leading axis so that each row is one point: P x 2
    src_r = src.reshape(-1, src.shape[-1])
    # 2x2 @ 2xP -> 2xP: each column is a mapped point
    dst = A @ src_r.T
    # Back to one point per row (P x 2), then restore the original layout (..., 2)
    dst = dst.T.reshape(src.shape)
    # Split the trailing axis back into separate X and Y arrays
    return dst[..., 0], dst[..., 1]


A = np.array([[1, 2], 
              [-1, 3]])
print(A)
Xd, Yd = linear_map(A, Xs, Ys)
fig, axs = plt.subplots(1, 2)
fig.suptitle('Linear map')
plot_grid(Xs, Ys, axs[0])
plot_grid(Xd, Yd, axs[1])
# In case we want to zoom on the center
# plt.xlim(-20,20)
# plt.ylim(-20,20)

[[ 1  2]
 [-1  3]]


A = np.array([[2, -1], 
              [4, -2]])
print(A)
Xd, Yd = linear_map(A, Xs, Ys)
fig, axs = plt.subplots(1,2)
fig.suptitle('Severe Deformations')
plot_grid(Xs,Ys,axs[0])
plot_grid(Xd,Yd,axs[1])

[[ 2 -1]
 [ 4 -2]]


A = np.array([[1, 2], [-1, 3]])
print(A)
Xd, Yd = linear_map(A, Xs, Ys)
fig, axs = plt.subplots(1,2)
fig.suptitle('Linear map')
plot_grid(Xs,Ys,axs[0])
plot_grid(Xd,Yd,axs[1])

[[ 1  2]
 [-1  3]]


A_inv = np.linalg.inv(A)
print(A_inv)
# Let's try inverse mapping
Xds, Yds = linear_map(A_inv, Xd, Yd)
fig, axs = plt.subplots(1,2)
fig.suptitle('Linear map')
plot_grid(Xd,Yd,axs[0])
plot_grid(Xds,Yds,axs[1])
print(f'Matrix rank is {np.linalg.matrix_rank(A)}')

[[ 0.6 -0.4]
 [ 0.2  0.2]]
Matrix rank is 2

C = A @ B

A = np.array([0, 1, 2]) #1x3

B = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]]) #3x4
# we wanna do 3x1 X 3x4 but cannot do it but we can do 3x3 X 3x4


A = np.array([0, 1, 2])

B = np.array([[ 0,  1,  2,  3],
              [ 4,  5,  6,  7],
              [ 8,  9, 10, 11]])
A@B # we do NOT wanna do this
#  0*0
#  4*1
#  8*2

array([20, 23, 26, 29])


A = np.array([0, 1, 2])  # 1-D array of shape (3,); B below is 3x4
B = np.array([[ 0,  1,  2,  3],  # row multiplied by A[0] = 0
              [ 4,  5,  6,  7],  # row multiplied by A[1] = 1
              [ 8,  9, 10, 11]]) # row multiplied by A[2] = 2
A[:, np.newaxis] * B # this is what we want: np.newaxis adds an axis so A becomes
                     # a 3x1 column vector, which numpy broadcasts across B's 4 columns

array([[ 0,  0,  0,  0],
       [ 4,  5,  6,  7],
       [16, 18, 20, 22]])


A[:, np.newaxis] * B # same as below
np.repeat(A.reshape(-1, 1), 4, axis=1)*B

array([[ 0,  0,  0,  0],
       [ 4,  5,  6,  7],
       [16, 18, 20, 22]])


(A[:, np.newaxis] * B ).sum(axis=1)# ----> horizontal

array([ 0, 22, 76])

(A[:, np.newaxis] * B ).sum(axis=1)
array([ 0, 22, 76])

np.einsum('i,ij->i', A, B) #3 X 3x4 --> 3

A = np.array([[1, 1, 1],
              [2, 2, 2],
              [5, 5, 5]])

B = np.array([[0, 1, 0],
              [1, 1, 0],
              [1, 1, 1]])

np.einsum('ij,jk->ik', A, B)

A = np.array([[1, 1, 1],
              [2, 2, 2],
              [5, 5, 5]])

B = np.array([[0, 1, 0],
              [1, 1, 0],
              [1, 1, 1]])

np.einsum('ij,jk->ijk', A, B)


A = np.array([[1, 1, 1],
              [2, 2, 2],
              [5, 5, 5]])

B = np.array([[0, 1, 0],
              [1, 1, 0],
              [1, 1, 1]])
C = np.einsum('ij,jk->ijk', A, B)
print(C, C.shape, sep='\n')

[[[0 1 0]
  [1 1 0]
  [1 1 1]]

 [[0 2 0]
  [2 2 0]
  [2 2 2]]

 [[0 5 0]
  [5 5 0]
  [5 5 5]]]
(3, 3, 3)


# C.sum(axis=1) # what happens if we do?


# np.einsum('ij,jk->ik', A, B)


C = np.einsum('ij,kl->ijkl', A, B)
C.shape

(3, 3, 3, 3)

Topic	Authors	Book
Generic ML	H. Daumé III	"A Course in Machine Learning", download the book
Generic ML	Christopher M. Bishop	“Pattern Recognition and Machine Learning”, download the book
Generic ML	Kevin P. Murphy	“Probabilistic Machine Learning: An Introduction”, MIT Press, 2021
Deep Learning	Ian Goodfellow and Yoshua Bengio and Aaron Courville	“Deep Learning”, MIT Press 2016
Deep Learning	Aston Zhang, Zack C. Lipton, Mu Li, Alex J. Smola	“Dive into Deep Learning”

Attribute 1	Attribute 2
Example 1	Example 1
Example 2	Example 2
Example 3	Example 3

Call signature	NumPy equivalent	Description
('i', A)	A	returns a view of A
('i->', A)	sum(A)	sums the values of A
('i,i->i', A, B)	A * B	element-wise multiplication of A and B
('i,i', A, B)	inner(A, B)	inner product of A and B
('i,j->ij', A, B)	outer(A, B)	outer product of A and B

Call signature	NumPy equivalent	Description
('ij', A)	A	returns a view of A
('ji', A)	A.T	view transpose of A
('ii->i', A)	diag(A)	view main diagonal of A
('ii', A)	trace(A)	sums main diagonal of A
('ij->', A)	sum(A)	sums the values of A
('ij->j', A)	sum(A, axis=0)	sum down the columns of A (across rows)
('ij->i', A)	sum(A, axis=1)	sum horizontally along the rows of A
('ij,ij->ij', A, B)	A * B	element-wise multiplication of A and B
('ij,ji->ij', A, B)	A * B.T	element-wise multiplication of A and B.T
('ij,jk', A, B)	dot(A, B)	matrix multiplication of A and B
('ij,kj->ik', A, B)	inner(A, B)	inner product of A and B
('ij,kj->ikj', A, B)	A[:, None] * B	each row of A multiplied by B
('ij,kl->ijkl', A, B)	A[:, :, None, None] * B	each value of A multiplied by B

Labels
Label for Ex 1
Label for Ex 2
Label for Ex 3

Machine Learning¶

2. The geometry of linear maps¶

📚 Textbooks¶

Recap on Linear Algebra¶

Training set¶

$\mathbf{x}$ as a high-dimensional point in a vector space¶

Vectors are written column-wise¶

To make it row-wise just transpose it¶

Numpy¶

During the course, we will learn how to "vectorize" the code (i.e., avoid explicit for loops).¶

Let's try to plot a vector¶

Vectors: - Point in space¶

Formalizing problems¶

Vectors - Direction in space¶

Direction in space¶

Matrix¶

Interpretation¶

Identity Matrix , Diagonal Matrix¶

Symmetric Matrix¶

What does the transpose operation do?¶

Properties of transposing¶

Trace of a Matrix¶

Reduction operations (sum across rows)¶

Generally operations are element-wise¶

Reduction operations (sum across cols)¶

Non-reduction operations (sum)¶

Vector to Vector Operation¶

Inner Product (Dot Product)¶

Inner product: Geometric Interpretation¶

Cosine Similarity¶

Outer Product¶

Matrix to Vector Operation¶

Two interpretations¶

Applications (moving points in space)¶

Matrix-Matrix Multiplication¶

A final note on [computational] matrix order¶

Complexity¶

Why Matrices?¶

1. Good to model linear transformations in space¶

2. Good to model the data. Design matrix ( num of samples x features)¶

3. Express variations in data (covariance matrix is a symmetric matrix)¶

4. Give the direction where to move to minimize loss (Gradients, Deep Learning)¶

Norms¶

$\ell_2$ norm¶

$\ell_1$ norm¶

$\ell_p$ norm¶

Why norms?¶

Matrices as a linear map between spaces¶

Geometry of Linear Transformations of Basis Vector¶

These vectors are an example of a basis, where we can write any vector in our space as a weighted sum of these basis vectors.¶

Demo¶

Linear Map could induce Severe Distortion of the space¶

Severe distortion¶

Higher Dimensions¶

Linear Map Properties:¶

Linear Independence¶

Rank¶

Invertibility¶

Determinant¶

Determinant $\rightarrow$ Hyper-volume ratio¶

Determinant $\rightarrow$ tells how the space is compressed¶

Hyperplanes¶

Hyperplanes¶

Projection¶

Projection vector onto subspace defined by $\mathbf{w}$¶

All the operations in "one fell swoop"¶

Ladies and gentlemen welcome to....¶

Einsum¶

Einsum = Einstein summation¶

Indexes (or variables):¶

Einsum = Einstein summation¶

The computer science way¶

Einsum¶

Einsum¶

Einsum¶

Appendix - Einsum¶

Comparison with Einsum¶

Einsum: Multiply two matrices¶

Einsum: Rules¶

Einsum: Multiply two matrices without reduction (tensor)¶

2. Good to model the data. Design matrix ( `num of samples x features`)¶