CSC 578D / Data Mining / Fall 2018 / University of Victoria

Python Notebook explaining Assignment 02 / Problem 04

Author: Andreas P. Koenzen (akoenzen => uvic.ca)
Version: 0.1

In [7]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.animation as ani

from mpl_toolkits import mplot3d

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.display import HTML

mpl.rcParams['animation.embed_limit'] = 100
In [8]:
# Location and column layout of the regression data set. The CSV carries no
# header row, so the column names are supplied explicitly below.
DATA_URL = 'http://www.apkc.net/data/csc_578d/assignment02/regdata.csv'
COLUMN_NAMES = ['GPA', 'Years of Experience', 'Salary']

data = pd.read_csv(DATA_URL, names=COLUMN_NAMES, header=None)

# Preview the first rows as the cell's displayed result.
data.head()
Out[8]:
GPA Years of Experience Salary
0 70 1.0 50
1 80 2.0 55
2 65 2.0 45
3 70 2.5 60
4 65 2.7 58

Preprocessing function:

In [9]:
def prepare(data):
    """Split a table into a min-max scaled design matrix and a target column.

    Every column except the last is treated as a feature and rescaled
    column-wise to the range [0, 1]. A constant bias column of ones is then
    appended as the LAST column of the design matrix, whatever the number of
    features. The last column of ``data`` is returned as the target, with its
    values left untouched.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric table whose final column is the regression target.

    Returns
    -------
    x : numpy.ndarray, shape (n_samples, n_features + 1), dtype float
        Scaled features followed by the bias column.
    y : numpy.ndarray, shape (n_samples, 1), dtype float
        Target values.
    """
    # Work on a private float copy: `data.values` may alias the DataFrame's
    # own buffer (or be read-only), so it must never be written to or handed
    # back to the caller as-is.
    values = np.array(data.values, dtype=float)

    x = values[:, :-1]

    min_x = np.min(x, axis=0)
    span = np.max(x, axis=0) - min_x
    # A constant feature has zero range; dividing by it would yield NaN.
    # Using 1 instead maps such a column to all zeros.
    span[span == 0] = 1.0
    x = (x - min_x) / span

    # Append the bias column after the last feature instead of at a
    # hard-coded index, so any number of features is supported.
    x = np.insert(x, x.shape[1], 1, axis=1)

    # Targets are returned unchanged. The earlier `y[y == 0] = -1` relabelling
    # is a classification convention and would corrupt a regression target
    # that is legitimately 0.
    y = values[:, -1:]

    return x, y

# Build the scaled design matrix `x` (features plus bias column) and the
# target column `y`; the plotting and fitting cells below read these globals.
x, y = prepare(data)

Plot the data:

In [10]:
# 3-D scatter of the two scaled features against the target.
fig1 = plt.figure(figsize=(12, 8))
# Create the 3-D axes with add_subplot: passing `projection=` to
# `Figure.gca()` is no longer accepted by current Matplotlib releases.
ax1 = fig1.add_subplot(111, projection='3d')

# `_ =` keeps the notebook from echoing each call's return value
# (ast_node_interactivity is set to "all" in the first cell).
_ = ax1.view_init(azim=65, elev=30)
_ = ax1.set_xlabel('Years of Experience', fontsize=12)
_ = ax1.set_ylabel('GPA', fontsize=12)
_ = ax1.set_zlabel('Salary', fontsize=12)

# Column 1 of `x` (Years of Experience) goes on the x-axis and column 0 (GPA)
# on the y-axis, matching the axis labels set above.
_ = ax1.scatter3D(xs=x[:,1:2], ys=x[:,0:1], zs=y, c='r', marker='o', s=30, alpha=1.0)

Build the linear regression model (fitted by gradient descent) and plot the error curve:

In [11]:
def error(x, y, w):
    """Return the per-sample squared residuals of the linear model.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_weights)
        Design matrix.
    y : numpy.ndarray, shape (n_samples, 1)
        Targets.
    w : numpy.ndarray, shape (1, n_weights)
        Weight row vector.

    Returns
    -------
    numpy.ndarray, shape (n_samples, 1)
        ``(y - x @ w.T) ** 2`` for every sample.
    """
    return ((y - (x @ w.T)) ** 2)

def error_mean(x, y, w):
    """Return half the mean squared error, ``sum(error) / (2 * n)``, as a float.

    Takes the same arguments as ``error``. The result is extracted with
    ``ndarray.item()`` because ``np.asscalar`` has been removed from NumPy.
    ``item()`` raises ``ValueError`` if the summed error is not a single
    value (for example when ``y`` is not a column vector).
    """
    total = np.sum(error(x, y, w), axis=0, keepdims=True)
    return ((1 / (2 * len(x))) * total).ravel().item()

def grad(x, y, w):
    """Return each sample's residual multiplied by its feature row.

    The result has the same shape as ``x``: row ``i`` is
    ``(y[i] - x[i] @ w.T) * x[i]``.
    """
    residual = y - (x @ w.T)
    return residual * x

def grad_mean(x, y, w):
    """Average the rows produced by ``grad`` into one flat vector.

    The per-sample rows are summed over axis 0, scaled by ``1 / len(x)``
    and flattened to one dimension.
    """
    scale = 1 / len(x)
    column_sums = grad(x, y, w).sum(axis=0)
    return (scale * column_sums).reshape(-1)

def fit(x, y, kappa, iterations):
    """Iteratively fit the weight vector, starting from all zeros.

    Each iteration first records ``error_mean`` for the current weights and
    then adds ``kappa * grad_mean(...)`` to them.

    Parameters
    ----------
    x : numpy.ndarray
        Design matrix; the weight vector gets one entry per column.
    y : numpy.ndarray
        Targets, passed through to ``error_mean`` and ``grad_mean``.
    kappa : float
        Step size applied to every update.
    iterations : int
        Number of updates to perform.

    Returns
    -------
    w : numpy.ndarray, shape (1, x.shape[1])
        Weights after the last update.
    e : list
        Error recorded BEFORE each update, so it has ``iterations`` entries
        and does not include the error of the returned weights.
    """
    weights = np.zeros((1, x.shape[1]))
    history = []

    for _ in range(iterations):
        history.append(error_mean(x, y, weights))
        step = kappa * grad_mean(x, y, weights)
        weights = weights + step

    return weights, history

# Train with a step size of 0.01 for 500 iterations.
w, e = fit(x, y, kappa=0.01, iterations=500)

# Report the learned weights, then the per-iteration error history.
print("Weight vector: {}".format(w))
print()
print("Error curve:")

# Assigning to `_` keeps the notebook from echoing plot()'s return value.
_ = plt.plot(e)
plt.show()
Weight vector: [[19.36032645 28.060349   46.26638908]]

Error curve:

Plot the fitted regression plane:

In [14]:
fig1 = plt.figure(figsize=(8, 6))
# Create the 3-D axes with add_subplot: passing `projection=` to
# `Figure.gca()` is no longer accepted by current Matplotlib releases.
ax1 = fig1.add_subplot(111, projection='3d')

# Number of animation frames; frame i also sets the camera azimuth to i.
frames = 360 * 1
max_elev = 60
min_elev = 15

# Camera elevation per frame: sweep up from min_elev to max_elev, back down,
# and repeat. Indexing one up/down sweep modulo its length yields exactly
# `frames` entries, so `ele[i]` is valid for every frame even when `frames`
# is not a multiple of (max_elev - min_elev).
sweep = list(range(min_elev, max_elev, 1)) + list(range(max_elev, min_elev, -1))
ele = [sweep[i % len(sweep)] for i in range(frames)]

def init():
    """Draw the static scene for the animation: data points and fitted plane.

    Reads the notebook globals ``ax1`` (3-D axes), ``x`` / ``y`` (scaled
    features and targets) and ``w`` (fitted weights, shape (1, 3)).

    Returns
    -------
    tuple
        One-element tuple holding ``ax1``.
    """
    _ = ax1.view_init(azim=0, elev=0)
    _ = ax1.set_xlabel('Years of Experience', fontsize=12)
    _ = ax1.set_ylabel('GPA', fontsize=12)
    _ = ax1.set_zlabel('Salary', fontsize=12)

    # Column 1 of `x` (Years of Experience) is the x-axis, column 0 (GPA)
    # the y-axis, matching the labels above.
    _ = ax1.scatter3D(xs=x[:,1:2], ys=x[:,0:1], zs=y, c='r', marker='o', s=30, alpha=1.0)

    # Features were min-max scaled, so the plane is sampled on the unit square.
    x_surf, y_surf = np.meshgrid(np.linspace(0.0, 1.0, num=20), np.linspace(0.0, 1.0, num=20))

    # Weight layout follows the design matrix columns: [GPA, Years, bias].
    # `.item()` replaces `np.asscalar`, which has been removed from NumPy.
    w0 = w[0][2].item()  # bias
    w1 = w[0][1].item()  # Years of Experience (x-axis)
    w2 = w[0][0].item()  # GPA (y-axis)
    z  = w1 * x_surf + w2 * y_surf + w0
    _ = ax1.plot_wireframe(x_surf, y_surf, z, color='b', alpha=0.3, rstride=2, cstride=2)

    return ax1,

def animate(i, ax):
    """Move the camera for frame ``i``.

    Parameters
    ----------
    i : int
        Frame index; used directly as the azimuth and as the index into the
        global elevation schedule ``ele``.
    ax : 3-D axes
        Axes whose view is updated.

    Returns
    -------
    tuple
        One-element tuple holding the axes that were modified.
    """
    ax.view_init(elev=ele[i], azim=i)

    # Return the axes that were actually updated rather than the global
    # `ax1`, so the function is correct for whichever axes it is given.
    return ax,

# Assemble the animation: `init` draws the static scene once and `animate`
# repositions the camera for each of the `frames` frames, 50 ms apart.
anim = ani.FuncAnimation(
    fig1,
    animate,
    frames=frames,
    init_func=init,
    fargs=(ax1,),
    interval=50,
)

# Close the current figure before rendering; presumably this keeps the inline
# backend from also showing a static copy next to the player.
plt.close()

# Render the animation as an interactive JavaScript/HTML player.
HTML(anim.to_jshtml())
Out[14]:


Once Loop Reflect