Software Developer and Writer: Learn From Scratch Neural Networks Using PyQt: Part 9

This content is powered by Balige Publishing. Visit this link (collaboration with Rismon Hasiholan Sianipar) PART 1 PART 2 PART 3 PART 4 PART 5 PART 6 PART 7 PART 8

Tutorial Steps To Implement Principal Component Analysis (PCA) Using Scikit-Learn with PyQt

Step 1: Open gui_scikit.ui form that you created before and save it as gui_scikit_feature.ui. Rename widgetData as widgetVariance.

Step 2: Add a new combo box widget and set its objectName property as cboFeature. Double click on the widget and add three new items: Principal Component Analysis (PCA), Linear Discriminant Analysis (LDA), and Kernel Principal Component Analysis (KPCA) as shown in Figure below.

The gui_scikit_feature.ui form now looks as shown in Figure below.

Step 3: Write this Python script and save it as Scikit_Classifier_Feature.py:

#Scikit_Classifier_Feature.py
from PyQt5.QtWidgets import *
from PyQt5.uic import loadUi
from matplotlib.backends.backend_qt5agg import (NavigationToolbar2QT as NavigationToolbar)
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd 

class DemoGUIScikitFeature(QMainWindow):
    """Main window of the feature-extraction demo.

    The widgets are created from gui_scikit_feature.ui; the parameter
    widgets start out disabled.
    """

    def __init__(self):
        super().__init__()
        loadUi("gui_scikit_feature.ui", self)

        self.setWindowTitle("GUI Demo of Feature Extraction Using Scikit-Learn")

        #Adds a matplotlib navigation toolbar bound to the variance canvas
        toolbar = NavigationToolbar(self.widgetVariance.canvas, self)
        self.addToolBar(toolbar)

        #Parameter widgets stay disabled at start-up
        self.set_state(False)

    def set_state(self, state):
        """Enable (True) or disable (False) all parameter widgets."""
        parameter_widgets = (
            self.gbNNParam,
            self.listAlgorithm,
            self.sbDepth,
            self.sbNeighbor,
            self.cboFeature,
        )
        for widget in parameter_widgets:
            widget.setEnabled(state)
                     
if __name__ == '__main__':
    import sys

    #Creates the Qt application, shows the main window and runs the event loop
    qt_app = QApplication(sys.argv)
    main_window = DemoGUIScikitFeature()
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

Step 4: Run Scikit_Classifier_Feature.py and see that some widgets are initially disabled as shown in Figure below.

Step 5: Define the load_data_ratio() function, which reads the data and splits it according to the given data ratio, as follows:

def load_data_ratio(self, ratio):
    """Load the wine data, split it into train/test sets and standardize it.

    ratio is passed to train_test_split as test_size. The results are
    stored on the instance: df_wine, X, y, X_train, X_test, y_train,
    y_test, X_train_std and X_test_std.
    """
    #Loads wine data (the file is read without a header row)
    wine_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
    self.df_wine = pd.read_csv(wine_url, header=None)

    #Column 0 becomes the target, all remaining columns the features
    self.X = self.df_wine.iloc[:, 1:].values
    self.y = self.df_wine.iloc[:, 0].values

    #Stratified split with a fixed seed
    split = train_test_split(
        self.X, self.y,
        test_size=ratio,
        stratify=self.y,
        random_state=0,
    )
    self.X_train, self.X_test, self.y_train, self.y_test = split

    #Standardizes the features; the scaler is fitted on the training set only
    scaler = StandardScaler()
    self.X_train_std = scaler.fit_transform(self.X_train)
    self.X_test_std = scaler.transform(self.X_test)

Step 6: Define display_table() function to display data on tableData widget

def display_table(self, df):
    """Show *df* in the tableData widget and colour the table headers."""
    table = self.tableData

    #Fills the table widget with the data frame content
    self.write_df_to_qtable(df, table)

    #Horizontal header in cyan, vertical header in red
    horizontal_style = "::section {background-color: cyan; }"
    table.horizontalHeader().setStyleSheet(horizontal_style)

    vertical_style = "::section {background-color: red; }"
    table.verticalHeader().setStyleSheet(vertical_style)

# Takes a df and writes it to a qtable provided.
@staticmethod
def write_df_to_qtable(df, table):
    """Write every cell of the data frame *df* into the table widget *table*.

    The table is resized to the shape of *df* and each value is stored as
    the text of a QTableWidgetItem.
    """
    n_rows, n_cols = df.shape
    table.setRowCount(n_rows)
    table.setColumnCount(n_cols)

    # convert it to array first
    df_array = df.values
    for row in range(n_rows):
        for col in range(n_cols):
            table.setItem(row, col,
                          QTableWidgetItem(str(df_array[row, col])))

Step 7: Define load_data() function to read the value of dsbRatio widget, to invoke load_data_ratio() and display_table() functions, and to enable all parameters widgets as follows:

def load_data(self):
    """Load the data with the ratio from dsbRatio and unlock the GUI."""
    #Reads the ratio from the spin box and loads the data with it
    self.load_data_ratio(self.dsbRatio.value())

    #Displays data on table
    self.display_table(self.df_wine)

    #Enables all parameter widgets and disables pbLoad
    self.set_state(True)
    self.pbLoad.setEnabled(False)

Step 8: Connect clicked() signal of pbLoad widget with load_data() function and put it inside __init__() method

def __init__(self):
    """Build the window from the .ui file and wire up the load button."""
    QMainWindow.__init__(self)
    loadUi("gui_scikit_feature.ui", self)

    self.setWindowTitle("GUI Demo of Feature Extraction Using Scikit-Learn")

    #Adds a matplotlib navigation toolbar bound to the variance canvas
    variance_toolbar = NavigationToolbar(self.widgetVariance.canvas, self)
    self.addToolBar(variance_toolbar)

    #Parameter widgets stay disabled until pbLoad is clicked
    self.set_state(False)
    self.pbLoad.clicked.connect(self.load_data)

Step 9: Run Scikit_Classifier_Feature.py and see that data now is displayed on table and all parameter widgets are enabled as shown in Figure below.

Step 10: Now, you define draw_exp_var() function to draw explained variance ratio.

#Displays explained variance ratio
def draw_exp_var(self, ylabelStr, xlabelStr, axisWidget):
    """Plot individual and cumulative explained-variance ratios.

    The ratios are the eigenvalues of the covariance matrix of
    self.X_train_std, sorted from largest to smallest and divided by
    their sum.

    ylabelStr, xlabelStr -- axis labels of the plot.
    axisWidget -- canvas object exposing an ``axis1`` axes and ``draw()``.
    """
    #Computes eigenvalues of covariance matrix. The matrix is symmetric,
    #so eigvalsh is used: it always returns real eigenvalues, whereas
    #np.linalg.eig may return complex ones.
    cov_mat = np.cov(self.X_train_std.T)
    eigen_vals = np.linalg.eigvalsh(cov_mat)

    #Computes total and explained variance (eigvalsh returns ascending
    #order, so the array is reversed to get the largest first)
    var_exp = eigen_vals[::-1] / eigen_vals.sum()
    cum_var_exp = np.cumsum(var_exp)

    #One index per component instead of a hard-coded 13
    component_idx = range(1, len(var_exp) + 1)

    axis = axisWidget.axis1
    axis.clear()
    axis.bar(component_idx, var_exp, alpha=0.5,
             align='center', label='individual explained variance')
    axis.step(component_idx, cum_var_exp,
              where='mid', label='cumulative explained variance')
    axis.set_ylabel(ylabelStr)
    axis.set_xlabel(xlabelStr)
    axis.legend(loc='best')
    axisWidget.draw()

Step 11: Invoke draw_exp_var() from load_data_ratio() function.

def load_data_ratio(self, ratio):
    """Load, split and standardize the wine data, then plot its variance.

    ratio is passed to train_test_split as test_size. The results are
    stored on the instance (df_wine, X, y, X_train, X_test, y_train,
    y_test, X_train_std, X_test_std) and the explained variance ratio is
    drawn on widgetVariance.
    """
    #Loads wine data (the file is read without a header row)
    wine_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
    self.df_wine = pd.read_csv(wine_url, header=None)

    #Column 0 becomes the target, all remaining columns the features
    self.X = self.df_wine.iloc[:, 1:].values
    self.y = self.df_wine.iloc[:, 0].values

    #Stratified split with a fixed seed
    split = train_test_split(
        self.X, self.y,
        test_size=ratio,
        stratify=self.y,
        random_state=0,
    )
    self.X_train, self.X_test, self.y_train, self.y_test = split

    #Standardizes the features; the scaler is fitted on the training set only
    scaler = StandardScaler()
    self.X_train_std = scaler.fit_transform(self.X_train)
    self.X_test_std = scaler.transform(self.X_test)

    #Draws explained variance
    self.draw_exp_var(
        'Explained variance ratio',
        'Principal component index',
        self.widgetVariance.canvas,
    )

Step 12: Run Scikit_Classifier_Feature.py and see that explained variance ratio is now displayed on widgetVariance widget as shown in Figure below.

Step 13: Define display_decision() function to display decision regions.

def display_decision(self, X, y, classifier, axisWidget, title, resolution=0.01):
    """Draw the decision regions of *classifier* together with the samples.

    X -- two-column feature array (columns 0 and 1 are plotted).
    y -- class label of each row of X.
    classifier -- object whose predict() is evaluated on a grid.
    axisWidget -- canvas object exposing an ``axis1`` axes and ``draw()``.
    title -- title of the plot.
    resolution -- step of the grid on which predictions are made.
    """
    # marker and colour per class, plus a colour map for the regions
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    class_labels = np.unique(y)
    cmap = ListedColormap(colors[:len(class_labels)])

    # grid covering the data range, padded by 1 on every side
    first, second = X[:, 0], X[:, 1]
    x1_min, x1_max = first.min() - 1, first.max() + 1
    x2_min, x2_max = second.min() - 1, second.max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # predict a class for every grid point and restore the grid shape
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T
    Z = classifier.predict(grid_points).reshape(xx1.shape)

    ax = axisWidget.axis1
    ax.clear()
    ax.contourf(xx1, xx2, Z, alpha=0.5, cmap=cmap)
    ax.set_xlim(xx1.min(), xx1.max())
    ax.set_ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(class_labels):
        members = (y == cl)
        ax.scatter(x=X[members, 0],
                   y=X[members, 1],
                   alpha=0.8,
                   c=colors[idx],
                   marker=markers[idx],
                   label=cl,
                   edgecolor='black')

    ax.set_xlabel('PC 1')
    ax.set_ylabel('PC 2')
    ax.legend(loc='lower left')
    ax.set_title(title)
    axisWidget.draw()

Step 14: Define the pca_feature() function, which applies the PCA extractor before training the selected classifier, as follows:

def pca_feature(self):
    """Train the selected classifier on two PCA components and plot it.

    Reads the parameters from the GUI, reloads the data with the current
    data ratio and, when 'Logistic Regression' is the current item of
    listAlgorithm, fits an SGD logistic-regression pipeline on the first
    two principal components and draws its decision regions on
    widgetDecision. Does nothing more when no item is selected.
    """
    iterNum = self.sbIter.value()
    self.dsbRate.setDecimals(5)
    learningRate = self.dsbRate.value()
    # depth/neighbor are not used by the Logistic Regression branch below;
    # presumably they are consumed by other classifier branches not shown
    # here. neighbor now reads sbNeighbor (it read sbDepth by mistake).
    depth = self.sbDepth.value()
    neighbor = self.sbNeighbor.value()
    ratio = self.dsbRatio.value()

    self.load_data_ratio(ratio)

    # currentItem() is None when nothing is selected in the list
    item = self.listAlgorithm.currentItem()
    if item is None:
        return
    strList = item.text()

    if strList == 'Logistic Regression':
        self.sbIter.setEnabled(True)
        self.dsbRate.setEnabled(True)
        self.sbDepth.setEnabled(False)
        self.sbNeighbor.setEnabled(False)
        self.dsbRatio.setEnabled(True)

        pca = PCA(n_components=2)
        # NOTE(review): newer scikit-learn releases name this loss
        # 'log_loss'; also eta0 is documented as used only by the
        # 'constant', 'invscaling' and 'adaptive' schedules, so with the
        # default schedule it may have no effect -- confirm against the
        # installed version.
        lr = make_pipeline(
            StandardScaler(),
            SGDClassifier(loss='log', max_iter=iterNum, eta0=learningRate,
                          tol=1e-3))
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        lr.fit(self.X_train_pca, self.y_train)

        # {:g} avoids float noise such as 30.000000000000004 in the title
        strTitle = ('LR Classifier with PCA Extractor: {:g}% Data Ratio '
                    'and Learning Rate {}').format(ratio * 100, learningRate)
        self.display_decision(self.X_train_pca, self.y_train,
                              classifier=lr,
                              axisWidget=self.widgetDecision.canvas,
                              title=strTitle)

Step 15: Define choose_feature() to read current text of cboFeature widget and perform feature extraction accordingly.

def choose_feature(self):
    """Run the feature extraction selected in the cboFeature combo box."""
    selected = self.cboFeature.currentText()

    #Only the PCA entry is handled here; any other text is ignored
    if selected != 'Principal Component Analysis (PCA)':
        return
    self.pca_feature()

Step 16: In __init__() method, set second row of listAlgorithm widget to be selected row, connect clicked() signal of listAlgorithm to choose_feature() function, and connect currentIndexChanged() of cboFeature to choose_feature() function.

In addition, connect valueChanged() signal of sbIter, dsbRatio, dsbRate, sbDepth, and sbNeighbor to choose_feature() function.

def __init__(self):
    """Build the window from the .ui file and connect all signals."""
    QMainWindow.__init__(self)
    loadUi("gui_scikit_feature.ui", self)

    self.setWindowTitle("GUI Demo of Feature Extraction Using Scikit-Learn")

    #Adds a matplotlib navigation toolbar bound to the variance canvas
    variance_toolbar = NavigationToolbar(self.widgetVariance.canvas, self)
    self.addToolBar(variance_toolbar)

    #Parameter widgets stay disabled until pbLoad is clicked
    self.set_state(False)
    self.pbLoad.clicked.connect(self.load_data)

    #Selects the second row of the algorithm list
    self.listAlgorithm.setCurrentRow(1)

    #Any change of algorithm, extractor or parameter re-runs choose_feature
    self.listAlgorithm.clicked.connect(self.choose_feature)
    self.cboFeature.currentIndexChanged.connect(self.choose_feature)
    spin_boxes = (self.sbIter, self.dsbRatio, self.dsbRate,
                  self.sbDepth, self.sbNeighbor)
    for spin_box in spin_boxes:
        spin_box.valueChanged.connect(self.choose_feature)

Step 17: Run Scikit_Classifier_Feature.py and you will see the decision regions of the LR classifier with the PCA extractor, as shown in the figure below.

Change the data ratio to 0.5 and the learning rate to 0.1, and you will see the decision regions as shown in the figure below.

Step 18: Define accuracy_LR() function to calculate classification accuracy of logistic regression (LR):

def accuracy_LR(self, dataRatio, lRate):
    """Return the test accuracy (%) of logistic regression on two PCA components.

    dataRatio -- passed to train_test_split as test_size for this evaluation.
    lRate -- passed to SGDClassifier as eta0.

    The data in self.X / self.y is re-split with *dataRatio*; previously the
    argument was ignored and every call evaluated the split already stored
    on the instance. The split and the PCA projections are kept in local
    variables so the instance attributes used for the decision plot are
    left untouched.

    Returns the accuracy in percent, rounded to one decimal.
    """
    #Splits and standardizes the data for the requested ratio
    X_train, X_test, y_train, y_test = train_test_split(
        self.X, self.y, test_size=dataRatio,
        stratify=self.y, random_state=0)
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)

    #Projects onto the first two principal components
    pca = PCA(n_components=2)
    X_train_pca = pca.fit_transform(X_train_std)
    X_test_pca = pca.transform(X_test_std)

    # NOTE(review): newer scikit-learn releases name this loss 'log_loss';
    # eta0 is documented as used only by the 'constant', 'invscaling' and
    # 'adaptive' schedules -- confirm against the installed version.
    lr = make_pipeline(
        StandardScaler(),
        SGDClassifier(loss='log', max_iter=1000, eta0=lRate, tol=1e-3))
    lr.fit(X_train_pca, y_train)

    #Makes prediction
    y_pred = lr.predict(X_test_pca)

    #Calculates classification accuracy
    acc = round(100*accuracy_score(y_test, y_pred), 1)
    return acc

Step 19: Define graph_LR() function to draw classification accuracy as a function of data ratio and learning rate.

def graph_LR(self, axisWidget, func):
    """Draw a grouped bar chart of accuracy by data ratio and learning rate.

    axisWidget -- canvas object exposing an ``axis1`` axes and ``draw()``.
    func -- callable ``func(data_ratio, learning_rate)`` returning the
        accuracy to plot.

    Five data ratios in steps of 0.1 are evaluated for three learning
    rates: the current one, +0.1 and +0.25.
    """
    ratio = self.dsbRatio.value()
    learningRate = self.dsbRate.value()

    # Sweep upward from the current ratio while that stays below 1,
    # otherwise sweep downward so the current ratio is the last value.
    if (ratio + 0.4) < 1:
        offsets = (0.0, 0.1, 0.2, 0.3, 0.4)
    else:
        offsets = (-0.4, -0.3, -0.2, -0.1, 0.0)
    rangeDR = [ratio + offset for offset in offsets]
    labels = [str(round(dr, 2)) for dr in rangeDR]

    # One list of accuracies per learning rate
    rates = (learningRate, learningRate + 0.1, learningRate + 0.25)
    accuracies = [[func(dr, rate) for dr in rangeDR] for rate in rates]

    x = np.arange(len(labels))  # the label locations
    width = 0.3  # the width of the bars

    axis = axisWidget.axis1
    axis.clear()
    # Shifts of -width, 0, +width centre each group of three on its tick
    bar_groups = []
    for shift, rate, accs in zip((-width, 0.0, width), rates, accuracies):
        rects = axis.bar(x + shift, accs, width,
                         label='LR=' + str(round(rate, 2)))
        bar_groups.append(rects)

    # Add some text for labels, title and custom x-axis tick labels, etc.
    axis.set_ylabel('Accuracy(%)')
    axis.set_xlabel('Data Ratio (DR)')
    axis.set_title('Accuracy by data ratio (DR) and learning rate (LR)')
    axis.set_xticks(x)
    axis.set_xticklabels(labels)
    axis.legend()
    axis.set_facecolor('xkcd:light yellow')

    for rects in bar_groups:
        self.autolabel(rects, axis)
    axisWidget.draw()

def autolabel(self, rects, axisWidget):
    """Write the height of every bar in *rects* just above that bar.

    axisWidget is the axes object on which annotate() is called.
    """
    for bar in rects:
        bar_height = bar.get_height()
        text = '{}'.format(bar_height)
        # anchor at the top centre of the bar
        x_center = bar.get_x() + bar.get_width() / 2
        axisWidget.annotate(
            text,
            xy=(x_center, bar_height),
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha='center',
            va='bottom',
        )

Step 20: Modify pca_feature() function to invoke graph_LR().

def pca_feature(self):
    """Train the selected classifier on two PCA components and plot it.

    Reads the parameters from the GUI, reloads the data with the current
    data ratio and, when 'Logistic Regression' is the current item of
    listAlgorithm, fits an SGD logistic-regression pipeline on the first
    two principal components, draws its decision regions on
    widgetDecision and the accuracy graph on widgetEpoch. Does nothing
    more when no item is selected.
    """
    iterNum = self.sbIter.value()
    self.dsbRate.setDecimals(5)
    learningRate = self.dsbRate.value()
    # depth/neighbor are not used by the Logistic Regression branch below;
    # presumably they are consumed by other classifier branches not shown
    # here. neighbor now reads sbNeighbor (it read sbDepth by mistake).
    depth = self.sbDepth.value()
    neighbor = self.sbNeighbor.value()
    ratio = self.dsbRatio.value()

    self.load_data_ratio(ratio)

    # currentItem() is None when nothing is selected in the list
    item = self.listAlgorithm.currentItem()
    if item is None:
        return
    strList = item.text()

    if strList == 'Logistic Regression':
        pca = PCA(n_components=2)
        # NOTE(review): newer scikit-learn releases name this loss
        # 'log_loss'; also eta0 is documented as used only by the
        # 'constant', 'invscaling' and 'adaptive' schedules, so with the
        # default schedule it may have no effect -- confirm against the
        # installed version.
        lr = make_pipeline(
            StandardScaler(),
            SGDClassifier(loss='log', max_iter=iterNum, eta0=learningRate,
                          tol=1e-3))
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        lr.fit(self.X_train_pca, self.y_train)

        # {:g} avoids float noise such as 30.000000000000004 in the title
        strTitle = ('LR Classifier with PCA Extractor: {:g}% Data Ratio '
                    'and Learning Rate {}').format(ratio * 100, learningRate)
        self.display_decision(self.X_train_pca, self.y_train,
                              classifier=lr,
                              axisWidget=self.widgetDecision.canvas,
                              title=strTitle)

        #display accuracy graph
        self.graph_LR(self.widgetEpoch.canvas, self.accuracy_LR)

Step 21: Run Scikit_Classifier_Feature.py and choose Principal Component Analysis (PCA) from the combo box. You will see the classification accuracy of the LR classifier using the PCA extractor with data ratio = 0.3 and learning rate = 0.0001, as shown in the figure below.