Thursday, February 11, 2021

Learn From Scratch Neural Networks Using PyQt: Part 10

This content is powered by Balige Publishing (in collaboration with Rismon Hasiholan Sianipar). Previous installments: Part 1, Part 2, Part 3, Part 4, Part 5, Part 6, Part 7, Part 8, Part 9.

Tutorial Steps To Implement Principal Component Analysis (PCA) with Perceptron, Support Vector Machine (SVM), Decision Tree (DT), Random Forest (RF), and K-Nearest Neighbor (KNN) Classifiers Using Scikit-Learn with PyQt
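
Before wiring PCA into the GUI, it helps to see what the extractor does on its own. The following minimal sketch (standalone and illustrative only; it is not part of Scikit_Classifier_Feature.py) fits a two-component PCA on standardized random data and prints how much variance each component explains:

#Standalone PCA sketch (illustrative only, not part of the GUI code)
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

rng = np.random.RandomState(1)
X = rng.normal(size=(100, 13))        #100 samples, 13 features (like the Wine data)
X_std = StandardScaler().fit_transform(X)

pca = PCA(n_components=2)             #keep only the two strongest components
X_pca = pca.fit_transform(X_std)      #project the 13-D data onto 2-D

print(X_pca.shape)                    #(100, 2)
print(pca.explained_variance_ratio_)  #fraction of variance captured per component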

Step 22: Now you will implement the Perceptron classifier with the PCA feature extractor. Add the following code to the end of the pca_feature() function:

if strList == 'Perceptron':
    self.sbIter.setEnabled(True)  
    self.dsbRate.setEnabled(True)  
    self.sbDepth.setEnabled(False) 
    self.sbNeighbor.setEnabled(False)
    self.dsbRatio.setEnabled(True)
            
    #Trains perceptron
    pca = PCA(n_components=2)
    ppn = Perceptron(max_iter=iterNum, eta0=learningRate, \
        random_state=1)
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    ppn.fit(self.X_train_pca, self.y_train)       
        
    strTitle = 'Perceptron Classifier with PCA Extractor: ' + \
        str(ratio*100) + '% (DR) '
    strTitle += ' and ' +str(learningRate) + ' (LR)'
            
    self.display_decision(self.X_train_pca, self.y_train, \
        classifier=ppn,axisWidget=self.widgetDecision.canvas,\
        title=strTitle)
        
    #display graph
    self.graph_LR(self.widgetEpoch.canvas, self.accuracy_PPN)
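
To verify the numbers the GUI reports, you can reproduce this pipeline outside PyQt. Below is a minimal standalone sketch, assuming the same Wine data URL, a 0.3 test ratio, and the learning rate 0.0001 used in Step 24's screenshot (max_iter=1000 mirrors accuracy_PPN() below):

#Standalone check of the Perceptron-with-PCA pipeline (no GUI needed)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
df = pd.read_csv(url, header=None)
X, y = df.iloc[:, 1:].values, df.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=0)

#Standardizes features, then projects them onto two principal components
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

#Trains the Perceptron and reports test accuracy
ppn = Perceptron(max_iter=1000, eta0=0.0001, random_state=1)
ppn.fit(X_train_pca, y_train)
print('Accuracy:', round(100*accuracy_score(y_test, ppn.predict(X_test_pca)), 1))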

Step 23: Define accuracy_PPN() to calculate the Perceptron classifier's accuracy for a given data ratio and learning rate. The method re-splits the data first, so that the accuracy graph can sweep the data ratio.

def accuracy_PPN(self,dataRatio,lRate):
    #Re-splits the data so that the dataRatio argument takes effect
    self.load_data_ratio(dataRatio)

    pca = PCA(n_components=2)
    ppn = Perceptron(max_iter=1000, eta0=lRate, random_state=1)
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    ppn.fit(self.X_train_pca, self.y_train)

    #Makes prediction
    y_pred = ppn.predict(self.X_test_pca)

    #Calculates classification accuracy 
    acc = round(100*accuracy_score(self.y_test, y_pred),1)
    return acc

Step 24: Run Scikit_Classifier_Feature.py and choose Perceptron from the combo box. You will see the classification accuracy of the Perceptron classifier using the PCA extractor with data ratio = 0.3 and learning rate = 0.0001, as shown in the figure below.


Change the data ratio to 0.4 and the learning rate to 0.01; you will see the decision regions shown in the figure below.


Step 25: Now you will implement the Support Vector Machine (SVM) classifier with the PCA feature extractor. Add the following code to the end of the pca_feature() function:

if strList == 'Support Vector Machine (SVM)':
    self.sbIter.setEnabled(True)  
    self.dsbRate.setEnabled(True)  
    self.sbDepth.setEnabled(False) 
    self.sbNeighbor.setEnabled(False)
    self.dsbRatio.setEnabled(True)

    #Trains SVM model
    pca = PCA(n_components=2)
    #loss='hinge' trains a linear SVM; learning_rate='constant' makes
    #eta0 take effect (the default 'optimal' schedule ignores eta0)
    svm = make_pipeline(StandardScaler(), \
        SGDClassifier(loss='hinge',max_iter=iterNum,\
        learning_rate='constant',eta0=learningRate, tol=1e-3))
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    svm.fit(self.X_train_pca, self.y_train)             
            
    strTitle = 'SVM Classifier with PCA Extractor: ' + \
        str(ratio*100) + '% (DR) '
    strTitle += ' and ' +str(learningRate) + ' (LR)'
        
    self.display_decision(self.X_train_pca, self.y_train, \
        classifier=svm,axisWidget=self.widgetDecision.canvas,\
        title=strTitle)
                
    #display graph
    self.graph_LR(self.widgetEpoch.canvas, self.accuracy_SVM)
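
Note that SGDClassifier with loss='hinge' is a linear SVM trained by stochastic gradient descent, not a kernel SVM. As a sanity check, the minimal sketch below (reusing the X_train_pca, y_train, X_test_pca, and y_test variables from the standalone sketch in Step 22) compares it against scikit-learn's dedicated LinearSVC; the two accuracies should be similar:

#Compares SGD-trained hinge loss against LinearSVC on the same features
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

sgd_svm = SGDClassifier(loss='hinge', max_iter=1000, tol=1e-3, random_state=1)
sgd_svm.fit(X_train_pca, y_train)

lin_svm = LinearSVC(random_state=1, max_iter=10000)
lin_svm.fit(X_train_pca, y_train)

print('SGD hinge :', accuracy_score(y_test, sgd_svm.predict(X_test_pca)))
print('LinearSVC :', accuracy_score(y_test, lin_svm.predict(X_test_pca)))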

Step 26: Define accuracy_SVM() to calculate the SVM classifier's accuracy. Like accuracy_PPN(), it re-splits the data so that the data ratio argument takes effect.

def accuracy_SVM(self,dataRatio,lRate):
    #Re-splits the data so that the dataRatio argument takes effect
    self.load_data_ratio(dataRatio)

    pca = PCA(n_components=2)
    svm = make_pipeline(StandardScaler(), \
        SGDClassifier(loss='hinge',max_iter=1000, \
        learning_rate='constant',eta0=lRate,tol=1e-3))
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    svm.fit(self.X_train_pca, self.y_train)

    #Makes prediction
    y_pred = svm.predict(self.X_test_pca)

    #Calculates classification accuracy 
    acc = round(100*accuracy_score(self.y_test, y_pred),1)
    return acc 

Step 27: Run Scikit_Classifier_Feature.py and choose Support Vector Machine (SVM) from the combo box. You will see the classification accuracy of the SVM classifier using the PCA extractor with data ratio = 0.3 and learning rate = 0.0001, as shown in the figure below.


Change the data ratio to 0.4 and the learning rate to 0.5; you will see the decision regions shown in the figure below.


Step 28: Now you will implement the Decision Tree (DT) classifier with the PCA feature extractor. Add the following code to the end of the pca_feature() function:

if strList == 'Decision Tree':
    self.dsbRatio.setEnabled(True)
    self.sbIter.setEnabled(False)  
    self.dsbRate.setEnabled(False)  
    self.sbDepth.setEnabled(True) 
    self.sbNeighbor.setEnabled(False)
            
    #Trains Decision Tree model
    pca = PCA(n_components=2)
    tree = DecisionTreeClassifier(criterion='gini', \
        max_depth=depth,random_state=1)            
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    tree.fit(self.X_train_pca, self.y_train)              
            
    strTitle = 'DT Classifier with PCA Extractor: (DR)=' + \
        str(ratio*100) 
    strTitle += ' and Max Depth=' +str(depth)
    self.display_decision(self.X_train_pca, self.y_train, \
        classifier=tree,axisWidget=self.widgetDecision.canvas,\
        title=strTitle)
                
    #display accuracy graph
    self.graph_DT(self.widgetEpoch.canvas, self.accuracy_DT)
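
The max_depth parameter is what the sbDepth spin box controls. If you want to see its effect without the GUI, the minimal sketch below (again reusing the variables from the standalone sketch in Step 22) sweeps several depths; a very shallow tree underfits, while a very deep tree starts to memorize the training split:

#Sweeps max_depth to show its effect on test accuracy
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

for depth in (1, 2, 4, 8, 16):
    tree = DecisionTreeClassifier(criterion='gini', max_depth=depth,
                                  random_state=1)
    tree.fit(X_train_pca, y_train)
    acc = accuracy_score(y_test, tree.predict(X_test_pca))
    print('max_depth=%2d  test accuracy=%.3f' % (depth, acc))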

Step 29: Define accuracy_DT() to calculate the DT classifier's accuracy.

def accuracy_DT(self,ratio,depth):
    #Re-splits the data so that the ratio argument takes effect
    self.load_data_ratio(ratio)

    #Trains Decision Tree model
    pca = PCA(n_components=2)
    tree = DecisionTreeClassifier(criterion='gini', \
        max_depth=depth,random_state=1)   
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    tree.fit(self.X_train_pca, self.y_train)         

    #Makes prediction
    y_pred = tree.predict(self.X_test_pca)

    #Calculates classification accuracy 
    acc = round(100*accuracy_score(self.y_test, y_pred),1)
    return acc 

Step 30: Define the graph_DT() function to plot DT classifier accuracy as a function of data ratio and maximum depth:

def graph_DT(self,axisWidget,func): 
    ratio = self.dsbRatio.value()
    depth = self.sbDepth.value()
        
    if (ratio+0.4) < 1 :
        rangeDR = [ratio,ratio+0.1,ratio+0.2,ratio+0.3,ratio+0.4]
    else :
       rangeDR = [ratio-0.4,ratio-0.3,ratio-0.2,ratio-0.1,ratio]     

    labels = [str(round(rangeDR[0],2)), str(round(rangeDR[1],2)), \
              str(round(rangeDR[2],2)), str(round(rangeDR[3],2)), \
              str(round(rangeDR[4],2))]
               
    Depth1 = []
    for i in rangeDR:
        acc = func(i,depth)
        Depth1.append(acc)   

    Depth2 = []
    for i in rangeDR:
        acc = func(i,depth+2)
        Depth2.append(acc)  
            
    Depth3 = []
    for i in rangeDR:
        acc = func(i,depth+4)
        Depth3.append(acc)       
            
    x = np.arange(len(labels))  # the label locations
    width = 0.3  # the width of the bars
        
    strLabel1 = 'Depth=' + str(round(depth, 2))
    strLabel2 = 'Depth=' + str(round(depth+2, 2))
    strLabel3 = 'Depth=' + str(round(depth+4, 2))
    axisWidget.axis1.clear()
    rects1 = axisWidget.axis1.bar(x - width/2, Depth1, \
        width, label=strLabel1)
    rects2 = axisWidget.axis1.bar(x + width/2, Depth2, \
        width, label=strLabel2)
    rects3 = axisWidget.axis1.bar(x + 3*width/2, Depth3, \
        width, label=strLabel3)

    # Add some text for labels, title and custom x-axis tick labels, etc.
    axisWidget.axis1.set_ylabel('Accuracy(%)')
    axisWidget.axis1.set_xlabel('Data Ratio (DR)')
    axisWidget.axis1.set_title('Accuracy by data ratio (DR) and Depth')
    axisWidget.axis1.set_xticks(x)
    axisWidget.axis1.set_xticklabels(labels)
    axisWidget.axis1.set_facecolor('xkcd:light yellow')
    axisWidget.axis1.legend()
        
    self.autolabel(rects1,axisWidget.axis1)
    self.autolabel(rects2,axisWidget.axis1)
    self.autolabel(rects3,axisWidget.axis1)
    axisWidget.draw()
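
The five bar groups come from a sliding window of data ratios: the window normally starts at the current spin-box value and extends upward in steps of 0.1, but when that would reach 1.0, the window slides down so the current value becomes its last point. A quick illustration of this logic (dr_window is a hypothetical helper that mirrors the if/else above):

#Illustrates the rangeDR window used by graph_DT() and graph_KNN()
def dr_window(ratio):
    if (ratio + 0.4) < 1:
        return [ratio, ratio+0.1, ratio+0.2, ratio+0.3, ratio+0.4]
    return [ratio-0.4, ratio-0.3, ratio-0.2, ratio-0.1, ratio]

print([round(r, 2) for r in dr_window(0.2)])  #[0.2, 0.3, 0.4, 0.5, 0.6]
print([round(r, 2) for r in dr_window(0.8)])  #[0.4, 0.5, 0.6, 0.7, 0.8]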

Step 31: Run Scikit_Classifier_Feature.py and choose Decision Tree from the combo box. You will see the classification accuracy of the DT classifier using the PCA extractor with data ratio = 0.3 and max depth = 4, as shown in the figure below.


Change the data ratio to 0.4 and the max depth to 2; you will see the decision regions shown in the figure below.


Step 32: Now you will implement the Random Forest (RF) classifier with the PCA feature extractor. Add the following code to the end of the pca_feature() function:

if strList == 'Random Forest':
    self.dsbRatio.setEnabled(True)
    self.sbIter.setEnabled(False)  
    self.dsbRate.setEnabled(False)  
    self.sbDepth.setEnabled(True) 
    self.sbNeighbor.setEnabled(False)
            
    #Trains Random Forest model
    pca = PCA(n_components=2)
    forest = RandomForestClassifier(criterion='gini', \
        n_estimators=25,max_depth=depth,random_state=1)          
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    forest.fit(self.X_train_pca, self.y_train)
            
    strTitle = 'Random Forest Classifier with (DR)=' + str(ratio*100)
    strTitle += ' and Max Depth =' +str(depth)
    self.display_decision(self.X_train_pca, self.y_train, \
        classifier=forest,axisWidget=self.widgetDecision.canvas,\
        title=strTitle)            
                
    #display accuracy graph
    self.graph_DT(self.widgetEpoch.canvas, self.accuracy_RF)
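
A random forest averages many randomized trees, which usually smooths the single tree's jagged decision boundary. The minimal sketch below (reusing the Step 22 variables) shows how test accuracy behaves as the ensemble grows; n_estimators=25 matches the value hard-coded in this step:

#Sweeps n_estimators to show the effect of the ensemble size
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

for n in (1, 5, 25, 100):
    forest = RandomForestClassifier(criterion='gini', n_estimators=n,
                                    max_depth=4, random_state=1)
    forest.fit(X_train_pca, y_train)
    acc = accuracy_score(y_test, forest.predict(X_test_pca))
    print('n_estimators=%3d  test accuracy=%.3f' % (n, acc))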

Step 33: Define accuracy_RF() to calculate the RF classifier's accuracy:

def accuracy_RF(self,ratio,depth):
    #Re-splits the data so that the ratio argument takes effect
    self.load_data_ratio(ratio)

    #Trains Random Forest model
    pca = PCA(n_components=2)
    forest = RandomForestClassifier(criterion='gini', \
        n_estimators=25,max_depth=depth,random_state=1)
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    forest.fit(self.X_train_pca, self.y_train)         

    #Makes prediction
    y_pred = forest.predict(self.X_test_pca)

    #Calculates classification accuracy 
    acc = round(100*accuracy_score(self.y_test, y_pred),1)
    return acc 

Step 34: Run Scikit_Classifier_Feature.py and choose Random Forest from the combo box. You will see the classification accuracy of the RF classifier using the PCA extractor with data ratio = 0.3 and max depth = 4, as shown in the figure below.


Change the data ratio to 0.4 and the max depth to 2; you will see the decision regions shown in the figure below.


Step 35: Lastly, you will implement the K-Nearest Neighbor (KNN) classifier with the PCA feature extractor. Add the following code to the end of the pca_feature() function:

if strList == 'Nearest Neighbor':
    self.dsbRatio.setEnabled(True)
    self.sbIter.setEnabled(False)  
    self.dsbRate.setEnabled(False)  
    self.sbDepth.setEnabled(False) 
    self.sbNeighbor.setEnabled(True)
            
    #Trains Nearest Neighbor model
    pca = PCA(n_components=2)
    knn = KNeighborsClassifier(n_neighbors=neighbor, p=2, \
        metric='minkowski')          
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    knn.fit(self.X_train_pca, self.y_train)
            
    strTitle = 'KNN Classifier with (DR)=' + str(ratio*100)
    strTitle += ' and Neighbors=' +str(neighbor)
    self.display_decision(self.X_train_pca, self.y_train, \
        classifier=knn,axisWidget=self.widgetDecision.canvas,\
        title=strTitle)            
                
    #display accuracy graph
    self.graph_KNN(self.widgetEpoch.canvas, self.accuracy_KNN)
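
With p=2 the Minkowski metric reduces to ordinary Euclidean distance, so this KNN classifies each point by a majority vote among its k nearest neighbors in the 2-D PCA plane. The minimal sketch below (reusing the Step 22 variables) sweeps k; small values react to noise, while large values over-smooth the class boundaries:

#Sweeps n_neighbors to show the bias/variance trade-off of KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

for k in (1, 3, 5, 7, 15):
    knn = KNeighborsClassifier(n_neighbors=k, p=2, metric='minkowski')
    knn.fit(X_train_pca, y_train)
    acc = accuracy_score(y_test, knn.predict(X_test_pca))
    print('n_neighbors=%2d  test accuracy=%.3f' % (k, acc))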

Step 36: Define accuracy_KNN() to calculate the KNN classifier's accuracy:

def accuracy_KNN(self,ratio,neighbor):
    #Re-splits the data so that the ratio argument takes effect
    self.load_data_ratio(ratio)

    pca = PCA(n_components=2)
    knn = KNeighborsClassifier(n_neighbors=neighbor, p=2, \
        metric='minkowski')
    self.X_train_pca = pca.fit_transform(self.X_train_std)
    self.X_test_pca = pca.transform(self.X_test_std)
    knn.fit(self.X_train_pca, self.y_train)

    #Makes prediction
    y_pred = knn.predict(self.X_test_pca)

    #Calculates classification accuracy 
    acc = round(100*accuracy_score(self.y_test, y_pred),1)
    return acc 

Step 37: Define the graph_KNN() function to plot KNN classifier accuracy as a function of data ratio and number of neighbors:

def graph_KNN(self,axisWidget,func): 
    ratio = self.dsbRatio.value()
    neighbor = self.sbNeighbor.value()
        
    if (ratio+0.4) < 1 :
        rangeDR = [ratio,ratio+0.1,ratio+0.2,ratio+0.3,ratio+0.4]
    else :
       rangeDR = [ratio-0.4,ratio-0.3,ratio-0.2,ratio-0.1,ratio]     

    labels = [str(round(rangeDR[0],2)), str(round(rangeDR[1],2)), \
              str(round(rangeDR[2],2)), str(round(rangeDR[3],2)), \
              str(round(rangeDR[4],2))]
               
    Neighbor1 = []
    for i in rangeDR:
        acc = func(i,neighbor)
        Neighbor1.append(acc)   

    Neighbor2 = []
    for i in rangeDR:
        acc = func(i,neighbor+2)
        Neighbor2.append(acc)  
            
    Neighbor3 = []
    for i in rangeDR:
        acc = func(i,neighbor+3)
        Neighbor3.append(acc)       
            
    x = np.arange(len(labels))  # the label locations
    width = 0.3  # the width of the bars
        
    strLabel1 = 'Neighbor=' + str(round(neighbor, 2))
    strLabel2 = 'Neighbor=' + str(round(neighbor+2, 2))
    strLabel3 = 'Neighbor=' + str(round(neighbor+3, 2))
    axisWidget.axis1.clear()
    rects1 = axisWidget.axis1.bar(x - width/2, Neighbor1, \
        width, label=strLabel1)
    rects2 = axisWidget.axis1.bar(x + width/2, Neighbor2, \
        width, label=strLabel2)
    rects3 = axisWidget.axis1.bar(x + 3*width/2, Neighbor3, \
        width, label=strLabel3)

    # Add some text for labels, title and custom x-axis tick labels, etc.
    axisWidget.axis1.set_ylabel('Accuracy(%)')
    axisWidget.axis1.set_xlabel('Data Ratio (DR)')
    axisWidget.axis1.set_title(\
        'Accuracy by data ratio (DR) and Number of Neighbors')
    axisWidget.axis1.set_xticks(x)
    axisWidget.axis1.set_xticklabels(labels)
    axisWidget.axis1.legend()
        
    self.autolabel(rects1,axisWidget.axis1)
    self.autolabel(rects2,axisWidget.axis1)
    self.autolabel(rects3,axisWidget.axis1)
    axisWidget.draw()

Step 38: Run Scikit_Classifier_Feature.py and choose Nearest Neighbor from the combo box. You will see the classification accuracy of the KNN classifier using the PCA extractor with data ratio = 0.3 and number of neighbors = 5, as shown in the figure below.


Change the data ratio to 0.4 and the number of neighbors to 7; you will see the decision regions shown in the figure below.



Below is the full script of Scikit_Classifier_Feature.py so far:

#Scikit_Classifier_Feature.py
from PyQt5.QtWidgets import *
from PyQt5.uic import loadUi
from matplotlib.backends.backend_qt5agg import (NavigationToolbar2QT as NavigationToolbar)
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd 

class DemoGUIScikitFeature(QMainWindow):   
    def __init__(self):       
        QMainWindow.__init__(self)
        loadUi("gui_scikit_feature.ui",self)

        self.setWindowTitle("GUI Demo of Feature Extraction Using Scikit-Learn")
        self.addToolBar(NavigationToolbar(self.widgetVariance.canvas, self))
        self.set_state(False)
        self.pbLoad.clicked.connect(self.load_data)
        self.listAlgorithm.setCurrentRow(1)
        self.listAlgorithm.clicked.connect(self.choose_feature)
        self.cboFeature.currentIndexChanged.connect(self.choose_feature)
        self.sbIter.valueChanged.connect(self.choose_feature)
        self.dsbRatio.valueChanged.connect(self.choose_feature)
        self.dsbRate.valueChanged.connect(self.choose_feature)
        self.sbDepth.valueChanged.connect(self.choose_feature)
        self.sbNeighbor.valueChanged.connect(self.choose_feature)

    def set_state(self,state):
        self.gbNNParam.setEnabled(state)
        self.listAlgorithm.setEnabled(state)
        self.sbDepth.setEnabled(state)
        self.sbNeighbor.setEnabled(state)
        self.cboFeature.setEnabled(state)

    def load_data(self):
        ratio = self.dsbRatio.value()
        self.load_data_ratio(ratio)
        
        #Displays data on table
        self.display_table(self.df_wine)
        
        #Enables all parameter widgets
        self.set_state(True)
        
        #Disables pbLoad
        self.pbLoad.setEnabled(False)
    
    def load_data_ratio(self,ratio):   
        #Loads wine data
        self.df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',header=None)
        self.X, self.y = self.df_wine.iloc[:, 1:].values, \
            self.df_wine.iloc[:, 0].values
        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.X, self.y, test_size=ratio, \
            stratify=self.y, random_state=0)

        #Standardizes the features
        sc = StandardScaler()
        self.X_train_std = sc.fit_transform(self.X_train)
        self.X_test_std = sc.transform(self.X_test)
        
        #Draws explained variance
        ylabelStr = 'Explained variance ratio'
        xlabelStr = 'Principal component index'
        self.draw_exp_var(ylabelStr,xlabelStr,self.widgetVariance.canvas)

    def display_table(self,df):
        # show data on table widget
        self.write_df_to_qtable(df,self.tableData)
        
        styleH = "::section {""background-color: cyan; }"
        self.tableData.horizontalHeader().setStyleSheet(styleH)

        styleV = "::section {""background-color: red; }"
        self.tableData.verticalHeader().setStyleSheet(styleV)  

    # Takes a df and writes it to a qtable provided. df headers become qtable headers
    @staticmethod
    def write_df_to_qtable(df,table):
        table.setRowCount(df.shape[0])
        table.setColumnCount(df.shape[1])       

        # getting data from df is computationally costly so convert it to array first
        df_array = df.values
        for row in range(df.shape[0]):
            for col in range(df.shape[1]):
                table.setItem(row, col, \
                    QTableWidgetItem(str(df_array[row,col]))) 

    #Displays explained variance ratio
    def draw_exp_var(self,ylabelStr,xlabelStr,axisWidget):
        #Computes eigenpairs of covariance matrix
        cov_mat = np.cov(self.X_train_std.T)
        eigen_vals, eigen_vecs = np.linalg.eig(cov_mat)

        #Computes total and explained variance
        tot = sum(eigen_vals)
        var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]
        cum_var_exp = np.cumsum(var_exp)

        axisWidget.axis1.clear()
        axisWidget.axis1.bar(range(1,14), var_exp, alpha=0.5, \
            align='center',label='individual explained variance')
        axisWidget.axis1.step(range(1,14), cum_var_exp, \
            where='mid',label='cumulative explained variance')
        axisWidget.axis1.set_ylabel(ylabelStr)
        axisWidget.axis1.set_xlabel(xlabelStr)
        axisWidget.axis1.legend(loc='best')
        axisWidget.draw()

    def choose_feature(self): 
        strCB = self.cboFeature.currentText()
        if strCB == 'Principal Component Analysis (PCA)':
            self.pca_feature()        

    def pca_feature(self):
        iterNum = self.sbIter.value()
        self.dsbRate.setDecimals(5)
        learningRate = self.dsbRate.value()
        depth = self.sbDepth.value()
        neighbor = self.sbNeighbor.value()
        ratio = self.dsbRatio.value()
        
        self.load_data_ratio(ratio)
        
        item = self.listAlgorithm.currentItem()
        strList = item.text()
        
        if strList == 'Logistic Regression':
            self.sbIter.setEnabled(True)  
            self.dsbRate.setEnabled(True)  
            self.sbDepth.setEnabled(False) 
            self.sbNeighbor.setEnabled(False)
            self.dsbRatio.setEnabled(True)
            
            pca = PCA(n_components=2)
            lr = make_pipeline(StandardScaler(), \
                SGDClassifier(loss='log',max_iter=iterNum, \
                learning_rate='constant',eta0=learningRate,tol=1e-3))
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            lr.fit(self.X_train_pca, self.y_train)
        
            strTitle = 'LR Classifier with PCA Extractor: ' + \
                str(ratio*100) + '% Data Ratio '
            strTitle += ' and Learning Rate ' +str(learningRate)
            self.display_decision(self.X_train_pca, self.y_train, \
                classifier=lr,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)

            #display accuracy graph
            self.graph_LR(self.widgetEpoch.canvas, self.accuracy_LR)

        if strList == 'Perceptron':
            self.sbIter.setEnabled(True)  
            self.dsbRate.setEnabled(True)  
            self.sbDepth.setEnabled(False) 
            self.sbNeighbor.setEnabled(False)
            self.dsbRatio.setEnabled(True)
            
            #Trains perceptron
            pca = PCA(n_components=2)
            ppn = Perceptron(max_iter=iterNum, eta0=learningRate, \
                random_state=1)
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            ppn.fit(self.X_train_pca, self.y_train)       
        
            strTitle = 'Perceptron Classifier with PCA Extractor: ' + \
                str(ratio*100) + '% (DR) '
            strTitle += ' and ' +str(learningRate) + ' (LR)'
            
            self.display_decision(self.X_train_pca, self.y_train,\
                classifier=ppn,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)
        
            #display graph
            self.graph_LR(self.widgetEpoch.canvas, self.accuracy_PPN)

        if strList == 'Support Vector Machine (SVM)':
            self.sbIter.setEnabled(True)  
            self.dsbRate.setEnabled(True)  
            self.sbDepth.setEnabled(False) 
            self.sbNeighbor.setEnabled(False)
            self.dsbRatio.setEnabled(True)
            
            #Trains SVM model
            pca = PCA(n_components=2)
            svm = make_pipeline(StandardScaler(), \
                SGDClassifier(loss='hinge',max_iter=iterNum, \
                learning_rate='constant',eta0=learningRate,tol=1e-3))
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            svm.fit(self.X_train_pca, self.y_train)             
            
            strTitle = 'SVM Classifier with PCA Extractor: ' + str(ratio*100) + '% (DR) '
            strTitle += ' and ' +str(learningRate) + ' (LR)'
        
            self.display_decision(self.X_train_pca, self.y_train, \
                classifier=svm,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)
                
            #display graph
            self.graph_LR(self.widgetEpoch.canvas, self.accuracy_SVM)        

        if strList == 'Decision Tree':
            self.dsbRatio.setEnabled(True)
            self.sbIter.setEnabled(False)  
            self.dsbRate.setEnabled(False)  
            self.sbDepth.setEnabled(True) 
            self.sbNeighbor.setEnabled(False)
            
            #Trains Decision Tree model
            pca = PCA(n_components=2)
            tree = DecisionTreeClassifier(criterion='gini', \
                max_depth=depth,random_state=1)            
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            tree.fit(self.X_train_pca, self.y_train)              
            
            strTitle = 'DT Classifier with PCA Extractor: (DR)=' + \
                str(ratio*100) 
            strTitle += ' and Max Depth=' +str(depth)
            self.display_decision(self.X_train_pca, self.y_train, \
                classifier=tree,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)
                
            #display accuracy graph
            self.graph_DT(self.widgetEpoch.canvas, self.accuracy_DT)
 
        if strList == 'Random Forest':
            self.dsbRatio.setEnabled(True)
            self.sbIter.setEnabled(False)  
            self.dsbRate.setEnabled(False)  
            self.sbDepth.setEnabled(True) 
            self.sbNeighbor.setEnabled(False)
            
            #Trains Random Forest model
            pca = PCA(n_components=2)
            forest = RandomForestClassifier(criterion='gini', \
                n_estimators=25,max_depth=depth,random_state=1)          
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            forest.fit(self.X_train_pca, self.y_train)
            
            strTitle = 'Random Forest Classifier with (DR)=' + str(ratio*100)
            strTitle += ' and Max Depth =' +str(depth)
            self.display_decision(self.X_train_pca, self.y_train, \
                classifier=forest,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)            
                
            #display accuracy graph
            self.graph_DT(self.widgetEpoch.canvas, self.accuracy_RF)

        if strList == 'Nearest Neighbor':
            self.dsbRatio.setEnabled(True)
            self.sbIter.setEnabled(False)  
            self.dsbRate.setEnabled(False)  
            self.sbDepth.setEnabled(False) 
            self.sbNeighbor.setEnabled(True)
            
            #Trains Nearest Neighbor model
            pca = PCA(n_components=2)
            knn = KNeighborsClassifier(n_neighbors=neighbor, p=2, \
                metric='minkowski')          
            self.X_train_pca = pca.fit_transform(self.X_train_std)
            self.X_test_pca = pca.transform(self.X_test_std)
            knn.fit(self.X_train_pca, self.y_train)
            
            strTitle = 'KNN Classifier with (DR)=' + str(ratio*100)
            strTitle += ' and Neighbors=' +str(neighbor)
            self.display_decision(self.X_train_pca, self.y_train, \
                classifier=knn,axisWidget=self.widgetDecision.canvas,\
                title=strTitle)            
                
            #display accuracy graph
            self.graph_KNN(self.widgetEpoch.canvas, self.accuracy_KNN)
            
    def display_decision(self,X, y, classifier, \
        axisWidget,title,resolution=0.01):
        # setup marker generator and color map
        markers = ('s', 'x', 'o', '^', 'v')
        colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
        cmap = ListedColormap(colors[:len(np.unique(y))])
    
        # plot the decision surface
        x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
        Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
        Z = Z.reshape(xx1.shape)
        axisWidget.axis1.clear()
        axisWidget.axis1.contourf(xx1, xx2, Z, alpha=0.5, cmap=cmap)
        axisWidget.axis1.set_xlim(xx1.min(), xx1.max())
        axisWidget.axis1.set_ylim(xx2.min(), xx2.max())

        # plot class samples
        for idx, cl in enumerate(np.unique(y)):
            axisWidget.axis1.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')
        
        axisWidget.axis1.set_xlabel('PC 1')
        axisWidget.axis1.set_ylabel('PC 2')
        axisWidget.axis1.legend(loc='lower left')
        axisWidget.axis1.set_title(title)
        axisWidget.draw()

    def accuracy_PPN(self,dataRatio,lRate):
        #Re-splits the data so that the dataRatio argument takes effect
        self.load_data_ratio(dataRatio)

        pca = PCA(n_components=2)
        ppn = Perceptron(max_iter=1000, eta0=lRate, random_state=1)
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        ppn.fit(self.X_train_pca, self.y_train)

        #Makes prediction
        y_pred = ppn.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 

    def accuracy_SVM(self,dataRatio,lRate):
        #Re-splits the data so that the dataRatio argument takes effect
        self.load_data_ratio(dataRatio)

        pca = PCA(n_components=2)
        svm = make_pipeline(StandardScaler(), \
            SGDClassifier(loss='hinge',max_iter=1000, \
            learning_rate='constant',eta0=lRate,tol=1e-3))
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        svm.fit(self.X_train_pca, self.y_train)

        #Makes prediction
        y_pred = svm.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 

    def accuracy_DT(self,ratio,depth):
        #Re-splits the data so that the ratio argument takes effect
        self.load_data_ratio(ratio)

        #Trains Decision Tree model
        pca = PCA(n_components=2)
        tree = DecisionTreeClassifier(criterion='gini', \
            max_depth=depth,random_state=1)   
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        tree.fit(self.X_train_pca, self.y_train)         

        #Makes prediction
        y_pred = tree.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 

    def accuracy_RF(self,ratio,depth):
        #Re-splits the data so that the ratio argument takes effect
        self.load_data_ratio(ratio)

        #Trains Random Forest model
        pca = PCA(n_components=2)
        forest = RandomForestClassifier(criterion='gini', \
            n_estimators=25,max_depth=depth,random_state=1)
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        forest.fit(self.X_train_pca, self.y_train)         

        #Makes prediction
        y_pred = forest.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 
    
    def accuracy_KNN(self,ratio,neighbor):
        #Re-splits the data so that the ratio argument takes effect
        self.load_data_ratio(ratio)

        pca = PCA(n_components=2)
        knn = KNeighborsClassifier(n_neighbors=neighbor, p=2, \
            metric='minkowski')
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        knn.fit(self.X_train_pca, self.y_train)

        #Makes prediction
        y_pred = knn.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 

    def accuracy_LR(self,ratio,lRate):
        #Re-splits the data so that the ratio argument takes effect
        self.load_data_ratio(ratio)

        pca = PCA(n_components=2)
        lr = make_pipeline(StandardScaler(), \
            SGDClassifier(loss='log',max_iter=1000, \
            learning_rate='constant',eta0=lRate,tol=1e-3))
        self.X_train_pca = pca.fit_transform(self.X_train_std)
        self.X_test_pca = pca.transform(self.X_test_std)
        lr.fit(self.X_train_pca, self.y_train)

        #Makes prediction
        y_pred = lr.predict(self.X_test_pca)

        #Calculates classification accuracy 
        acc = round(100*accuracy_score(self.y_test, y_pred),1)
        return acc 

    def graph_KNN(self,axisWidget,func): 
        ratio = self.dsbRatio.value()
        neighbor = self.sbNeighbor.value()
        
        if (ratio+0.4) < 1 :
            rangeDR = [ratio,ratio+0.1,ratio+0.2,ratio+0.3,ratio+0.4]
        else :
           rangeDR = [ratio-0.4,ratio-0.3,ratio-0.2,ratio-0.1,ratio]     

        labels = [str(round(rangeDR[0],2)), str(round(rangeDR[1],2)), \
                  str(round(rangeDR[2],2)), str(round(rangeDR[3],2)), \
                  str(round(rangeDR[4],2))]
               
        Neighbor1 = []
        for i in rangeDR:
            acc = func(i,neighbor)
            Neighbor1.append(acc)   

        Neighbor2 = []
        for i in rangeDR:
            acc = func(i,neighbor+2)
            Neighbor2.append(acc)  
            
        Neighbor3 = []
        for i in rangeDR:
            acc = func(i,neighbor+3)
            Neighbor3.append(acc)       
            
        x = np.arange(len(labels))  # the label locations
        width = 0.3  # the width of the bars
        
        strLabel1 = 'Neighbor=' + str(round(neighbor, 2))
        strLabel2 = 'Neighbor=' + str(round(neighbor+2, 2))
        strLabel3 = 'Neighbor=' + str(round(neighbor+3, 2))
        axisWidget.axis1.clear()
        rects1 = axisWidget.axis1.bar(x - width/2, Neighbor1, \
            width, label=strLabel1)
        rects2 = axisWidget.axis1.bar(x + width/2, Neighbor2, \
            width, label=strLabel2)
        rects3 = axisWidget.axis1.bar(x + 3*width/2, Neighbor3, \
            width, label=strLabel3)

        # Add some text for labels, title and custom x-axis tick labels, etc.
        axisWidget.axis1.set_ylabel('Accuracy(%)')
        axisWidget.axis1.set_xlabel('Data Ratio (DR)')
        axisWidget.axis1.set_title('Accuracy by data ratio (DR) and Number of Neighbors')
        axisWidget.axis1.set_xticks(x)
        axisWidget.axis1.set_xticklabels(labels)
        axisWidget.axis1.legend()
        
        self.autolabel(rects1,axisWidget.axis1)
        self.autolabel(rects2,axisWidget.axis1)
        self.autolabel(rects3,axisWidget.axis1)
        axisWidget.draw()
        
    def graph_DT(self,axisWidget,func): 
        ratio = self.dsbRatio.value()
        depth = self.sbDepth.value()
        
        if (ratio+0.4) < 1 :
            rangeDR = [ratio,ratio+0.1,ratio+0.2,ratio+0.3,ratio+0.4]
        else :
           rangeDR = [ratio-0.4,ratio-0.3,ratio-0.2,ratio-0.1,ratio]     

        labels = [str(round(rangeDR[0],2)), str(round(rangeDR[1],2)), \
                  str(round(rangeDR[2],2)), str(round(rangeDR[3],2)), \
                  str(round(rangeDR[4],2))]
               
        Depth1 = []
        for i in rangeDR:
            acc = func(i,depth)
            Depth1.append(acc)   

        Depth2 = []
        for i in rangeDR:
            acc = func(i,depth+2)
            Depth2.append(acc)  
            
        Depth3 = []
        for i in rangeDR:
            acc = func(i,depth+4)
            Depth3.append(acc)       
            
        x = np.arange(len(labels))  # the label locations
        width = 0.3  # the width of the bars
        
        strLabel1 = 'Depth=' + str(round(depth, 2))
        strLabel2 = 'Depth=' + str(round(depth+2, 2))
        strLabel3 = 'Depth=' + str(round(depth+4, 2))
        axisWidget.axis1.clear()
        rects1 = axisWidget.axis1.bar(x - width/2, Depth1, \
            width, label=strLabel1)
        rects2 = axisWidget.axis1.bar(x + width/2, Depth2, \
            width, label=strLabel2)
        rects3 = axisWidget.axis1.bar(x + 3*width/2, Depth3, \
            width, label=strLabel3)

        # Add some text for labels, title and custom x-axis tick labels, etc.
        axisWidget.axis1.set_ylabel('Accuracy(%)')
        axisWidget.axis1.set_xlabel('Data Ratio (DR)')
        axisWidget.axis1.set_title('Accuracy by data ratio (DR) and Depth')
        axisWidget.axis1.set_xticks(x)
        axisWidget.axis1.set_xticklabels(labels)
        axisWidget.axis1.legend()
        axisWidget.axis1.set_facecolor('xkcd:light yellow')
        
        self.autolabel(rects1,axisWidget.axis1)
        self.autolabel(rects2,axisWidget.axis1)
        self.autolabel(rects3,axisWidget.axis1)
        axisWidget.draw()
        
    def graph_LR(self,axisWidget,func): 
        ratio = self.dsbRatio.value()
        learningRate = self.dsbRate.value()
        
        if (ratio+0.4) < 1 :
            rangeDR = [ratio,ratio+0.1,ratio+0.2,ratio+0.3,ratio+0.4]
        else :
            rangeDR = [ratio-0.4,ratio-0.3,ratio-0.2,ratio-0.1,ratio]     

        labels = [str(round(rangeDR[0],2)), str(round(rangeDR[1],2)), \
                  str(round(rangeDR[2],2)), str(round(rangeDR[3],2)), \
                  str(round(rangeDR[4],2))]
               
        accLR1 = []
        for i in rangeDR:
            acc = func(i,learningRate)
            accLR1.append(acc)   

        accLR2 = []
        for i in rangeDR:
            acc = func(i,learningRate+0.1)
            accLR2.append(acc)  
            
        accLR3 = []
        for i in rangeDR:
            acc = func(i,learningRate+0.25)
            accLR3.append(acc)       
            
        x = np.arange(len(labels))  # the label locations
        width = 0.3  # the width of the bars
        
        strLabel1 = 'LR=' + str(round(learningRate, 2))
        strLabel2 = 'LR=' + str(round(learningRate+0.1, 2))
        strLabel3 = 'LR=' + str(round(learningRate+0.25, 2))
        axisWidget.axis1.clear()
        rects1 = axisWidget.axis1.bar(x - width/2, accLR1, \
            width, label=strLabel1)
        rects2 = axisWidget.axis1.bar(x + width/2, accLR2, \
            width, label=strLabel2)
        rects3 = axisWidget.axis1.bar(x + 3*width/2, accLR3, \
            width, label=strLabel3)

        # Add some text for labels, title and custom x-axis tick labels, etc.
        axisWidget.axis1.set_ylabel('Accuracy(%)')
        axisWidget.axis1.set_xlabel('Data Ratio (DR)')
        axisWidget.axis1.set_title('Accuracy by data ratio (DR) and learning rate (LR)')
        axisWidget.axis1.set_xticks(x)
        axisWidget.axis1.set_xticklabels(labels)
        axisWidget.axis1.legend()
        axisWidget.axis1.set_facecolor('xkcd:light yellow')
        
        self.autolabel(rects1,axisWidget.axis1)
        self.autolabel(rects2,axisWidget.axis1)
        self.autolabel(rects3,axisWidget.axis1)
        axisWidget.draw()      

    def autolabel(self,rects,axisWidget):
        """Attach a text label above each bar in *rects*, displaying its height."""
        for rect in rects:
            height = rect.get_height()
            axisWidget.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')
              
if __name__ == '__main__':
    import sys
    app = QApplication(sys.argv)
    ex = DemoGUIScikitFeature()
    ex.show()
    sys.exit(app.exec_())


Learn From Scratch Neural Networks Using PyQt: Part 11



