Monday, August 21, 2023

HOTEL REVIEW: SENTIMENT ANALYSIS USING MACHINE LEARNING AND DEEP LEARNING WITH PYTHON GUI --- SECOND EDITION (VIVIAN SIAHAAN)

 Dataset

Google Play Books

Amazon Kindle

Amazon Paperback

Kobo Store



Long Short-Term Memory (LSTM) Model

The model architecture includes an Embedding layer, a 1D Convolutional layer, a MaxPooling layer, two stacked Bidirectional LSTM layers, a Dropout layer, and two Dense layers. Here's a breakdown of the components and layers in the model:

Embedding Layer: This layer converts integer-encoded vocabulary indices (vocab_size = 5000) into dense vectors of fixed size (embedding_size = 100). It learns and represents the semantic relationships between words in the input sequences, each of which is padded or truncated to max_len = 100 tokens.

Conv1D Layer: This layer performs 1D convolution over the embedded sequences. It uses 64 filters with a kernel size of 5 and 'same' padding, followed by a ReLU activation function. This helps the model capture local patterns and features in the sequences.

MaxPooling1D Layer: This layer performs max pooling on the output of the Conv1D layer with a pooling window of size 2, halving the sequence length while retaining the most important features.

Bidirectional LSTM Layers: The first Bidirectional LSTM has 128 units and returns the full output sequence (return_sequences=True), which feeds a second Bidirectional LSTM with 64 units that returns only its final state. The bidirectional wrappers let both layers capture information from past and future context, which is especially useful for sequence modeling tasks.

Dropout Layer: Dropout is added after the LSTM layers to prevent overfitting. A dropout rate of 0.5 means that during training, 50% of the preceding layer's outputs are randomly set to zero, introducing regularization.

Dense Layers: A Dense layer with 32 units and ReLU activation is followed by a single output neuron with a sigmoid activation function, which suits binary classification. The output is a probability indicating the likelihood that a review is "happy".

The model is compiled with a binary cross-entropy loss function and the Adam optimizer, using accuracy as the evaluation metric. Training uses EarlyStopping with a patience of 5, which means training will stop if the validation loss does not improve for 5 consecutive epochs. The training history, including loss and accuracy values, is saved to 'hotel_history_lstm.npy', and the trained model is saved to 'hotel_lstm.h5'.
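As a quick orientation before the full listing, here is a condensed sketch of just this layer stack; it is only a sketch, and the complete training routine (build_train_lstm() in hotel_DL.py) follows below. The per-sample output shapes in the comments assume max_len = 100, as used in the script.

#lstm_sketch.py
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Input, Embedding, Conv1D, MaxPooling1D,
                                     Bidirectional, LSTM, Dropout, Dense)

model = Sequential([
    Input(shape=(100,)),                               # integer-encoded review of max_len tokens
    Embedding(5000, 100),                              # -> (100, 100)
    Conv1D(64, 5, padding='same', activation='relu'),  # -> (100, 64)
    MaxPooling1D(2),                                   # -> (50, 64)
    Bidirectional(LSTM(128, return_sequences=True)),   # -> (50, 256)
    Bidirectional(LSTM(64)),                           # -> (128,)
    Dropout(0.5),                                      # -> (128,)
    Dense(32, activation='relu'),                      # -> (32,)
    Dense(1, activation='sigmoid'),                    # -> (1,) probability of "happy"
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Note that each Bidirectional wrapper concatenates the forward and backward passes, which is why the 128-unit LSTM appears as 256 features in the shape comments.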


#hotel_DL.py
import os
import re
import string
import joblib
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O
import seaborn as sns
import tensorflow as tf
from os import path
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from nltk.corpus import stopwords  # requires: nltk.download('stopwords')
from tensorflow.keras import models, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (Dense, LSTM, Bidirectional, Embedding,
                                     Dropout, Conv1D, MaxPooling1D)

def read_dataset():    
    #Reads dataset
    curr_path = os.getcwd() 
    df = pd.read_csv(curr_path+"/train.csv")
    
    #Drops User_ID
    df = df.drop("User_ID", axis = 1)
    
    return df

#Converts Is_Response to {0,1}
def convert_response(Is_Response):
    if  Is_Response == "not happy":
        return 0
    elif Is_Response == "happy":
        return 1
    
def preprocessing(df):
    #Replaces duplicate browser names
    df['Browser_Used'] = df['Browser_Used'].replace(['InternetExplorer'],'Internet Explorer')
    df['Browser_Used'] = df['Browser_Used'].replace(['IE'],'Internet Explorer')
    df['Browser_Used'] = df['Browser_Used'].replace(['Mozilla'],'Mozilla Firefox')
    df['Browser_Used'] = df['Browser_Used'].replace(['Firefox'],'Mozilla Firefox')
    df['Browser_Used'] = df['Browser_Used'].replace(['Chrome'],'Google Chrome')

    df['Is_Response'] = df['Is_Response'].apply(lambda x : convert_response(x))

    #Combines browser used and device used with description
    df['final_text'] = df['Browser_Used'].fillna('') + ' ' + df['Device_Used'].fillna('') + ' ' + df['Description'] 

    return df

#Removes stop words
def remove_stopwords(text):
    stop_words = set(stopwords.words('english'))
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

# Remove url  
def remove_url(text):
    url = re.compile(r'https?://\S+|www\.\S+')
    return url.sub(r'',text)

# Remove punct
def remove_punct(text):
    table = str.maketrans('', '', string.punctuation)
    return text.translate(table)

# Remove html 
def remove_html(text):
    html=re.compile(r'<.*?>')
    return html.sub(r'',text)

# Remove @username
def remove_username(text):
    return re.sub(r'@[^\s]+', '', text)

# Remove emojis
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

# Expand contractions
def decontraction(text):
    text = re.sub(r"won\'t've", "will not have", text)
    text = re.sub(r"won\'t", "will not", text)
    text = re.sub(r"can\'t've", "can not have", text)
    text = re.sub(r"can\'t", "can not", text)
    text = re.sub(r"don\'t", "do not", text)
    text = re.sub(r"ma\'am", "madam", text)
    text = re.sub(r"let\'s", "let us", text)
    text = re.sub(r"ain\'t", "am not", text)
    text = re.sub(r"shan\'t", "shall not", text)
    text = re.sub(r"sha\'n\'t", "shall not", text)
    text = re.sub(r"o\'clock", "of the clock", text)
    text = re.sub(r"y\'all", "you all", text)
    text = re.sub(r"n\'t've", " not have", text)
    text = re.sub(r"n\'t", " not", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'s", " is", text)
    text = re.sub(r"\'d've", " would have", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'ll've", " will have", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'t", " not", text)
    text = re.sub(r"\'m", " am", text)
    return text
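
# Hypothetical example: decontraction("I won't stay, it wasn't clean")
#   -> "I will not stay, it was not clean"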

# Separate alphanumeric tokens (e.g. "room101" -> "room 101")
def seperate_alphanumeric(text):
    words = re.findall(r"[^\W\d_]+|\d+", text)
    return " ".join(words)

# Truncates a run of repeated characters to at most two (e.g. "cooool" -> "cool")
def cont_rep_char(text):
    tchr = text.group(0)

    if len(tchr) > 1:
        return tchr[0:2]

# Applies rep to every run of repeated word characters
def unique_char(rep, text):
    substitute = re.sub(r'(\w)\1+', rep, text)
    return substitute

# Keeps alphabetic characters only, replacing everything else with a space
def char(text):
    substitute = re.sub(r'[^a-zA-Z]',' ',text)
    return substitute

def clean_text(df):
    #Applies the cleaning functions to the combined review text
    df['final_text'] = df['final_text'].apply(lambda x : remove_username(x))
    df['final_text'] = df['final_text'].apply(lambda x : remove_url(x))
    df['final_text'] = df['final_text'].apply(lambda x : remove_emoji(x))
    df['final_text'] = df['final_text'].apply(lambda x : decontraction(x))
    df['final_text'] = df['final_text'].apply(lambda x : seperate_alphanumeric(x))
    df['final_text'] = df['final_text'].apply(lambda x : unique_char(cont_rep_char,x))
    df['final_text'] = df['final_text'].apply(lambda x : char(x))
    df['final_text'] = df['final_text'].apply(lambda x : x.lower())
    df['final_text'] = df['final_text'].apply(lambda x : remove_stopwords(x))
    
    return df

def tokenize_pad_sequences(text):
    '''
    This function tokenizes the input text into sequences of integers and then
    pads each sequence to the same length.
    '''
    # Text tokenization
    max_words = 5000
    max_len = 100
    tokenizer = Tokenizer(num_words=max_words, lower=True, split=' ')
    tokenizer.fit_on_texts(text)
    # Transforms text to a sequence of integers
    X = tokenizer.texts_to_sequences(text)
    # Pad sequences to the same length
    X = pad_sequences(X, padding='post', maxlen=max_len)
    # return sequences
    return X, tokenizer
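
# Hypothetical usage: two short cleaned reviews become a (2, 100) integer
# matrix, zero-padded on the right because padding='post':
#   X, tok = tokenize_pad_sequences(["great hotel clean room", "staff not helpful"])
#   print(X.shape)   # (2, 100)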

def split_dataset():  
    #Reads dataset
    df = read_dataset()
    
    #Preprocessing
    df = preprocessing(df)

    #Cleans text
    df = clean_text(df)
    
    #Extracts input and output variables
    #Tokenizes input
    X, _ = tokenize_pad_sequences(df['final_text'])    
    y=df["Is_Response"]    

    #Splits the data to get train, test, and validation data
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size = 0.2, random_state = 2021, stratify=y)   
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.3, random_state=42, stratify=y_train)
    
    #Saves the splits into pickle files with joblib
    joblib.dump(X_train, 'X_train.pkl')
    joblib.dump(X_test, 'X_test.pkl')
    joblib.dump(X_valid, 'X_valid.pkl')
    joblib.dump(y_train, 'y_train.pkl')
    joblib.dump(y_test, 'y_test.pkl')  
    joblib.dump(y_valid, 'y_valid.pkl')
    
    return X_train, X_test, X_valid, y_train, y_test, y_valid
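
# Note on the nested split above: test_size=0.2 holds out 20% of the data for
# testing, and test_size=0.3 on the remaining 80% yields roughly 56% train,
# 24% validation, and 20% test overall.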

def load_files(): 
    if path.isfile('X_train.pkl'):
        X_train = joblib.load('X_train.pkl')
        X_test = joblib.load('X_test.pkl')
        X_valid = joblib.load('X_valid.pkl')
        y_train = joblib.load('y_train.pkl')
        y_test = joblib.load('y_test.pkl')
        y_valid = joblib.load('y_valid.pkl')
        
    else:
        X_train, X_test, X_valid, y_train, y_test, y_valid = split_dataset()
    
    print(X_train.shape)
    return X_train, X_test, X_valid, y_train, y_test, y_valid   

def plot_confusion_matrix(y_test, y_pred, name):
    # Calculate the confusion matrix
    conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
    
    # Define class names for labels
    class_list = ['Not Happy', 'Happy']
    
    # Create a figure and axis for the heatmap
    fig, ax = plt.subplots(figsize=(25, 15))
    
    # Generate the heatmap
    sns.heatmap(conf_mat, annot=True, ax=ax, cmap='summer', fmt='g', annot_kws={"size": 25})
    
    # Set axis labels and title
    ax.set_xlabel('Predicted labels', fontsize=20)
    ax.set_ylabel('True labels', fontsize=20)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.set_title('Confusion Matrix of ' + name, fontsize=30)
    
    # Set tick labels for class names
    ax.xaxis.set_ticklabels(class_list)
    ax.yaxis.set_ticklabels(class_list)
    
    # Display the heatmap
    plt.show()

def plot_real_pred_val(y_test, ypred, name):
    plt.figure(figsize=(25, 15))
    class_list = ['Not Happy', 'Happy']
    acc = accuracy_score(y_test, ypred)
    
    # Plot the predicted values in red and the true values in white
    plt.scatter(range(len(ypred)), ypred, color="red", lw=5, label="Predicted", s=300, alpha=0.9)
    plt.scatter(range(len(y_test)), y_test, color="white", label="Actual", s=300, alpha=0.9)
    
    # Set plot title, x-axis label, and legend
    plt.title("Predicted Values vs True Values of " + name, fontsize=30)
    plt.xlabel("Accuracy: " + str(round((acc * 100), 3)) + "%", fontsize=25)
    plt.legend(fontsize=25)
    
    # Set y-axis tick positions and labels based on class_list
    plt.yticks(range(len(class_list)), class_list, fontsize=15)
    plt.xticks(fontsize=15)
    
    # Add grid lines and set background color
    plt.grid(True, alpha=0.75, lw=1, ls='-.')
    plt.gca().set_facecolor('dimgray')  # Set background color
    
    # Display the plot
    plt.show()

def plot_accuracy(history, name):
    acc = history['accuracy']
    val_acc = history['val_accuracy']
    epochs = range(1, len(acc) + 1)

    # Create a figure and axis for the plot
    fig, ax = plt.subplots(figsize=(25, 15))
    
    # Plot training accuracy in red and validation accuracy in blue dashed line
    plt.plot(epochs, acc, 'r', label='Training accuracy', lw=7)
    plt.plot(epochs, val_acc, 'b--', label='Validation accuracy', lw=7)
    
    # Set plot title and legend
    plt.title('Training and validation accuracy of ' + name, fontsize=35)
    plt.legend(fontsize=25)
    
    # Set x-axis label and tick label font size
    ax.set_xlabel("Epoch", fontsize=30)
    ax.tick_params(labelsize=30)
    
    # Set background color
    plt.gca().set_facecolor('lightgray')
    
    # Display the plot
    plt.show()   

def plot_loss(history, name):
    loss = history['loss']
    val_loss = history['val_loss']
    epochs = range(1, len(loss) + 1)

    # Create a figure and axis for the plot
    fig, ax = plt.subplots(figsize=(25, 15))
    
    # Plot training loss in red and validation loss in blue dashed line
    plt.plot(epochs, loss, 'r', label='Training loss', lw=7)
    plt.plot(epochs, val_loss, 'b--', label='Validation loss', lw=7)
    
    # Set plot title and legend
    plt.title('Training and validation loss of ' + name, fontsize=35)
    plt.legend(fontsize=25)
    
    # Set x-axis label and tick label font size
    ax.set_xlabel("Epoch", fontsize=30)
    ax.tick_params(labelsize=30)
    
    # Set background color
    plt.gca().set_facecolor('lightgray')
    
    # Display the plot
    plt.show()
    
def prediction(model, X_test, y_test, name):
    #Predicts probabilities on the test set and applies a threshold of 0.5
    #(this value can be modified)
    y_prob = model.predict(X_test)
    y_pred = (y_prob >= 0.5).astype(int).ravel()

    #Performance Evaluation - Accuracy and Classification Report
    #Accuracy Score
    print('Accuracy Score : ' + name, accuracy_score(y_test, y_pred, normalize=True), '\n')

    #Precision, recall report
    print('Classification Report : ' + name + '\n', classification_report(y_test, y_pred))
    
    return y_pred    

def build_train_lstm(X_train, y_train, X_valid, y_valid):
    vocab_size = 5000
    embedding_size = 100  # embedding size
    epochs = 20  # number of epochs
    max_len = 100  # maximum sequence length
    batch_size = 64  # batch size

    model = Sequential()
    model.add(Embedding(vocab_size, embedding_size, input_length=max_len))
    model.add(Conv1D(filters=64, kernel_size=5, padding='same', activation='relu'))  # local n-gram feature extraction
    model.add(MaxPooling1D(pool_size=2))  # halves the sequence length
    model.add(Bidirectional(LSTM(128, return_sequences=True)))  # first BiLSTM returns the full sequence
    model.add(Bidirectional(LSTM(64)))  # second BiLSTM returns only its final state
    model.add(Dropout(0.5))  # regularization to reduce overfitting
    model.add(Dense(32, activation='relu'))  # fully connected layer before the output
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    print(model.summary())

    es = EarlyStopping(monitor='val_loss', patience=5)
    

    history = model.fit(X_train, y_train,
                        validation_data=(X_valid, y_valid),
                        batch_size=batch_size, epochs=epochs, verbose=1,
                        callbacks=[es])

    # Save the trained model
    model.save('hotel_lstm.h5')

    # Save training history into a npy file
    np.save('hotel_history_lstm.npy', history.history)

    # Print keys of the history dictionary
    print(history.history.keys())

def implement_lstm_model():
    #Reads files
    X_train, X_test, X_valid, y_train, y_test, y_valid = load_files()   
    
    #Builds and trains LSTM model
    build_train_lstm(X_train, y_train, X_valid, y_valid)
    
    #Loads LSTM model
    lstm_model = tf.keras.models.load_model('hotel_lstm.h5')

    #Gets predicted values
    y_pred = prediction(lstm_model, X_test, y_test, 'LSTM Model')
    
    #Plots confusion matrix
    plot_confusion_matrix(y_test, y_pred, 'LSTM Model')
    
    #Plots true values versus predicted values diagram
    plot_real_pred_val(y_test, y_pred, 'LSTM Model')    
    
    # Load the saved training history
    history = np.load('hotel_history_lstm.npy', allow_pickle=True).item()

    #Plots accuracy
    plot_accuracy(history, 'LSTM Model')
    
    #Plots loss
    plot_loss(history, 'LSTM Model')    
    
#Implements LSTM Model    
implement_lstm_model()







Output:

Accuracy Score : LSTM Model 0.8647746243739566 

Classification Report : LSTM Model
               precision    recall  f1-score   support

           0       0.78      0.80      0.79      2419
           1       0.91      0.90      0.90      5368

    accuracy                           0.86      7787
   macro avg       0.84      0.85      0.84      7787
weighted avg       0.87      0.86      0.87      7787


Convolutional Neural Network (CNN) Model

The code outlines a Convolutional Neural Network (CNN) model designed for text classification. The model processes sequences of text and makes binary classification predictions. Let's delve into the details of the model structure:

Input and Embedding Layer: The model begins by accepting sequences of text as inputs. These sequences are expected to have a maximum length of 100 tokens. The embedding layer then maps the input sequences to dense vector representations. In this layer, each token is transformed into a 64-dimensional vector. This embedding process captures semantic relationships among words and tokens, facilitating the model's understanding of the underlying meaning of the text.

Convolutional and Pooling Layers: After embedding, the model employs a series of convolutional layers, which are well suited to identifying local patterns and features within data. The first convolutional layer uses 64 filters with a kernel size of 3 (and the default 'valid' padding), extracting distinctive features from the embedded sequences. To stabilize training and promote faster convergence, Batch Normalization is applied after the first two convolutional layers, normalizing their activations. Each of these two convolutional layers is followed by a MaxPooling layer, which downsamples the feature maps, retaining the most salient information while reducing computational cost. The first MaxPooling layer uses a pool size of 3; it is followed by a second convolutional layer with 64 filters and a kernel size of 5, and then another MaxPooling layer with a pool size of 5 that further downsamples the data.

Global Max Pooling and Flattening: A third convolutional layer with 64 filters and a kernel size of 5 is applied, followed by a GlobalMaxPooling layer. This operation takes the maximum of each feature map across the entire sequence, encapsulating the most salient high-level information. A Flatten layer follows; since global pooling already yields a 1D vector per sample, it effectively just hands the features on to the dense layers.

Dense and Output Layers: The flattened vector is connected to a dense layer with 100 units, each activated by a rectified linear unit (ReLU). This dense layer introduces complexity and enables the model to learn intricate relationships. Finally, a dense output layer with a single neuron and a sigmoid activation function produces the final binary classification predictions. This output neuron generates a probability score ranging from 0 to 1, indicating the likelihood of the input sequence belonging to the positive class. The model is optimized using the Adam optimizer, and it aims to minimize the binary cross-entropy loss. During training, the model's progress is monitored using the validation loss, and training stops if the loss does not improve for five consecutive epochs.

The trained model is saved as 'hotel_cnn.h5', and the history of training performance, including accuracy and loss values, is saved in 'hotel_history_cnn.npy'. This detailed history enables insights into the model's learning process over time. By understanding the intricate interplay of convolutional operations, pooling, and dense layers, the model effectively captures patterns within text sequences, which can then be leveraged for accurate text classification tasks.
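To make the downsampling concrete, here is a rough shape trace through the same stack of layers (batch dimension shown as None; the BatchNormalization layers are omitted because they do not change shapes). The numbers assume the Keras defaults used in the code below, namely 'valid' padding for the convolutions and a pooling stride equal to the pool size.

#cnn_shape_trace.py
from tensorflow.keras import layers, models

inp = layers.Input(shape=(100,))
x = layers.Embedding(12000, 64)(inp)            # (None, 100, 64)
x = layers.Conv1D(64, 3, activation='relu')(x)  # (None, 98, 64)
x = layers.MaxPool1D(3)(x)                      # (None, 32, 64)
x = layers.Conv1D(64, 5, activation='relu')(x)  # (None, 28, 64)
x = layers.MaxPool1D(5)(x)                      # (None, 5, 64)
x = layers.Conv1D(64, 5, activation='relu')(x)  # (None, 1, 64)
x = layers.GlobalMaxPool1D()(x)                 # (None, 64)
print(models.Model(inp, x).output_shape)        # (None, 64)

Because global pooling already yields a 64-dimensional vector per review, the Flatten layer in the full model is effectively a pass-through, and the Dense layer with 100 units operates on these 64 features.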


def build_train_cnn(X_train, y_train, X_valid, y_valid):
    MAX_FEATURES = 12000
    MAX_LENGTH = 100
    NUM_BATCH = 32
    NUM_EPOCHS = 20
    sequences = layers.Input(shape=(MAX_LENGTH,))
    embedded = layers.Embedding(MAX_FEATURES, 64)(sequences)
    x = layers.Conv1D(64, 3, activation='relu')(embedded)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(3)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(5)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(100, activation='relu')(x)
    predictions = layers.Dense(1, activation='sigmoid')(x)
    model = models.Model(inputs=sequences, outputs=predictions)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    print(model.summary())
    es = EarlyStopping(monitor = 'val_loss', patience=5)

    history = model.fit(X_train, y_train, batch_size=NUM_BATCH,
                        epochs=NUM_EPOCHS,
                        validation_data=(X_valid, y_valid), callbacks = [es])

    # Save the trained model
    model.save('hotel_cnn.h5')

    # Save training history into a npy file
    np.save('hotel_history_cnn.npy', history.history)

    # Print keys of the history dictionary
    print(history.history.keys())

def implement_cnn_model():
    #Reads files
    X_train, X_test, X_valid, y_train, y_test, y_valid = load_files()   
    
    #Builds and trains CNN model
    build_train_cnn(X_train, y_train, X_valid, y_valid)
    
    #Loads CNN model
    cnn_model = tf.keras.models.load_model('hotel_cnn.h5')

    #Gets predicted values
    y_pred = prediction(cnn_model, X_test, y_test, 'CNN Model')
    
    #Plots confusion matrix
    plot_confusion_matrix(y_test, y_pred, 'CNN Model')
    
    #Plots true values versus predicted values diagram
    plot_real_pred_val(y_test, y_pred, 'CNN Model')    
    
    # Load the saved training history
    history = np.load('hotel_history_cnn.npy', allow_pickle=True).item()

    #Plots accuracy
    plot_accuracy(history, 'CNN Model')
    
    #Plots loss
    plot_loss(history, 'CNN Model')

#Implements CNN Model    
implement_cnn_model()






Output:

Accuracy Score : CNN Model 0.853730576602029 

Classification Report : CNN Model
               precision    recall  f1-score   support

           0       0.73      0.79      0.76      2291
           1       0.91      0.88      0.89      5496

    accuracy                           0.85      7787
   macro avg       0.82      0.84      0.83      7787
weighted avg       0.86      0.85      0.86      7787


FNN (Feed-Forward Neural Network) Model

The code defines and trains a Feedforward Neural Network (FNN) model for text classification. The model's architecture is designed to process sequences of text and make binary classification predictions. Let's delve into the detailed structure of the model:

Input and Embedding Layer: The model commences by receiving input sequences of text, where each sequence has a maximum length of 100 tokens. An embedding layer is introduced to map the input tokens to dense vector representations. The embedding process transforms words into 64-dimensional vectors. This mapping captures semantic relationships and contextual meanings among words, enabling the model to comprehend the underlying essence of the text data.

Flattening and Dense Layers: Following embedding, the embedded sequences are flattened into a one-dimensional vector. This operation restructures the data into a format that can be fed into a traditional fully connected neural network. Subsequently, a dense layer with 128 units is introduced, activated by the rectified linear unit (ReLU) function. This layer serves as a key component of the FNN architecture, enabling the model to learn complex relationships within the data. Additionally, a dropout layer with a dropout rate of 0.5 is applied. Dropout serves as a regularization technique, temporarily deactivating certain neurons during training to prevent overfitting.

Additional Dense Layer: Another dense layer is employed, containing 64 units with a ReLU activation function. This layer further enhances the model's capacity to capture intricate patterns in the data. The combination of multiple dense layers introduces increasing levels of abstraction and complexity, facilitating the extraction of relevant features for classification tasks.

Output Layer and Training: The final layer consists of a single neuron with a sigmoid activation function. This neuron generates a probability score ranging between 0 and 1, indicating the likelihood that the input sequence belongs to the positive class. The model's optimization process aims to minimize the binary cross-entropy loss using the Adam optimizer. During training, the model's progress is monitored through the validation loss, and training is halted if the loss doesn't improve for five consecutive epochs, thanks to the Early Stopping callback. The model's architecture, training parameters, and evaluation metrics are printed and displayed in the console. The trained FNN model is saved as 'hotel_fnn.h5', and the historical records of the training process are stored in 'hotel_history_fnn.npy'.


The FNN architecture takes a different approach compared to the previous CNN model. By directly flattening the embedded sequences and applying multiple dense layers, the FNN effectively learns intricate relationships within the text data. The model's performance can be evaluated by observing the accuracy and loss trends plotted from the training history. Keep in mind that the choice between CNNs and FNNs for text classification depends on the nature of the data and the desired level of feature extraction complexity.
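Because the Flatten layer turns the (100, 64) embedded sequence into a 6,400-dimensional vector, most of the FNN's weights sit in the embedding matrix and the first dense layer. Here is a rough, back-of-the-envelope parameter count using the sizes from the code below (MAX_FEATURES = 12000, MAX_LENGTH = 100, embedding dimension 64):

#fnn_param_count.py
embedding_params = 12000 * 64                  # 768,000
flattened_dim    = 100 * 64                    # 6,400 features after Flatten
dense1_params    = flattened_dim * 128 + 128   # 819,328 (weights + biases)
dense2_params    = 128 * 64 + 64               # 8,256
output_params    = 64 * 1 + 1                  # 65
print(embedding_params + dense1_params + dense2_params + output_params)  # 1,595,649

The Dropout layer after the first dense layer is therefore the main regularizer guarding this relatively large number of trainable weights against overfitting.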


def build_train_fnn(X_train, y_train, X_valid, y_valid):
    MAX_FEATURES = 12000
    MAX_LENGTH = 100
    NUM_BATCH = 32
    NUM_EPOCHS = 20
    sequences = layers.Input(shape=(MAX_LENGTH,))
    embedded = layers.Embedding(MAX_FEATURES, 64)(sequences)
    x = layers.Flatten()(embedded)  # Flatten the embedded sequences
    x = layers.Dense(128, activation='relu')(x)  # FNN-specific layer
    x = layers.Dropout(0.5)(x)  # Dropout layer for regularization
    x = layers.Dense(64, activation='relu')(x)  # Another dense layer
    predictions = layers.Dense(1, activation='sigmoid')(x)
    model = models.Model(inputs=sequences, outputs=predictions)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    print(model.summary())
    es = EarlyStopping(monitor='val_loss', patience=5)

    history = model.fit(X_train, y_train, batch_size=NUM_BATCH,
                        epochs=NUM_EPOCHS,
                        validation_data=(X_valid, y_valid), callbacks=[es])

    # Save the trained model
    model.save('hotel_fnn.h5')

    # Save training history into a npy file
    np.save('hotel_history_fnn.npy', history.history)

    # Print keys of the history dictionary
    print(history.history.keys())

def implement_fnn_model():
    # Reads files
    X_train, X_test, X_valid, y_train, y_test, y_valid = load_files()

    # Builds and trains FNN model
    build_train_fnn(X_train, y_train, X_valid, y_valid)

    # Loads FNN model
    fnn_model = tf.keras.models.load_model('hotel_fnn.h5')

    # Gets predicted values
    y_pred = prediction(fnn_model, X_test, y_test, 'FNN Model')

    # Plots confusion matrix
    plot_confusion_matrix(y_test, y_pred, 'FNN Model')

    # Plots true values versus predicted values diagram
    plot_real_pred_val(y_test, y_pred, 'FNN Model')

    # Load the saved training history
    history = np.load('hotel_history_fnn.npy', allow_pickle=True).item()

    # Plots accuracy
    plot_accuracy(history, 'FNN Model')

    # Plots loss
    plot_loss(history, 'FNN Model')

# Implements FNN Model
implement_fnn_model()






Output:

Accuracy Score : FNN Model 0.8482085527160652 

Classification Report : FNN Model
               precision    recall  f1-score   support

           0       0.69      0.81      0.74      2112
           1       0.92      0.86      0.89      5675

    accuracy                           0.85      7787
   macro avg       0.81      0.84      0.82      7787
weighted avg       0.86      0.85      0.85      7787





