import csv
import os
from os import chdir
from pathlib import Path

import librosa
import librosa.feature
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import models
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow import keras

import constants


# Fixed number of MFCC time frames per sample; must stay in sync with
# MFCC_MAX_LEN in constants.py (216 for 5-second clips with the current settings)
max_len = constants.MFCC_MAX_LEN


def create_csv_header():
    # One CSV column per MFCC value (coefficients x time frames), plus filename and label
    header = 'filename'
    for i in range(constants.MFCC_FEATURE_START, ((constants.MFCC_FEATURE_END - 1) * max_len) + 1):
        header += f' mfcc{i}'
    header += ' label'
    header = header.split()
    with open(constants.FEATURES_CSV_NAME, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(header)


def extract_features(trainingDataDir, trainingDataSubDirs):
    create_csv_header()
    # Looping over every file inside the subdirectories for feature extraction
    for trainingDataSubDir in trainingDataSubDirs:
        for fileName in os.listdir(trainingDataDir/f'{trainingDataSubDir}'):
            if fileName.endswith(".wav"):
                audioFile = trainingDataDir/f'{trainingDataSubDir}/{fileName}'
                print("Extracting Features from Directory "+trainingDataSubDir+" and file "+audioFile.name)
                y, sr = librosa.load(audioFile, mono=True)
                # Use the sample rate actually returned by librosa.load (22050 Hz by default)
                # rather than a hard-coded 44100, so the MFCCs match the loaded audio
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=constants.N_MFCC)
                to_append = f'{audioFile.name}'

                print ("mfcc.shape before cutting/padding: ", mfcc.shape)
                #the mfcc extraction results in slighty different time frames per mfcc segment
                #in the case of 5seconds and 20 mfcc segments there are som time informations between 215 and 217
                #in order to unify the size the ones that stores under 216 timeinformation there will be some padding
                #for the ones that stores over 216 time information there will be a trim operation
                #IMPORTANT:
                #if the time of the audiofiles or the mfcc extraction number changes (current value is 20)
                #there needs to be an adjustment to the MAX_LEN parameter
                #1. check the new size of all the files
                #2. find a suitable average for padding or cutting
                #3. adjust the MAX_LEN variable in the constants.py file
                if mfcc.shape[1] < max_len:
                    pad_width = max_len - mfcc.shape[1]
                    mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
                elif mfcc.shape[1] > max_len:
                    mfcc = mfcc[:, :max_len]
                print("mfcc.shape after cutting/padding: ", mfcc.shape)
                # Flatten the 2D MFCC matrix so that every value becomes one CSV column
                mfcc = np.reshape(mfcc, max_len * constants.N_MFCC)
                for g in mfcc:
                    to_append += f' {g}'
                if trainingDataSubDir == constants.CAR:
                    to_append += f' {constants.LIGHT_WEIGHT}'
                elif trainingDataSubDir == constants.BUS:
                    to_append += f' {constants.MEDIUM_WEIGHT}'
                elif trainingDataSubDir == constants.TRUCK:
                    to_append += f' {constants.HEAVY_WEIGHT}'
                elif trainingDataSubDir == constants.MOTORCYCLE:
                    to_append += f' {constants.TWO_WHEELED}'
                elif trainingDataSubDir == constants.TRAM:
                    to_append += f' {constants.RAIL_BOUND}'

                with open(constants.FEATURES_CSV_NAME, 'a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow(to_append.split())
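
# The MAX_LEN adjustment steps above need the actual frame counts per file.
# A minimal diagnostic sketch, not part of the original pipeline (the function
# name report_mfcc_frame_counts is ours):
def report_mfcc_frame_counts(trainingDataDir, trainingDataSubDirs):
    frame_counts = []
    for trainingDataSubDir in trainingDataSubDirs:
        for fileName in os.listdir(trainingDataDir/f'{trainingDataSubDir}'):
            if fileName.endswith(".wav"):
                y, sr = librosa.load(trainingDataDir/f'{trainingDataSubDir}/{fileName}', mono=True)
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=constants.N_MFCC)
                frame_counts.append(mfcc.shape[1])
    print("frames min/max/mean:", min(frame_counts), max(frame_counts), np.mean(frame_counts))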

def preprocessing_csv_data():
    print("Reading Features... ")
    data = pd.read_csv(constants.FEATURES_CSV_NAME)
    # Dropping the filename column, which is not a feature
    data = data.drop(['filename'], axis=1)
    return data


def encode_labels(data):
    # Extracting classes/label column as y from csv and converting string labels to numbers using LabelEncoder
    audio_list = data.iloc[:, -1]
    encoder = LabelEncoder()
    target_labels = encoder.fit_transform(audio_list)
    return target_labels, encoder
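
# To map numeric predictions back to readable class names later, LabelEncoder
# offers inverse_transform; a minimal usage sketch (variable names are ours):
# predicted_indices = np.argmax(model.predict(X_test), axis=-1)
# predicted_names = encoder.inverse_transform(predicted_indices)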


def normalize_data(data):
    # Extracting the remaining columns as X and normalizing them to a common scale.
    # NOTE: the scaler is fitted on the full dataset before the train/test split;
    # a leakage-free variant is sketched after this function
    scaler = StandardScaler()
    X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype=float))
    # Reshape the flat feature rows back into (coefficients, frames, channels) "images" for the CNN
    X = X.reshape(-1, constants.N_MFCC, constants.MFCC_MAX_LEN, constants.CHANNELS)
    return X
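
# Fitting the scaler on all rows lets test-set statistics leak into training.
# A leakage-free variant, sketched under the assumption that the split happens
# first (normalize_split is our name, not part of the original pipeline):
def normalize_split(train_raw, test_raw):
    scaler = StandardScaler()
    train = scaler.fit_transform(train_raw)  # fit statistics on training data only
    test = scaler.transform(test_raw)        # reuse the training statistics
    return train, test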


def train_test_data_split(X, y):
    # Splitting the dataset into train (80%) and test (20%) sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
    print("Shape of a single training sample: ", X_train[0].shape)
    return X_train, X_test, y_train, y_test
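
# With five classes of possibly uneven size, a stratified split keeps the class
# proportions stable across train and test; a minimal sketch (the random_state
# value is an arbitrary choice for reproducibility):
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.20, stratify=y, random_state=42)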


def create_and_compile_model():
    print("Creating a Model")
    from keras.layers import Conv2D, Dense, MaxPooling2D, Dropout, Flatten
    model = models.Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(constants.N_MFCC, constants.MFCC_MAX_LEN, constants.CHANNELS)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    model.add(Dense(constants.OUTPUT_LAYER_DIMENSIONS, activation='softmax'))
    print("Compiling a Model")
    optimizer = keras.optimizers.RMSprop()
    model.compile(optimizer=optimizer, loss=constants.LOSS_FUNCTION_SPARSE, metrics=[constants.ACCURACY_METRICS])
    print(model.summary())
    return model


def train_and_save_model(model, X_train, y_train, X_test, y_test):
    logdir = constants.LOG_DIR_PATH
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    print("Start Training...")
    history = model.fit(X_train, y_train, batch_size=32, epochs=35, validation_data=(X_test, y_test), callbacks=[tensorboard_callback])
    # Saving the trained model to avoid re-training
    model.save(constants.TRAINED_MODEL)
    return history
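
# Training for a fixed 35 epochs can overfit; Keras' EarlyStopping callback stops
# once validation loss stalls. A sketch (the patience value is an assumption):
# early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,
#                                                restore_best_weights=True)
# history = model.fit(X_train, y_train, batch_size=32, epochs=35,
#                     validation_data=(X_test, y_test),
#                     callbacks=[tensorboard_callback, early_stopping])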


def predict(model, encoder, X_test, y_test):
    print("Predictions.....")
    # Argmax over the softmax outputs yields the predicted class indices
    predictions = np.argmax(model.predict(X_test), axis=-1)
    # encoder.classes_ lists the class names in the order of the encoded indices,
    # which is the order classification_report expects for target_names
    print(classification_report(y_test, predictions, target_names=encoder.classes_))
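
# A confusion matrix shows which vehicle classes get mixed up; a minimal sketch
# using sklearn (print_confusion_matrix is our name, not part of the pipeline):
def print_confusion_matrix(model, X_test, y_test):
    from sklearn.metrics import confusion_matrix
    predictions = np.argmax(model.predict(X_test), axis=-1)
    print(confusion_matrix(y_test, predictions))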


def plot_model_accuracy(history):
    # Plot graph Model Accuracy
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()


def plot_model_loss(history):
    # Plot graph Model Loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper right')
    plt.show()


# Changing Directory to the Training Dataset Folder to collect its subdirectories
chdir(constants.TRAINING_DATA_DIRECTORY_NAME)
trainingDataDir = Path.cwd()
trainingDataSubDirs = os.listdir(trainingDataDir)
chdir("..")
# Feature extraction is slow, so it is skipped when the features CSV already exists
if os.path.isfile(constants.FEATURES_CSV_NAME):
    print(constants.FEATURES_CSV_NAME + " already exists, skipping extraction")
else:
    extract_features(trainingDataDir, trainingDataSubDirs)



data = preprocessing_csv_data()
target_labels, encoder = encode_labels(data)
X = normalize_data(data)
X_train, X_test, y_train, y_test = train_test_data_split(X, target_labels)
model = create_and_compile_model()
history = train_and_save_model(model, X_train, y_train, X_test, y_test)
predict(model, encoder, X_test, y_test)
plot_model_accuracy(history)
plot_model_loss(history)
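
# Reusing the saved model for a single clip; a sketch assuming a 5-second wav
# (the file name single_clip.wav is hypothetical). For faithful results the
# StandardScaler statistics from training would also have to be applied:
# trained = keras.models.load_model(constants.TRAINED_MODEL)
# y, sr = librosa.load("single_clip.wav", mono=True)
# mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=constants.N_MFCC)
# if mfcc.shape[1] < constants.MFCC_MAX_LEN:
#     mfcc = np.pad(mfcc, ((0, 0), (0, constants.MFCC_MAX_LEN - mfcc.shape[1])), mode='constant')
# else:
#     mfcc = mfcc[:, :constants.MFCC_MAX_LEN]
# sample = mfcc.reshape(1, constants.N_MFCC, constants.MFCC_MAX_LEN, constants.CHANNELS)
# print(np.argmax(trained.predict(sample), axis=-1))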