Commit b52cc91f authored by Hotwani

Commit for code optimization

parent 6c4a93b8
Pipeline #1990 failed with stages
constants.py (after this commit):

from datetime import datetime  # public module; replaces the old pandas import (the commit used the private _datetime)

MFCC_RANGE_START = 1
MFCC_RANGE_END = 21
TRAINING_DATA_DIRECTORY_NAME = 'DemoTrainingDataset'
TESTING_DATA_DIRECTORY_NAME = 'Test'
CAR = 'Car'
@@ -36,4 +36,6 @@ ACCURACY_METRICS = 'accuracy'
LOG_DIR_PATH = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
TRAINED_MODEL = 'Trained_Model/trained_model.h5'
TEST_DATA_SPLIT = 0.20
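The renamed range constants still span 20 coefficients; a quick sanity check (a sketch, not part of the commit):

import constants

# MFCC_RANGE_START/END bound a half-open range, so the extractors below
# request n_mfcc = 21 - 1 = 20 coefficients per frame.
n_mfcc = constants.MFCC_RANGE_END - constants.MFCC_RANGE_START
assert n_mfcc == 20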
main.py (after this commit):

@@ -10,36 +10,43 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras import models
from keras import layers
from sklearn.metrics import classification_report
import constants
import sys
def create_csv_header():
    if os.path.isfile(constants.TRAINED_MODEL):
        sys.exit("Trained model file already exists, "
                 "remove/move trained_model.h5 to another location and start training again")
    if os.path.isfile(constants.FEATURES_CSV_NAME):
        sys.exit("features.csv already exists, please remove/move the file to another location and run main.py again")
    else:
        header = 'filename '
        for i in range(constants.MFCC_RANGE_START, constants.MFCC_RANGE_END):
            header += f' mfcc{i}'
        header += ' label'
        header = header.split()
        # 'x' mode creates the file and raises FileExistsError instead of overwriting
        file = open(constants.FEATURES_CSV_NAME, 'x', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(header)
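The switch from 'w' to 'x' makes file creation exclusive: opening an existing file now fails instead of silently truncating it. A minimal illustration (hypothetical file name):

from pathlib import Path

Path('demo.csv').touch()            # pre-existing file
try:
    open('demo.csv', 'x')
except FileExistsError:
    print("refusing to overwrite demo.csv")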
def extract_features(trainingDataDir, trainingDataSubDirs):
    create_csv_header()
    # Looping over every file inside the subdirectories for feature extraction
    for trainingDataSubDir in trainingDataSubDirs:
        for audio_file_name in os.listdir(trainingDataDir/f'{trainingDataSubDir}'):
            if audio_file_name.endswith(".wav"):
                audio_file = trainingDataDir/f'{trainingDataSubDir}/{audio_file_name}'
                print("Extracting Features from Directory " + trainingDataSubDir + " and file " + audio_file.name)
                y, sr = librosa.load(audio_file, mono=True)
                mfcc_features = librosa.feature.mfcc(y=y, sr=sr,
                                                     n_mfcc=(constants.MFCC_RANGE_END - constants.MFCC_RANGE_START))
                # One CSV row per file: filename, 20 per-coefficient means, label
                to_append = f'{audio_file.name}'
                for mfcc_segment in mfcc_features:
                    to_append += f' {np.mean(mfcc_segment)}'
                if trainingDataSubDir == constants.CAR:
                    to_append += f' {constants.LIGHT_WEIGHT}'
                elif trainingDataSubDir == constants.BUS:
@@ -58,104 +65,84 @@
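Each file is summarized by the mean of every MFCC row over time; a standalone sketch of that pooling, assuming librosa's bundled example clip is available:

import numpy as np
import librosa

y, sr = librosa.load(librosa.example('trumpet'), mono=True)
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
print(mfcc.shape)               # (20, n_frames): one row per coefficient
row_means = mfcc.mean(axis=1)   # the 20 values written per file above
print(row_means.shape)          # (20,)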
def preprocessing_csv_data():
    features_data = pd.read_csv(constants.FEATURES_CSV_NAME)
    features_data.head()
    # Dropping unnecessary columns (the filename column is not a feature)
    updated_features_data = features_data.drop(['filename'], axis=1)
    updated_features_data.head()
    return updated_features_data
def encode_labels(processedFeaturesData):
    # Extracting the label column as y and converting string labels to numbers using LabelEncoder
    audio_labels_list = processedFeaturesData.iloc[:, -1]
    encode_object = LabelEncoder()
    encoded_target_audio_labels = encode_object.fit_transform(audio_labels_list)
    return encoded_target_audio_labels, encode_object
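LabelEncoder assigns integers to classes in sorted order, which is the form the sparse loss below consumes; for instance:

from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
print(enc.fit_transform(['Car', 'Bus', 'Car']))  # [1 0 1]
print(enc.classes_)                              # ['Bus' 'Car'] (sorted)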
def normalize_data(processedData):
    # Normalizing - extracting the remaining columns as X and scaling them to zero mean and unit variance
    scale_object = StandardScaler()
    X_normalized_features = scale_object.fit_transform(np.array(processedData.iloc[:, :-1], dtype=float))
    return X_normalized_features
def train_test_data_split(XInput, yLabels):
    # Splitting the dataset into train and test sets
    X_split_train, X_split_test, y_split_train, y_split_test = train_test_split(XInput, yLabels,
                                                                                test_size=constants.TEST_DATA_SPLIT)
    return X_split_train, X_split_test, y_split_train, y_split_test
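With only a handful of vehicle classes, an unstratified 80/20 split can leave a class under-represented in the test set; a possible refinement (not part of the commit):

X_split_train, X_split_test, y_split_train, y_split_test = train_test_split(
    XInput, yLabels, test_size=constants.TEST_DATA_SPLIT, stratify=yLabels)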
def create_and_compile_model():
    print("Creating a Model")
    model_instance = models.Sequential()
    # Note: input_shape reads the module-level X_input_features defined below
    model_instance.add(layers.Dense(constants.HIDDEN_LAYER_1_DIMENSIONS, activation=constants.ACTIVATION_RELU,
                                    input_shape=(X_input_features.shape[1],)))
    model_instance.add(layers.Dense(constants.HIDDEN_LAYER_2_DIMENSIONS, activation=constants.ACTIVATION_RELU))
    model_instance.add(layers.Dense(constants.HIDDEN_LAYER_3_DIMENSIONS, activation=constants.ACTIVATION_RELU))
    model_instance.add(layers.Dense(constants.OUTPUT_LAYER_DIMENSIONS, activation=constants.ACTIVATION_SOFTMAX))
    print("Compiling a Model")
    model_instance.compile(optimizer=constants.OPTIMIZER_ADAM,
                           loss=constants.LOSS_FUNCTION_SPARSE,
                           metrics=[constants.ACCURACY_METRICS])
    return model_instance
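ACCURACY_METRICS is 'accuracy' per the constants above; assuming OPTIMIZER_ADAM and LOSS_FUNCTION_SPARSE resolve to the stock Keras strings, the compile step is equivalent to:

model_instance.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',  # expects the integer labels from LabelEncoder
                       metrics=['accuracy'])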
def train_and_save_model(compiledModel, X_train, y_train, X_test, y_test):
    log_directory = constants.LOG_DIR_PATH
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_directory)
    print("Start Training...")
    training_history = compiledModel.fit(X_train, y_train, epochs=35,
                                         validation_data=(X_test, y_test),
                                         callbacks=[tensorboard_callback])
    # Saving the trained model to avoid re-training
    compiledModel.save(constants.TRAINED_MODEL)
    return training_history
def predict(X_test, y_test):
    print("Predictions.....")
    # Note: uses the module-level compiled_model defined below
    final_predictions = np.argmax(compiled_model.predict(X_test), axis=-1)
    # Caution: classification_report pairs target_names with the sorted integer classes,
    # so encoder_object.classes_ is the safer source for this list
    target_names = [constants.LIGHT_WEIGHT, constants.MEDIUM_WEIGHT, constants.HEAVY_WEIGHT, constants.TWO_WHEELED,
                    constants.RAIL_BOUND]
    print(classification_report(y_test, final_predictions, target_names=target_names))
(Removed in this commit: the matplotlib import, the plot_model_accuracy and plot_model_loss helpers, and their calls at the end of the script.)
# Changing Directory to Training Dataset Folder
chdir(constants.TRAINING_DATA_DIRECTORY_NAME)
training_data_directory = Path.cwd()
training_data_sub_directories = os.listdir(training_data_directory)

extract_features(training_data_directory, training_data_sub_directories)
processed_features_data = preprocessing_csv_data()
target_audio_labels, encoder_object = encode_labels(processed_features_data)
X_input_features = normalize_data(processed_features_data)
X_train_data, X_test_data, y_train_data, y_test_data = train_test_data_split(X_input_features, target_audio_labels)
compiled_model = create_and_compile_model()
model_training_history = train_and_save_model(compiled_model, X_train_data, y_train_data, X_test_data, y_test_data)
predict(X_test_data, y_test_data)
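The accuracy and loss curves that the removed plot helpers drew are still available on the returned History object; a minimal way to inspect them:

final_acc = model_training_history.history['accuracy'][-1]
final_val_acc = model_training_history.history['val_accuracy'][-1]
print(f"final train/val accuracy: {final_acc:.3f} / {final_val_acc:.3f}")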
test.py (after this commit):

@@ -7,31 +7,36 @@
import csv
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder, StandardScaler
import constants
import sys
def create_csv_header():
    if os.path.isfile(constants.TEST_CSV_NAME):
        sys.exit("test.csv already exists, please remove/move the file to another location and run test.py again")
    else:
        header = ''
        for i in range(constants.MFCC_RANGE_START, constants.MFCC_RANGE_END):
            header += f' mfcc{i}'
        header = header.split()
        file = open(constants.TEST_CSV_NAME, 'x', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(header)
def extract_features(workingDir, subDirectories):
    create_csv_header()
    for subDirectory in subDirectories:
        if subDirectory == constants.TESTING_DATA_DIRECTORY_NAME:
            for test_audio_file_name in os.listdir(workingDir/f'{subDirectory}'):
                if test_audio_file_name.endswith(".wav"):
                    test_audio_file = workingDir / f'{subDirectory}/{test_audio_file_name}'
                    y, sr = librosa.load(test_audio_file, mono=True)
                    mfcc_features = librosa.feature.mfcc(y=y, sr=sr,
                                                         n_mfcc=(constants.MFCC_RANGE_END - constants.MFCC_RANGE_START))
                    to_append = ''
                    for mfcc_segment in mfcc_features:
                        to_append += f' {np.mean(mfcc_segment)}'
                    file = open(constants.TEST_CSV_NAME, 'a', newline='')
                    with file:
                        writer = csv.writer(file)
@@ -39,30 +44,29 @@
def preprocessing_csv_data():
    print("Reading Features... ")
    test_features_data = pd.read_csv(constants.TEST_CSV_NAME)
    test_features_data.head()
    return test_features_data
def normalize_data(processedData):
    # Normalizing - extracting all columns as X and scaling them to a common scale
    scale_object = StandardScaler()
    X_test = scale_object.fit_transform(np.array(processedData.iloc[:, :], dtype=float))
    return X_test
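One caveat the commit does not address: this fits a fresh StandardScaler on the test features, so they are scaled with test-set statistics rather than the training statistics. A common remedy is to persist the training scaler and reuse it here; a sketch with a hypothetical file name:

import joblib

# in main.py, after fitting the training scaler:
joblib.dump(scale_object, 'Trained_Model/scaler.joblib')

# in test.py, instead of fit_transform:
scale_object = joblib.load('Trained_Model/scaler.joblib')
X_test = scale_object.transform(np.array(processedData.iloc[:, :], dtype=float))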
working_directory = Path.cwd()
sub_directories = os.listdir(working_directory)

extract_features(working_directory, sub_directories)
processed_data = preprocessing_csv_data()
X_test_data = normalize_data(processed_data)
if os.path.isfile('./DemoTrainingDataset/Trained_Model/trained_model.h5'):
    model = keras.models.load_model('./DemoTrainingDataset/Trained_Model/trained_model.h5')
else:
    sys.exit("Trained model file does not exist")
predictions = np.argmax(model.predict(X_test_data), axis=-1)
encoder = LabelEncoder()
labels = ['Light-Weight', 'Medium-Weight', 'Heavy-Weight', 'Two-Wheeled', 'Rail-Bound']
encoder.fit_transform(labels)
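The freshly fitted encoder can map the integer predictions back to label strings (consistent with training only because LabelEncoder sorts the same five class names):

predicted_labels = encoder.inverse_transform(predictions)
print(predicted_labels[:10])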