Clasificación de Audio

Invítame a un Café

# Importar librerias
import pandas as pd # Pandas
import numpy as np # Numpy
import matplotlib.pyplot as plt

from sklearn import preprocessing # para preprocesar los datos
from sklearn import tree # Decision tree
from sklearn.ensemble import AdaBoostClassifier # Ensamble de clasificadores usando AdaBoost
from sklearn.model_selection import cross_val_score # Para realizar crossvalidation
from sklearn.metrics import confusion_matrix, classification_report # Confusion Matrix y reporte de clasificacion
from sklearn.svm import SVC # Clasificador support vector machine

Sonidos

import IPython.display as ipd  # audio player widget for the Jupyter Notebook

# One recording per class folder, in the order they are shown.
sample_tracks = (
    ('Banana', './Data/Audio/Banana/Track70.wav'),
    ('Chair', './Data/Audio/Chair/Track106.wav'),
    ('Goodbye', './Data/Audio/Goodbye/Track54.wav'),
    ('Hello', './Data/Audio/Hello/Track32.wav'),
    ('IceCream', './Data/Audio/IceCream/Track91.wav'),
)

# Print the class name, then embed a player for its recording.
for word, wav_path in sample_tracks:
    print(word)
    ipd.display(ipd.Audio(wav_path))

Banana

Chair

Goodbye

Hello

IceCream

Leer características de la señal de audio

Importar las características extraídas del audio usando el script audio.R

# Load the extracted feature tables
train_csv, test_csv = "Data/data-model.csv", "Data/validation.csv"
train = pd.read_csv(train_csv)  # training data
test = pd.read_csv(test_csv)  # test data
train.sample(n=10)  # show 10 randomly picked rows of the training table

Unnamed: 0sound.filesselecdurationmeanfreqsdmedianQ25Q75IQR...meanfunminfunmaxfunmeandommindommaxdomdfrangemodindxclassy
5270Track97.wav013.9633183.8238962.3027800.9634205.9610164.997596...2.7391040.1807388.8200001.1957260.0861338.1826178.0964840.089428IceCream5
5068Track95.wav014.5888474.4990122.2507060.6105368.3474447.736908...3.2569840.17294114.7000001.8151690.08613311.36953111.2833980.080611IceCream5
2027Track63.wav011.8699112.5572731.0004040.5468882.1608731.613985...7.4418750.18375014.7000000.8236450.0861331.2919921.2058590.085714Goodbye2
3244Track83.wav011.7587152.3200011.2514270.5175511.8771411.359590...0.3445310.3445310.3445310.5446630.1722661.5503911.3781250.229167Banana3
4058Track115.wav011.6134041.9676671.2624650.6191601.7660711.146911...0.4455710.1778231.0255811.3063480.0861335.3402345.2541020.041729Chair4
1926Track62.wav011.8187392.4351031.0793490.6227632.0860961.463332...14.70000014.70000014.7000000.7125530.0861331.2919921.2058590.089286Goodbye2
4159Track116.wav011.6073271.9023581.2218250.5734001.8273791.253979...2.6023810.24775314.7000001.1097880.0861335.0818364.9957030.072414Chair4
34Track35.wav011.3554392.1264150.6338940.4313481.2465330.815185...13.47500011.02500014.7000000.5085940.0861330.7751950.6890630.137500Hello1
3856Track113.wav011.2748781.4967291.1426840.5757881.4072350.831447...5.9156440.18223114.7000001.1347060.0861335.2541025.1679690.116667Chair4
4563Track90.wav012.0825402.9566321.3083020.3822292.1445901.762360...12.25000011.02500014.7000000.3758520.0861331.2058591.1197270.065934IceCream5

10 rows × 27 columns

# Columns of the training table used as model inputs
feature_columns = [
    "meanfreq", "sd", "median", "Q25", "Q75", "IQR", "skew", "kurt",
    "sp.ent", "sfm", "mode", "centroid", "peakf", "meanfun", "minfun",
    "maxfun", "meandom", "mindom", "maxdom", "dfrange", "modindx",
]
x = train[feature_columns]  # feature matrix
y = train["class"]  # target labels

Algoritmos de Machine Learning

AdaBoost

# Define the machine learning algorithms
clh = tree.DecisionTreeClassifier(max_depth=7)  # weak learner: depth-limited decision tree
# The weak learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional argument works with both old and new versions.
clf = AdaBoostClassifier(clh, n_estimators=10)  # AdaBoost ensemble of 10 trees
# Train the AdaBoost ensemble
clf.fit(x, y)
AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=7,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=1.0, n_estimators=10, random_state=None)
# Predict on the same data the model was fitted on
y_tree_pred = clf.predict(x)
# Mean score over an 8-fold cross validation
print('Desempeño Promedio en el set de entrenamiento = ')
adaboost_cv_scores = cross_val_score(clf, x, y, cv=8)
print(adaboost_cv_scores.mean())
Desempeño Promedio en el set de entrenamiento =
0.8215277777777779
# Confusion matrix of the true labels against the AdaBoost predictions
cm = confusion_matrix(y_true=y, y_pred=y_tree_pred)
print(cm)
[[11  0  0  0  0]
 [ 0  9  0  0  0]
 [ 0  0 12  0  0]
 [ 0  0  0 12  0]
 [ 0  0  0  0 13]]
# Classification report (per-class precision / recall / f1-score)
adaboost_report = classification_report(y_true=y, y_pred=y_tree_pred)
print(adaboost_report)
              precision    recall  f1-score   support

      Banana       1.00      1.00      1.00        11
       Chair       1.00      1.00      1.00         9
     Goodbye       1.00      1.00      1.00        12
       Hello       1.00      1.00      1.00        12
    IceCream       1.00      1.00      1.00        13

   micro avg       1.00      1.00      1.00        57
   macro avg       1.00      1.00      1.00        57
weighted avg       1.00      1.00      1.00        57
# Same feature columns as used for training, taken from the test table
test_feature_columns = [
    "meanfreq", "sd", "median", "Q25", "Q75", "IQR", "skew", "kurt",
    "sp.ent", "sfm", "mode", "centroid", "peakf", "meanfun", "minfun",
    "maxfun", "meandom", "mindom", "maxdom", "dfrange", "modindx",
]
test_x = test[test_feature_columns]
print('Desempeño promedio en el set de pruebas = ')
adaboost_test_score = clf.score(test_x, test["class"])
print(adaboost_test_score)
Desempeño promedio en el set de pruebas =
0.8888888888888888
# función para graficar mejor la matriz de confusion
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Render a confusion matrix as an annotated heat map and display it.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (any 2-D array-like of counts is accepted); rows are
                  plotted on the true-class axis, columns on the
                  predicted-class axis

    target_names: class names used as tick labels,
                  for example: ['high', 'medium', 'low'];
                  pass None to keep matplotlib's default ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to plt.get_cmap('Blues') when None)

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its
                  row sum; rows that sum to zero are shown as zeros)

    Returns
    -------
    None. The figure is created and shown with plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import itertools

    import matplotlib.pyplot as plt
    import numpy as np

    # Accept plain nested lists as well as ndarrays.
    cm = np.asarray(cm)

    # Accuracy is always derived from the raw counts, before normalization.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize BEFORE drawing so the heat-map colours, the cell text and
        # the text-contrast threshold all refer to the same values.
        # Rows with no samples would divide 0 by 0; keep them at zero instead
        # of filling the plot with NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text, the rest black text.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    # Proportions are shown with 4 decimals, raw counts with thousands separators.
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Clase Verdadera')
    plt.xlabel('Prediccion de clase\n accuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
# Plot the AdaBoost confusion matrix with raw counts.
# Tick labels are spelled exactly like the class names printed in the
# classification report (Banana, Chair, Goodbye, Hello, IceCream), in the
# same order, so the figure and the report agree.
plot_confusion_matrix(cm           = cm,
                      normalize    = False,
                      target_names = ['Banana', 'Chair', 'Goodbye', 'Hello', 'IceCream'],
                      title        = "Confusion Matrix")

png

Support Vector Classification (SVC)

# Standardize the features (x is replaced by its scaled version)
x = preprocessing.scale(x)
# Configure the SVC classifier
svcfit = SVC(kernel='linear', C=0.01)
# Train the model
svcfit.fit(x, y)
SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
# Predict with the fitted model on the same (scaled) data it was trained on
y_svc_pred = svcfit.predict(x)
# Mean score over an 8-fold cross validation
print('Desempeño Promedio en el set de entrenamiento = ')
svc_cv_scores = cross_val_score(svcfit, x, y, cv=8)
print(svc_cv_scores.mean())
Desempeño Promedio en el set de entrenamiento =
0.6329861111111111
# Confusion matrix of the true labels against the SVC predictions
cm = confusion_matrix(y_true=y, y_pred=y_svc_pred)
print(cm)
[[ 1  0  0 10  0]
 [ 0  8  1  0  0]
 [ 1  0 11  0  0]
 [ 0  0  0 12  0]
 [ 1  0  0  1 11]]
# Classification report (per-class precision / recall / f1-score)
svc_report = classification_report(y_true=y, y_pred=y_svc_pred)
print(svc_report)
              precision    recall  f1-score   support

      Banana       0.33      0.09      0.14        11
       Chair       1.00      0.89      0.94         9
     Goodbye       0.92      0.92      0.92        12
       Hello       0.52      1.00      0.69        12
    IceCream       1.00      0.85      0.92        13

   micro avg       0.75      0.75      0.75        57
   macro avg       0.75      0.75      0.72        57
weighted avg       0.75      0.75      0.72        57
# Scale the test features with the mean/std of the TRAINING features.
# The SVC was fitted on training data standardized with its own statistics;
# standardizing the test set with the test set's statistics (as
# preprocessing.scale(test_x) would) puts it on a different scale than the
# one the model learned. test_x is still the DataFrame of test features
# here, so its columns select the matching raw training columns.
train_scaler = preprocessing.StandardScaler().fit(train[test_x.columns])
test_x = train_scaler.transform(test_x)
print('Desempeño Promedio en el set de test = ')
print(svcfit.score(test_x, test["class"]))
Desempeño Promedio en el set de test =
0.7777777777777778
# Plot the SVC confusion matrix with raw counts.
# Tick labels are spelled exactly like the class names printed in the
# classification report (Banana, Chair, Goodbye, Hello, IceCream), in the
# same order, so the figure and the report agree.
plot_confusion_matrix(cm           = cm,
                      normalize    = False,
                      target_names = ['Banana', 'Chair', 'Goodbye', 'Hello', 'IceCream'],
                      title        = "Confusion Matrix")

png

Phd. Jose R. Zapata