Loss Functions
Loss Functions used in Machine Learning and Deep Learning
About
A loss function calculates a single real number that indicates how far a prediction is from the actual value. During training of a machine learning model the loss is minimised by an algorithm known as an optimiser; the optimiser improves the model by reducing the loss. In general, the loss is calculated over a number of input examples (a batch).
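As a minimal illustration (not part of the original tutorial; the values below are made up), a batch mean squared error can be computed by hand with NumPy:
import numpy as np
# hypothetical batch of actual targets and model predictions
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
# the loss reduces the whole batch to a single real number: the mean of the squared errors
batch_loss = np.mean((y_true - y_pred) ** 2)
print(batch_loss)  # 0.375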
Tutorial Overview
Source: Jason Brownlee, Machine Learning Mastery.
This tutorial is divided into three parts; they are:
Regression Loss Functions
Mean Squared Error Loss - the default choice when the distribution of the target is Gaussian - squaring the error means larger errors are penalised more than smaller ones (Keras: mse or mean_squared_error)
Mean Squared Logarithmic Error Loss (Keras: mean_squared_logarithmic_error)
Mean Absolute Error Loss - the distribution of the target variable is mostly Gaussian but may have outliers (Keras: mean_absolute_error)
Binary Classification Loss Functions - targets are one of two labels
Binary Cross-Entropy (Keras: binary_crossentropy)
Hinge Loss - mainly for use with SVMs - binary classification where target values are in the set {-1, 1}; encourages examples to have the correct sign, assigning more error when there is a difference in sign between the actual and predicted class values
Squared Hinge Loss - the square of the hinge loss - has the effect of smoothing the surface of the error function and making it easier to work with numerically
Multi-Class Classification Loss Functions - targets can belong to one of many labels or classes - predict the probability of an example belonging to each known class
Multi-Class Cross-Entropy Loss - the first choice to try - calculates a score that summarises the average difference between the actual and predicted probability distributions over all classes (Keras: categorical_crossentropy)
Sparse Multiclass Cross-Entropy Loss - for a large number of labels, e.g. words in a vocabulary may have tens or hundreds of thousands of categories, one for each label; targets stay as integer class indices, with no one-hot encoding (Keras: sparse_categorical_crossentropy)
Kullback-Leibler Divergence Loss - KL divergence measures how one probability distribution differs from a baseline distribution; a KL divergence of 0 means the distributions are identical. It calculates how much information is lost if the predicted target distribution is used to approximate the desired target distribution. KL divergence is more commonly used with models that learn to approximate a more complex function than simple multi-class classification, e.g. an autoencoder learning a dense feature representation under a model that must reconstruct the original input (Keras: kullback_leibler_divergence). A few of these losses are computed by hand in the sketch below.
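As a rough sketch of how some of the losses listed above are computed (illustrative values only, not from the tutorial):
import numpy as np
# binary cross-entropy: average negative log-likelihood of the true label
y_true = np.array([1, 0, 1, 1])
p_pred = np.array([0.9, 0.2, 0.7, 0.6])  # predicted P(class=1)
bce = -np.mean(y_true * np.log(p_pred) + (1 - y_true) * np.log(1 - p_pred))
# hinge loss: targets in {-1, 1}; penalises scores with the wrong sign or a small margin
t = np.array([1, -1, 1, 1])
s = np.array([0.8, -0.5, 0.3, -0.2])  # raw model scores
hinge = np.mean(np.maximum(0.0, 1.0 - t * s))
# KL divergence of a predicted distribution p from a target distribution q
q = np.array([0.7, 0.2, 0.1])
p = np.array([0.6, 0.3, 0.1])
kl = np.sum(q * np.log(q / p))
print(bce, hinge, kl)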
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
import matplotlib.pyplot as plt
import keras
print(keras.__version__)
import tensorflow as tf
print(tf.__version__)
# generate a regression dataset with 20 input features
X,y=make_regression(n_samples=1000, n_features=20,noise=0.1,
random_state=1)
X[0]
print(y.shape)
f'{y[0]}'
print(type(y))
X=StandardScaler().fit_transform(X)
X[0]
y=StandardScaler().fit_transform(y.reshape(len(y),1))[:,0]
n_train=500
trainX, testX = X[:n_train,:], X[n_train:,:]
trainy, testy = y[:n_train], y[n_train:]
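# MLP regression model trained with mean squared error (mse) loss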
model= Sequential()
model.add(Dense(25, input_dim=20,activation='relu',
kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='mse', optimizer = opt)
h=model.fit(trainX, trainy, validation_data=(testX,testy),
epochs=100,
verbose=0)
h.history.keys()
fig= plt.figure(figsize=(5,5))
plt.plot(h.history['loss'],label='train_loss')
plt.plot(h.history['val_loss'], label='test_loss')
plt.legend()
plt.show()
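# the same regression model, now trained with mean squared logarithmic error loss (mse tracked as a metric)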
model=Sequential()
model.add(Dense(25, input_dim=20, activation ='relu',
kernel_initializer = 'he_uniform'))
model.add(Dense(1, activation = 'linear'))
opt=SGD(lr=0.1, momentum=0.9)
model.compile(loss='mean_squared_logarithmic_error',
optimizer=opt,
metrics=['mse'])
print(trainX.shape,' ',trainy.shape,' ', testX.shape,' ', testy.shape)
h = model.fit(trainX, trainy,
validation_data=(testX, testy),
epochs=200,
verbose=0)
# evaluate the model
_,train_mse=model.evaluate(trainX, trainy, verbose=0)
_,test_mse=model.evaluate(testX, testy, verbose=0)
print('Train: {:.2f} Test {:.2f}'.format(train_mse, test_mse))
h.history.keys()
fig=plt.figure(figsize=(10,10))
plt.subplot(211)
plt.title('Loss')
plt.plot(h.history['loss'],label='Training Loss')
plt.plot(h.history['val_loss'],label = 'Test Loss')
plt.legend()
plt.subplot(212)
plt.title('Mean Squared Error')
plt.plot(h.history['mse'], label='train')
plt.plot(h.history['val_mse'], label='test')
plt.legend()
plt.show()
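# mean absolute error loss: regenerate and standardise the data, then train the same MLP with MAE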
X,y=make_regression(n_samples=1000, n_features=20,
noise=0.1, random_state=1)
print(X.shape, y.shape,' ', type(y))
plt.scatter(X[:,1],y)
len(y)
X=StandardScaler().fit_transform(X)
y=StandardScaler().fit_transform(y.reshape(len(y),1))
n_train=500
trainX, testX = X[:n_train,:],X[n_train:,:]
trainy, testy = y[:n_train], y[n_train:]
model= Sequential()
model.add(Dense(25,input_dim=20,
activation='relu',
kernel_initializer='he_uniform'))
model.add(Dense(1, activation='linear'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='mean_absolute_error', optimizer=opt,metrics=['mse'])
h=model.fit(trainX, trainy,validation_data=(testX, testy),
epochs=100, verbose=1)
_,train_mse=model.evaluate(trainX, trainy, verbose=1)
_,test_mse=model.evaluate(testX, testy, verbose=1)
h.history.keys()
print('Train: {:.4f} Test:{:.4f}'.format(train_mse, test_mse))
fig=plt.figure(figsize=(10,10))
plt.subplot(121)
plt.title('LOSS')
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title('MSE')
plt.plot(h.history['mse'],label='train')
plt.plot(h.history['val_mse'], label='test')
plt.legend()
plt.show()
from sklearn.datasets import make_circles
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from numpy import where
import matplotlib.pyplot as plt
%matplotlib inline
X,y = make_circles(n_samples=1000,noise=0.1, random_state=1)
for i in range(2):
    samples_idx = where(y==i)
    plt.scatter(X[samples_idx,0], X[samples_idx,1], label=str(i))
plt.legend()
plt.show()
print(X[:5])
print(y[:5])
n_train=500
trainX, testX = X[:n_train,:], X[n_train:,:]
trainy, testy = y[:n_train], y[n_train:]
print(trainX.shape ,' ', trainy.shape)
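# binary classification MLP on the circles data, trained with binary cross-entropy loss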
model=Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
kernel_initializer='he_uniform'))
model.add(Dense(1, activation='sigmoid'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
h=model.fit(trainX, trainy, validation_data=(testX,testy),
epochs=200,verbose=1)
_,train_acc=model.evaluate(trainX,trainy, verbose=1)
_,test_acc=model.evaluate(testX, testy,verbose=1)
print('Train: {:.4f} Test:{:.4f}'.format(train_acc, test_acc))
h.history.keys()
fig=plt.figure(figsize=(8,4))
plt.subplot(121)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title("ACCURACY")
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label='test')
plt.legend()
plt.show()
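# hinge loss: regenerate the circles data and remap the 0 labels to -1, with a tanh output layer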
X,y = make_circles(n_samples=1000, noise=0.1, random_state=1)
y[where(y==0)]= -1
n_train=500
trainX,testX= X[:n_train,:],X[n_train:,:]
trainy,testy=y[:n_train],y[n_train:]
model=Sequential()
model.add(Dense(50,input_dim=2,activation='relu',
kernel_initializer='he_uniform'))
model.add(Dense(1, activation='tanh'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='hinge', optimizer=opt,metrics=['accuracy'])
model.summary()
h=model.fit(trainX,trainy,validation_data=(testX,testy),
epochs=200,
verbose=1)
model.metrics_names
_,train_acc=model.evaluate(trainX,trainy, verbose=1)
_,test_acc=model.evaluate(testX,testy, verbose= 1)
print('Train: {:.4f} Test: {:.4f}'.format(train_acc, test_acc))
h.history.keys()
fig =plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
plt.title('ACCURACY')
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label = 'test')
plt.legend()
plt.subplot(1,2,2)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.show()
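# squared hinge loss: same setup as the hinge model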
X,y = make_circles(n_samples=1000, noise=0.1, random_state=1)
y[where(y==0)]= -1
n_train=500
trainX,testX = X[:n_train,:],X[n_train:,:]
trainy,testy=y[:n_train],y[n_train:]
#collapse-hide
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(1, activation='tanh'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='squared_hinge', optimizer=opt, metrics=['accuracy'])
h = model.fit(trainX,trainy,validation_data=(testX,testy), epochs=200, verbose=1)
print(model.metrics_names)
res_train = model.evaluate(trainX,trainy)
res_test = model.evaluate(testX, testy)
print('Train acc:{:.4f} Test acc:{:.4f}'.format(res_train[1], res_test[1]))
h.history.keys()
fig=plt.figure(figsize=(8,2))
plt.subplot(121)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title("ACCURACY")
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label='test')
plt.legend()
plt.show()
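# multi-class classification: blobs dataset with 3 classes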
from sklearn.datasets import make_blobs
X,y = make_blobs(n_samples=1000, centers=3, n_features=2,
cluster_std=2, random_state=2)
print(X[:5], ' ', y[:5])
for i in range(3):
    samples_idx = where(y==i)
    plt.scatter(X[samples_idx,0], X[samples_idx,1], label=str(i))
plt.legend()
plt.show()
from keras.utils import to_categorical
X,y = make_blobs(n_samples=1000, centers=3, n_features=2,
cluster_std=2, random_state=2)
y = to_categorical(y)  # one-hot encode the integer labels for categorical_crossentropy
n_train=500
trainX,testX = X[:n_train,:],X[n_train:,:]
trainy,testy=y[:n_train],y[n_train:]
print(trainy.shape)
print(y[:5])
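# MLP classifier trained with categorical cross-entropy loss on one-hot targets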
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
h = model.fit(trainX,trainy,validation_data=(testX,testy), epochs=200, verbose=1)
model.metrics_names
res_train = model.evaluate(trainX,trainy)
res_test = model.evaluate(testX, testy)
print('Train acc:{:.4f} Test acc:{:.4f}'.format(res_train[1], res_test[1]))
h.history.keys()
#collapse-hide
fig=plt.figure(figsize=(8,2))
plt.subplot(121)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title("ACCURACY")
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label='test')
plt.legend()
plt.show()
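# sparse categorical cross-entropy: same blobs problem, targets kept as integer class labels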
from keras.utils import to_categorical
X,y = make_blobs(n_samples=1000, centers=3, n_features=2,
cluster_std=2, random_state=2)
# no one-hot encoding: sparse_categorical_crossentropy expects integer class labels
n_train=500
trainX,testX = X[:n_train,:],X[n_train:,:]
trainy,testy=y[:n_train],y[n_train:]
print(trainy.shape)
print(y[:5])
model = Sequential()
model.add(Dense(50, input_dim=2, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))
opt=SGD(lr=0.01, momentum=0.9)
#model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt,
              metrics=['accuracy'])
h = model.fit(trainX,trainy,validation_data=(testX,testy), epochs=200, verbose=1)
res_train = model.evaluate(trainX, trainy)
res_test = model.evaluate(testX, testy)
print('Train acc:{:.4f} Test acc:{:.4f}'.format(res_train[1], res_test[1]))
fig=plt.figure(figsize=(8,2))
plt.subplot(121)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title("ACCURACY")
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label='test')
plt.legend()
plt.show()
#collapse-hide
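# Kullback-Leibler divergence loss on the same 3-class blobs problem (one-hot targets)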
from sklearn.datasets import make_blobs
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import to_categorical
import matplotlib.pyplot as plt
X,y = make_blobs(n_samples=1000,centers=3,n_features=2, cluster_std=2,
random_state=2)
y[:5]
y = to_categorical(y)
y[:5]
n_train=500
trainX,testX = X[:n_train,:],X[n_train:,:]
trainy, testy = y[:n_train], y[n_train:]
#collapse-hide
model= Sequential()
model.add(Dense(50, input_dim=2, activation='relu',
kernel_initializer='he_uniform'))
model.add(Dense(3, activation='softmax'))
opt=SGD(lr=0.01, momentum=0.9)
model.compile(loss='kullback_leibler_divergence', optimizer=opt, metrics=['accuracy'])
h = model.fit(trainX ,trainy, validation_data=(testX, testy),
epochs =100, verbose=1)
model.evaluate(trainX,trainy,verbose=1)
model.metrics_names
h.history.keys()
fig=plt.figure(figsize=(8,2))
plt.subplot(121)
plt.title("LOSS")
plt.plot(h.history['loss'], label='train')
plt.plot(h.history['val_loss'], label='test')
plt.legend()
plt.subplot(122)
plt.title("ACCURACY")
plt.plot(h.history['accuracy'], label='train')
plt.plot(h.history['val_accuracy'], label='test')
plt.legend()
plt.show()
FIN