!pip install seaborn
!pip install tensorflow
Requirement already satisfied: seaborn in /srv/paws/lib/python3.6/site-packages
Collecting tensorflow
  Downloading tensorflow-1.13.1-cp36-cp36m-manylinux1_x86_64.whl (92.5MB)
  [download logs for the dependencies (termcolor, gast, keras-preprocessing,
   tensorboard, tensorflow-estimator, grpcio, keras-applications, astor,
   absl-py, protobuf, wheel, markdown, mock, h5py, pbr) trimmed]
Building wheels for collected packages: termcolor, gast, absl-py
  Running setup.py bdist_wheel for termcolor ... error: invalid command 'bdist_wheel'
  Running setup.py bdist_wheel for gast ... error: invalid command 'bdist_wheel'
  Running setup.py bdist_wheel for absl-py ... error: invalid command 'bdist_wheel'
Failed to build termcolor gast absl-py
Installing collected packages: termcolor, gast, keras-preprocessing, absl-py, grpcio, wheel, protobuf, markdown, tensorboard, pbr, mock, tensorflow-estimator, h5py, keras-applications, astor, tensorflow
  Running setup.py install for termcolor ... done
  Running setup.py install for gast ... done
  Running setup.py install for absl-py ... done
Successfully installed absl-py-0.7.1 astor-0.7.1 gast-0.2.2 grpcio-1.19.0 h5py-2.9.0 keras-applications-1.0.7 keras-preprocessing-1.0.9 markdown-3.1 mock-2.0.0 pbr-5.1.3 protobuf-3.7.1 tensorboard-1.13.1 tensorflow-1.13.1 tensorflow-estimator-1.13.0 termcolor-1.1.0 wheel-0.33.1
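# Aside on the "invalid command 'bdist_wheel'" errors above: they appear
# because the wheel package was not yet installed when pip tried to build
# wheels, so pip fell back to plain "setup.py install" and the installation
# still succeeded. On a fresh environment the noise can be avoided by
# installing wheel first (a minimal sketch):
# !pip install wheel
# !pip install tensorflow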
from __future__ import absolute_import, division, print_function

import pathlib

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
print(tf.__version__)
1.13.1
"""
This notebook uses the classic Auto MPG dataset and builds a model to predict
the fuel efficiency of late-1970s and early-1980s automobiles. To do this,
we'll give the model descriptions of many automobiles from that period,
with attributes such as cylinders, displacement, horsepower, and weight.
"""

dataset_path = keras.utils.get_file("auto-mpg.data", "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path
Downloading data from https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data
32768/30286 [================================] - 0s 2us/step
'/home/paws/.keras/datasets/auto-mpg.data'
# load the dataset with pandas; '?' marks missing values in this file
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin'] 
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values = "?", comment='\t',
                      sep=" ", skipinitialspace=True)

dataset = raw_dataset.copy()
dataset.tail()
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  Origin
393  27.0          4         140.0        86.0  2790.0          15.6          82       1
394  44.0          4          97.0        52.0  2130.0          24.6          82       2
395  32.0          4         135.0        84.0  2295.0          11.6          82       1
396  28.0          4         120.0        79.0  2625.0          18.6          82       1
397  31.0          4         119.0        82.0  2720.0          19.4          82       1
# count missing values, then drop the rows that contain them
dataset.isna().sum()
dataset = dataset.dropna()
# the 'Origin' column is categorical (1 = USA, 2 = Europe, 3 = Japan), so
# convert it into three one-hot columns, each taking a 0/1 value
origin = dataset.pop('Origin')

dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  USA  Europe  Japan
393  27.0          4         140.0        86.0  2790.0          15.6          82  1.0     0.0    0.0
394  44.0          4          97.0        52.0  2130.0          24.6          82  0.0     1.0    0.0
395  32.0          4         135.0        84.0  2295.0          11.6          82  1.0     0.0    0.0
396  28.0          4         120.0        79.0  2625.0          18.6          82  1.0     0.0    0.0
397  31.0          4         119.0        82.0  2720.0          19.4          82  1.0     0.0    0.0
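# As a side note, pandas' built-in one-hot helper yields the same three
# columns; an equivalent sketch (not run here, since the manual version
# above already added them):
# one_hot = pd.get_dummies(origin).astype(float)  # columns named 1, 2, 3
# one_hot.columns = ['USA', 'Europe', 'Japan']    # map the numeric codes
# dataset = dataset.join(one_hot)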
# split the data: 80% for training, 20% held out for testing
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
# quick look at the joint distributions of a few column pairs from the training set
sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")
<seaborn.axisgrid.PairGrid at 0x7ff6cb4c4c50>
# look at general stats for data
train_stats = train_dataset.describe()
train_stats.pop("MPG") # drop the label, miles per gallon, from the stats
train_stats = train_stats.transpose()
train_stats
count mean std min 25% 50% 75% max
Cylinders 314.0 5.477707 1.699788 3.0 4.00 4.0 8.00 8.0
Displacement 314.0 195.318471 104.331589 68.0 105.50 151.0 265.75 455.0
Horsepower 314.0 104.869427 38.096214 46.0 76.25 94.5 128.00 225.0
Weight 314.0 2990.251592 843.898596 1649.0 2256.50 2822.5 3608.00 5140.0
Acceleration 314.0 15.559236 2.789230 8.0 13.80 15.5 17.20 24.8
Model Year 314.0 75.898089 3.675642 70.0 73.00 76.0 79.00 82.0
USA 314.0 0.624204 0.485101 0.0 0.00 1.0 1.00 1.0
Europe 314.0 0.178344 0.383413 0.0 0.00 0.0 0.00 1.0
Japan 314.0 0.197452 0.398712 0.0 0.00 0.0 0.00 1.0
# separate label and features

train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

# normalize features with the training statistics (applied to train and test sets alike)
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
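# Optional sanity check (an illustrative sketch using only what is defined
# above): after normalization the training features should have mean ~0 and
# std ~1; the test set will be close but not exact, because it is normalized
# with the training statistics.
print(normed_train_data.mean().round(2))
print(normed_train_data.std().round(2))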
"""
Let's build our model. Here, we'll use a Sequential model with two densely
connected hidden layers, and an output layer that returns a single, continuous value.
"""


def build_model():
  model = keras.Sequential([
    layers.Dense(64, activation=tf.nn.relu, input_shape=[len(train_dataset.keys())]), # first layer needs to know the number of inputs
    layers.Dense(64, activation=tf.nn.relu), # ReLU activation: y = max(0, x), i.e. linear for x > 0
    layers.Dense(1) # single linear output unit (no activation) since we predict one continuous value
  ])

  optimizer = tf.keras.optimizers.RMSprop(0.001)

  model.compile(loss='mean_squared_error',
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error']) # extra metrics reported to track how well the model is doing
  return model

model = build_model()
WARNING:tensorflow:From /srv/paws/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING:tensorflow:From /srv/paws/lib/python3.6/site-packages/tensorflow/python/keras/utils/losses_utils.py:170: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 64)                640       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
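# Where those parameter counts come from: a Dense layer has
# (inputs + 1 bias) * units parameters. With our 9 input features:
n_inputs = len(train_dataset.keys())  # 9
print((n_inputs + 1) * 64)  # dense:   640
print((64 + 1) * 64)        # dense_1: 4160
print((64 + 1) * 1)         # dense_2: 65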
# smoke test: run the untrained model on the first 10 normalized training examples
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result
array([[-0.08835188],
       [ 0.10313454],
       [ 0.77511406],
       [-0.07469881],
       [ 0.05337571],
       [ 0.37775832],
       [ 0.05601408],
       [ 0.23992512],
       [ 0.44722676],
       [ 0.15501332]], dtype=float32)
# Display training progress by printing a single dot for each completed epoch
class PrintDot(keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs):
    if epoch % 100 == 0: print('')
    print('.', end='')

# an epoch is one full pass over the training samples
EPOCHS = 1000

history = model.fit(
  normed_train_data, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[PrintDot()])
WARNING:tensorflow:From /srv/paws/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.

....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
# visualize model training process
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

# compare training loss and validation loss:
#   overfitting  if training loss << validation loss (fits the training data but fails to generalize)
#   underfitting if both losses stay high (model too simple to fit even the training data)
#   about right  if training loss ~ validation loss
# Roughly speaking, training loss tracks bias (fit to the training data),
# while the gap to validation loss tracks variance (variability of the
# model's predictions on unseen data); a quick numeric check follows the table below.
         loss  mean_absolute_error  mean_squared_error  val_loss  val_mean_absolute_error  val_mean_squared_error  epoch
995  2.553091             1.037462            2.553091  8.908630                 2.361032                8.908629    995
996  2.691000             1.060704            2.691000  8.561385                 2.271480                8.561386    996
997  2.480490             1.008589            2.480490  8.915443                 2.363787                8.915442    997
998  2.583865             1.036895            2.583864  8.278137                 2.224650                8.278136    998
999  2.501532             1.053180            2.501533  8.348103                 2.199461                8.348102    999
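# Quick numeric check of the heuristic above (illustrative; the 2x threshold
# is arbitrary):
final_train, final_val = hist['loss'].iloc[-1], hist['val_loss'].iloc[-1]
print("train MSE {:.2f} vs val MSE {:.2f}".format(final_train, final_val))
if final_val > 2 * final_train:
  print("validation loss is much higher than training loss -> likely overfitting")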
# plot the history; note that from around epoch 100 the training error barely improves
def plot_history(history):
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch
  
  plt.figure()
  plt.xlabel('Epoch')
  plt.ylabel('Mean Abs Error [MPG]')
  plt.plot(hist['epoch'], hist['mean_absolute_error'],
           label='Train Error')
  plt.plot(hist['epoch'], hist['val_mean_absolute_error'],
           label = 'Val Error')
  plt.ylim([0,5])
  plt.legend()
  
  plt.figure()
  plt.xlabel('Epoch')
  plt.ylabel('Mean Square Error [$MPG^2$]')
  plt.plot(hist['epoch'], hist['mean_squared_error'],
           label='Train Error')
  plt.plot(hist['epoch'], hist['val_mean_squared_error'],
           label = 'Val Error')
  plt.ylim([0,20])
  plt.legend()
  plt.show()


plot_history(history)
"""
Looks like model does not need to iterate for 1000 epochs, orange going up means 
we are overfitting (high variance= high validation loss)


We'll use an EarlyStopping callback that tests a training condition for every epoch. 
If a set amount of epochs elapses without showing improvement, then automatically stop the training.
"""
model = build_model()

# the patience parameter is the number of epochs to wait for improvement:
# training stops after 10 consecutive epochs without a better val_loss
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])

plot_history(history)

# look at the plot: both curves now trend downward together
............................................................
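# Variant (a sketch; assumes restore_best_weights is available in this
# tf.keras version): roll back to the weights from the best val_loss epoch
# instead of keeping those from the last epoch run:
# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
#                                            restore_best_weights=True)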
# evaluate model with test data

loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

# predict MPG values for the test set:
test_predictions = model.predict(normed_test_data).flatten()

plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])
Testing set Mean Abs Error:  1.89 MPG
# let's look at the error distribution
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")

# looks "somewhat" Gaussian, but given that the number of test samples is
# very small we cannot expect much else
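# A quick numeric summary of that impression (a minimal sketch, reusing the
# error series computed above):
print("mean error: {:.2f} MPG, std dev: {:.2f} MPG".format(error.mean(), error.std()))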