import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data: petal length and width, binary target (Iris Setosa or not)
iris = load_iris()
X = iris.data[:, (2, 3)]
y = (iris.target == 0).astype(np.int32)

# Split data into train (70%) and test (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Automatically infer real-valued feature columns from the training data
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)

# Create and train a deep neural network classifier (the target is binary, so n_classes=2)
dnn_classifier = tf.contrib.learn.DNNClassifier(hidden_units=[30, 10],
                                                n_classes=2,
                                                feature_columns=feature_columns)
dnn_classifier.fit(x=X_train, y=y_train, batch_size=50, steps=5000)

# Predict on test data
y_predicted = dnn_classifier.predict(X_test)

# Measure accuracy on test data
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, list(y_predicted)))
By default, the DNNClassifier uses the ReLU activation function; this can be changed with the activation_fn hyperparameter.
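For example, here is a minimal sketch of the same classifier with ELU activations instead (it reuses the feature_columns from above and assumes the contrib estimator's activation_fn constructor argument):

dnn_classifier_elu = tf.contrib.learn.DNNClassifier(hidden_units=[30, 10],
                                                    n_classes=2,
                                                    feature_columns=feature_columns,
                                                    activation_fn=tf.nn.elu)  # override the ReLU default
dnn_classifier_elu.fit(x=X_train, y=y_train, batch_size=50, steps=5000)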
DNNClassifier is great for quickly creating deep nets, but it offers only limited flexibility. In the next section we'll look at TensorFlow's low-level API, which allows any level of customization.
import tensorflow as tf
import numpy as np

n_inputs = 28 * 28
n_outputs = 10

# Add placeholders for input data
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.int64, shape=(None), name='y')

# Create a utility to produce one hidden layer at a time
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])  # dimension 1 is features, dimension 0 is samples
        # Create a variable initializer (the particular method is discussed below)
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        # Weights, biases and output
        W = tf.Variable(init, name='weights')
        b = tf.Variable(tf.zeros([n_neurons]), name='biases')
        z = tf.matmul(X, W) + b
        if activation == 'relu':
            return tf.nn.relu(z)
        else:
            return z

# Create the deep neural network (DNN)
with tf.name_scope('dnn'):
    # Create a hidden layer with 300 neurons
    hidden1 = neuron_layer(X, 300, 'hidden1', activation='relu')
    # Create another hidden layer with 100 neurons
    hidden2 = neuron_layer(hidden1, 100, 'hidden2', activation='relu')
    logits = neuron_layer(hidden2, n_outputs, 'outputs')

# Define loss function (cross-entropy)
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

# Define the optimizer, i.e. the loss minimizer (gradient descent), for training
learning_rate = 0.01
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# Define a metric to compute while training (accuracy)
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Load input data (MNIST)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('/tmp/data')

# Train the model
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 10
batch_size = 100
iterations = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(iterations):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Compute accuracy at the end of each epoch
        acc_train = accuracy.eval(feed_dict={X: mnist.train.images, y: mnist.train.labels})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        print('epoch:', epoch)
        print('train accuracy:', acc_train)
        print('test accuracy:', acc_test)
    save_path = saver.save(sess, './dnn_model.ckpt')
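As a quick sanity check on the eval block: tf.nn.in_top_k(logits, y, 1) marks an example as correct when its largest logit corresponds to the target class. Here is a tiny standalone sketch with made-up logits:

import tensorflow as tf

logits_demo = tf.constant([[0.1, 2.0, 0.3],    # top logit is class 1
                           [1.5, 0.2, 0.1]])   # top logit is class 0
labels_demo = tf.constant([1, 2])              # true classes: 1 (correct) and 2 (wrong)
correct_demo = tf.nn.in_top_k(logits_demo, labels_demo, 1)

with tf.Session() as sess:
    print(sess.run(correct_demo))              # [ True False]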
Each layer's weights are initialized from a truncated normal distribution whose standard deviation shrinks as the number of inputs to the layer grows, which keeps the initial outputs (and gradients) on a reasonable scale:

stddev = 2 / np.sqrt(n_inputs)
init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
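Plugging in some typical layer widths shows how the spread shrinks as layers get wider (plain NumPy, hypothetical sizes; tf.truncated_normal also re-draws any value more than two standard deviations from the mean):

import numpy as np

for n_inputs in (784, 300, 100):
    stddev = 2 / np.sqrt(n_inputs)
    # initial weights end up roughly within [-2 * stddev, 2 * stddev]
    print(n_inputs, round(stddev, 4))   # 784 -> 0.0714, 300 -> 0.1155, 100 -> 0.2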
This works great; however, we built the hidden layers manually with the neuron_layer() function, which is quite cumbersome. In the next section we'll look at a way to avoid that.
# Create the deep neural network (DNN) with prebuilt fully connected layers
from tensorflow.contrib.layers import fully_connected

with tf.name_scope('dnn'):
    hidden1 = fully_connected(X, 300, scope='hidden1')
    hidden2 = fully_connected(hidden1, 100, scope='hidden2')
    # fully_connected applies ReLU by default, so only the output layer overrides activation_fn
    logits = fully_connected(hidden2, n_outputs, scope='outputs', activation_fn=None)
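For reference, later TensorFlow 1.x releases ship a very similar helper outside of contrib, tf.layers.dense. A minimal sketch of the same network with it (its default activation is None, so the hidden layers set it explicitly):

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, 300, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.layers.dense(hidden1, 100, activation=tf.nn.relu, name='hidden2')
    logits = tf.layers.dense(hidden2, n_outputs, name='outputs')   # no activation on the logits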
with tf.Session() as sess:
    # Restore the trained model
    saver.restore(sess, './dnn_model.ckpt')
    # Get images to predict labels for
    X_new_scaled = [..]  # some new (scaled) images
    # Evaluate the logits and pick the highest-scoring class
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)
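If you also want per-class probabilities rather than just the predicted class, a minimal sketch (assuming the X, logits and saver from the graph above are still in scope, and X_new_scaled holds your images) is to run the logits through a softmax in the same session:

y_proba_op = tf.nn.softmax(logits)   # convert logits to per-class probabilities

with tf.Session() as sess:
    saver.restore(sess, './dnn_model.ckpt')
    proba = y_proba_op.eval(feed_dict={X: X_new_scaled})
    print(proba[0].round(3))         # estimated probability of each digit for the first image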