TensorFlow 1.5 Tutorial


This article is a work in progress.

The TensorFlow 1.4 tutorial is here.

Test environment

Ubuntu 16.04 (64-bit) in VirtualBox on Windows 10

$ sudo apt-get install python-numpy python-dev python-pip python-wheel
$ sudo apt-get install python3-numpy python3-dev python3-pip python3-wheel
$ sudo apt-get install python-virtualenv python3-tk git openjdk-8-jdk
$ echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list
$ curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
$ sudo apt-get update && sudo apt-get install bazel
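
Bazel is only needed if you later build TensorFlow from source; the pip-based installs below never invoke it. To confirm it installed correctly:

$ bazel version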

TensorFlow is installed into a virtualenv, CPU-only build.
Note that almost all of the training output is written under the /tmp directory, so it disappears on reboot.
The source listings pasted below are from the release current at the time of writing (r1.5.0).
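
The Estimators used below write their checkpoints to that temporary location unless told otherwise. A minimal sketch of pinning them to a persistent directory instead (the path and the single feature column are hypothetical placeholders, not part of the tutorial):

import tensorflow as tf

# Hypothetical sketch: every tf.estimator.Estimator accepts model_dir.
# Checkpoints written there survive a reboot, unlike the /tmp default.
feature_columns = [tf.feature_column.numeric_column('SepalLength')]
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='/home/user/iris_model')  # hypothetical persistent path
print(classifier.model_dir)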

Install >

Installing TensorFlow on Ubuntu

Google Translate

$ virtualenv --system-site-packages -p python3 test
$ source ~/test/bin/activate
(test)$ pip3 install --upgrade tensorflow
(test)$ python
>>> import tensorflow as tf
>>> hello = tf.constant('Hello, TensorFlow!')
>>> sess = tf.Session()
>>> print(sess.run(hello))
# b'Hello, TensorFlow!'
# Ctrl+D
(test)$ deactivate
$ rm -r test

Develop > Get Started >

Getting Started for ML Beginners

Google Translate

[Image: Iris three species.jpg]

$ virtualenv --system-site-packages -p python3 get_started
$ source ~/get_started/bin/activate
(get_started)$ pip install pandas
(get_started)$ pip install --upgrade tensorflow
(get_started)$ git clone https://github.com/tensorflow/models
(get_started)$ cd models/samples/core/get_started/
(get_started)$ python premade_estimator.py
# Test set accuracy: 0.967
(get_started)$ tensorboard --logdir /tmp/tmpfsausbwi
# tmpfsausbwi is a random string that changes on every run
# Open http://localhost:6006 in a browser
# CTRL+C
(get_started)$ deactivate
$ cd ~
$ rm -r \
    get_started \
    models \
    /tmp/tmpfsausbwi
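
The directory name is random because premade_estimator.py never passes model_dir, so the Estimator creates a fresh temporary folder on every run. Rather than scanning /tmp, you can print the path the Estimator chose; a hypothetical one-line addition to the script after the classifier is built:

print(classifier.model_dir)  # e.g. /tmp/tmpfsausbwi; pass this to --logdir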

Getting Started with TensorFlow / Google Translate

Checkpoints / Google Translate

Feature Columns / Google Translate

Datasets Quick Start / Google Translate

Creating Custom Estimators / Google Translate

premade_estimator.py

[Image: Premade estimator.png]
"""アイリスデータセットのDNNClassifierの例"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import tensorflow as tf

import iris_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument(
    '--train_steps', default=1000, type=int, help='number of training steps')


def main(argv):
    args = parser.parse_args(argv[1:])

    # Fetch the data
    (train_x, train_y), (test_x, test_y) = iris_data.load_data()

    # Feature columns describe how to use the input
    my_feature_columns = []
    for key in train_x.keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Build a DNN with 2 hidden layers of 10 units each
    classifier = tf.estimator.DNNClassifier(
        feature_columns=my_feature_columns,
        # The length of this list is the number of hidden layers (2 here)
        # Each entry is the number of neurons in that layer (10 and 10)
        hidden_units=[10, 10],
        # The model classifies 3 iris species, so n_classes is set to 3
        n_classes=3)

    # Train the model
    classifier.train(
        input_fn=
        lambda: iris_data.train_input_fn(train_x, train_y, args.batch_size),
        steps=args.train_steps)

    # Evaluate the model
    eval_result = classifier.evaluate(
        input_fn=
        lambda: iris_data.eval_input_fn(test_x, test_y, args.batch_size))

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    # Generate predictions from the model
    expected = ['Setosa', 'Versicolor', 'Virginica']
    predict_x = {
        'SepalLength': [5.1, 5.9, 6.9],
        'SepalWidth': [3.3, 3.0, 3.1],
        'PetalLength': [1.7, 4.2, 5.4],
        'PetalWidth': [0.5, 1.5, 2.1],
    }

    predictions = classifier.predict(
        input_fn=lambda: iris_data.eval_input_fn(predict_x,
                                                 labels=None,
                                                 batch_size=args.batch_size))

    for pred_dict, expec in zip(predictions, expected):
        template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]

        print(
            template.format(iris_data.SPECIES[class_id], 100 * probability,
                            expec))


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run(main)
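
Because the script defines --batch_size and --train_steps with argparse, both can be overridden on the command line; the values below are arbitrary examples:

(get_started)$ python premade_estimator.py --batch_size 50 --train_steps 2000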

iris_data.py

import pandas as pd
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

CSV_COLUMN_NAMES = [
    'SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species'
]
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


def maybe_download():
    train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
    test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)

    return train_path, test_path


def load_data(y_name='Species'):
    """アイリスデータセットを(train_x, train_y), (test_x, test_y)として返します"""
    train_path, test_path = maybe_download()

    train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
    train_x, train_y = train, train.pop(y_name)

    test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
    test_x, test_y = test, test.pop(y_name)

    return (train_x, train_y), (test_x, test_y)


def train_input_fn(features, labels, batch_size):
    """トレーニング用の入力関数"""
    # 入力をデータセットに変換します
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle, repeat, and batch the examples
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)

    # Return the dataset
    return dataset


def eval_input_fn(features, labels, batch_size):
    """評価または予測用の入力関数"""
    features = dict(features)
    if labels is None:
        # No labels, use only features
        inputs = features
    else:
        inputs = (features, labels)

    # Convert the inputs to a Dataset
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Batch the examples
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)

    # Return the dataset
    return dataset


# The remainder of this file contains a simple example of a csv parser,
# implemented using the `Dataset` class.
# `tf.decode_csv` sets the types of the outputs to match the examples given
# in the `record_defaults` argument.
CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]


def _parse_line(line):
    # Decode the line into its fields
    fields = tf.decode_csv(line, record_defaults=CSV_TYPES)

    # Pack the result into a dictionary
    features = dict(zip(CSV_COLUMN_NAMES, fields))

    # Separate the label from the features
    label = features.pop('Species')

    return features, label


def csv_input_fn(csv_path, batch_size):
    # Create a dataset containing the text lines
    dataset = tf.data.TextLineDataset(csv_path).skip(1)

    # Parse each line
    dataset = dataset.map(_parse_line)

    # Shuffle, repeat, and batch the examples
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)

    # Return the dataset
    return dataset
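
Note that csv_input_fn is never called by premade_estimator.py. A minimal sketch of exercising it by hand, assuming the training CSV has already been fetched by maybe_download() and using TF 1.x session semantics:

import tensorflow as tf
import iris_data

# Pull one shuffled batch of four parsed rows out of the CSV pipeline
train_path, _ = iris_data.maybe_download()
dataset = iris_data.csv_input_fn(train_path, batch_size=4)
features, labels = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    print(sess.run(labels))  # one batch of integer species labels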

Develop > Tutorials > Images >

A Guide to TF Layers: Building a Convolutional Neural Network

Google Translate

[Image: Mnist 0-9.png]

$ virtualenv --system-site-packages -p python3 layers
$ source ~/layers/bin/activate
(layers)$ pip install --upgrade tensorflow
(layers)$ gedit cnn_mnist.py
# Paste in the cnn_mnist.py source below
(layers)$ python cnn_mnist.py
# {'accuracy': 0.7254, 'loss': 1.7511774, 'global_step': 1000}
(layers)$ tensorboard --logdir /tmp/mnist_convnet_model
# Open http://localhost:6006 in a browser
# CTRL+C
(layers)$ deactivate
$ rm -r \
    layers \
    MNIST-data \
    cnn_mnist.py \
    /tmp/mnist_convnet_model

cnn_mnist.py

[Image: Cnn mnist.png]
"""tf.layersで構築されたMNISTの畳み込みニューラルネットワーク推定器"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)


def cnn_model_fn(features, labels, mode):
    """CNNのモデル関数"""
    # Input layer
    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
    # MNIST images are 28x28 pixels, and have one color channel
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional layer #1
    # Computes 32 features using a 5x5 filter with ReLU activation.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 28, 28, 1]
    # Output Tensor Shape: [batch_size, 28, 28, 32]
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 28, 28, 32]
    # Output Tensor Shape: [batch_size, 14, 14, 32]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    # Convolutional layer #2
    # Computes 64 features using a 5x5 filter.
    # Padding is added to preserve width and height.
    # Input Tensor Shape: [batch_size, 14, 14, 32]
    # Output Tensor Shape: [batch_size, 14, 14, 64]
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)

    # Pooling layer #2
    # Second max pooling layer with a 2x2 filter and stride of 2
    # Input Tensor Shape: [batch_size, 14, 14, 64]
    # Output Tensor Shape: [batch_size, 7, 7, 64]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # Flatten tensor into a batch of vectors
    # Input Tensor Shape: [batch_size, 7, 7, 64]
    # Output Tensor Shape: [batch_size, 7 * 7 * 64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Dense layer
    # Densely connected layer with 1024 neurons
    # Input Tensor Shape: [batch_size, 7 * 7 * 64]
    # Output Tensor Shape: [batch_size, 1024]
    dense = tf.layers.dense(
        inputs=pool2_flat, units=1024, activation=tf.nn.relu)

    # Add dropout operation; 0.6 probability that element will be kept
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits layer
    # Input Tensor Shape: [batch_size, 1024]
    # Output Tensor Shape: [batch_size, 10]
    logits = tf.layers.dense(inputs=dropout, units=10)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy":
        tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def main(unused_argv):
    # Load training and eval data
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")
    train_data = mnist.train.images  # Returns np.array
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images  # Returns np.array
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

    # Create the Estimator
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)
    mnist_classifier.train(
        input_fn=train_input_fn, steps=20000, hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)


if __name__ == "__main__":
    tf.app.run()
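
cnn_model_fn handles PREDICT mode, but main() never exercises it. A minimal sketch of generating predictions after evaluation (a hypothetical addition at the end of main(); mnist_classifier and eval_data come from the code above):

    # Predict the first three test images (hypothetical; not in the tutorial)
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data[:3]}, num_epochs=1, shuffle=False)
    for pred in mnist_classifier.predict(input_fn=predict_input_fn):
        print(pred["classes"], pred["probabilities"].max())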