Neural Network Tuning in Practice (4): Deeper Networks & Batch Normalization
Published: 2019-04-30


With data augmentation we have already raised the model's accuracy from 78.04% to 82.6%.

 

Starting from the previous data-augmentation code:

import tensorflow as tf
import os
import pickle
import numpy as np

CIFAR_DIR = "dataset/cifar-10-batches-py"
# print(os.listdir(CIFAR_DIR))


def load_data(filename):
    """read data from data file."""
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        return data[b'data'], data[b'labels']


# tensorflow.Dataset.
class CifarData:
    def __init__(self, filenames, need_shuffle):
        all_data = []
        all_labels = []
        for filename in filenames:
            data, labels = load_data(filename)
            all_data.append(data)
            all_labels.append(labels)
        self._data = np.vstack(all_data)
        self._labels = np.hstack(all_labels)
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        # [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        p = np.random.permutation(self._num_examples)
        self._data = self._data[p]
        self._labels = self._labels[p]

    def next_batch(self, batch_size):
        """return batch_size examples as a batch."""
        end_indicator = self._indicator + batch_size
        if end_indicator > self._num_examples:
            if self._need_shuffle:
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception("have no more examples")
        if end_indicator > self._num_examples:
            raise Exception("batch size is larger than all examples")
        batch_data = self._data[self._indicator: end_indicator]
        batch_labels = self._labels[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_labels


train_filenames = [os.path.join(CIFAR_DIR, 'data_batch_%d' % i) for i in range(1, 6)]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

train_data = CifarData(train_filenames, True)
test_data = CifarData(test_filenames, False)

batch_size = 20
x = tf.placeholder(tf.float32, [batch_size, 3072])
y = tf.placeholder(tf.int64, [batch_size])  # [None], eg: [0,5,6,3]
x_image = tf.reshape(x, [-1, 3, 32, 32])
# 32*32
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# x_image is now (20, 32, 32, 3)

x_image_arr = tf.split(x_image, num_or_size_splits=batch_size, axis=0)
result_x_image_arr = []

for x_single_image in x_image_arr:
    # x_single_image: [1,32,32,3] => [32,32,3]
    x_single_image = tf.reshape(x_single_image, [32, 32, 3])
    data_aug_1 = tf.image.random_flip_left_right(x_single_image)
    data_aug_2 = tf.image.random_brightness(data_aug_1, max_delta=63)
    # contrast factor is sampled between 0.2 and 1.8
    data_aug_3 = tf.image.random_contrast(data_aug_2, lower=0.2, upper=1.8)
    x_single_image = tf.reshape(data_aug_3, [1, 32, 32, 3])
    result_x_image_arr.append(x_single_image)

result_x_images = tf.concat(result_x_image_arr, axis=0)
normal_result_x_images = result_x_images / 127.5 - 1

# conv1: feature maps (the layer's output images)
conv1_1 = tf.layers.conv2d(normal_result_x_images,
                           32,      # output channel number
                           (3, 3),  # kernel size
                           padding='same',
                           activation=tf.nn.relu,
                           name='conv1_1')
conv1_2 = tf.layers.conv2d(conv1_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv1_2')
# 16 * 16
pooling1 = tf.layers.max_pooling2d(conv1_2,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name='pool1')

conv2_1 = tf.layers.conv2d(pooling1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv2_1')
conv2_2 = tf.layers.conv2d(conv2_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv2_2')
# 8 * 8
pooling2 = tf.layers.max_pooling2d(conv2_2, (2, 2), (2, 2), name='pool2')

conv3_1 = tf.layers.conv2d(pooling2, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv3_1')
conv3_2 = tf.layers.conv2d(conv3_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv3_2')
# 4 * 4 * 32
pooling3 = tf.layers.max_pooling2d(conv3_2, (2, 2), (2, 2), name='pool3')
# [None, 4 * 4 * 32]
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# y_ -> softmax
# y -> one_hot
# loss = ylogy_

# indices
predict = tf.argmax(y_, 1)
# [1,0,1,1,1,0,0,0]
correct_prediction = tf.equal(predict, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

init = tf.global_variables_initializer()
train_steps = 10000
test_steps = 100

with tf.Session() as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels})
        if (i+1) % 100 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f'
                  % (i+1, loss_val, acc_val))
        if (i+1) % 1000 == 0:
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict={
                        x: test_batch_data,
                        y: test_batch_labels
                    })
                all_test_acc_val.append(test_acc_val)
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

 

Deepening the Network

Let's add a few more convolutional layers to the code, expanding the 7-layer network into a 10-layer one.

Specifically, conv1_3, conv2_3 and conv3_3 are added; the key change is sketched below, followed by the full script.
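To make the change easy to spot, here is what the conv1 block gains (conv2_3 and conv3_3 follow the same pattern): a third conv layer is appended, and the pooling layer now takes it as input.

conv1_3 = tf.layers.conv2d(conv1_2,
                           32,      # output channel number
                           (3, 3),  # kernel size
                           padding='same',
                           activation=tf.nn.relu,
                           name='conv1_3')
# pool1 now consumes conv1_3 instead of conv1_2
pooling1 = tf.layers.max_pooling2d(conv1_3, (2, 2), (2, 2), name='pool1')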

import tensorflow as tf
import os
import pickle
import numpy as np

CIFAR_DIR = "dataset/cifar-10-batches-py"
# print(os.listdir(CIFAR_DIR))


def load_data(filename):
    """read data from data file."""
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        return data[b'data'], data[b'labels']


# tensorflow.Dataset.
class CifarData:
    def __init__(self, filenames, need_shuffle):
        all_data = []
        all_labels = []
        for filename in filenames:
            data, labels = load_data(filename)
            all_data.append(data)
            all_labels.append(labels)
        self._data = np.vstack(all_data)
        self._labels = np.hstack(all_labels)
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        # [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        p = np.random.permutation(self._num_examples)
        self._data = self._data[p]
        self._labels = self._labels[p]

    def next_batch(self, batch_size):
        """return batch_size examples as a batch."""
        end_indicator = self._indicator + batch_size
        if end_indicator > self._num_examples:
            if self._need_shuffle:
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception("have no more examples")
        if end_indicator > self._num_examples:
            raise Exception("batch size is larger than all examples")
        batch_data = self._data[self._indicator: end_indicator]
        batch_labels = self._labels[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_labels


train_filenames = [os.path.join(CIFAR_DIR, 'data_batch_%d' % i) for i in range(1, 6)]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

train_data = CifarData(train_filenames, True)
test_data = CifarData(test_filenames, False)

batch_size = 20
x = tf.placeholder(tf.float32, [batch_size, 3072])
y = tf.placeholder(tf.int64, [batch_size])  # [None], eg: [0,5,6,3]
x_image = tf.reshape(x, [-1, 3, 32, 32])
# 32*32
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# x_image is now (20, 32, 32, 3)

x_image_arr = tf.split(x_image, num_or_size_splits=batch_size, axis=0)
result_x_image_arr = []

for x_single_image in x_image_arr:
    # x_single_image: [1,32,32,3] => [32,32,3]
    x_single_image = tf.reshape(x_single_image, [32, 32, 3])
    data_aug_1 = tf.image.random_flip_left_right(x_single_image)
    data_aug_2 = tf.image.random_brightness(data_aug_1, max_delta=63)
    # contrast factor is sampled between 0.2 and 1.8
    data_aug_3 = tf.image.random_contrast(data_aug_2, lower=0.2, upper=1.8)
    x_single_image = tf.reshape(data_aug_3, [1, 32, 32, 3])
    result_x_image_arr.append(x_single_image)

result_x_images = tf.concat(result_x_image_arr, axis=0)
normal_result_x_images = result_x_images / 127.5 - 1

# conv1: feature maps (the layer's output images)
conv1_1 = tf.layers.conv2d(normal_result_x_images,
                           32,      # output channel number
                           (3, 3),  # kernel size
                           padding='same',
                           activation=tf.nn.relu,
                           name='conv1_1')
conv1_2 = tf.layers.conv2d(conv1_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv1_2')
conv1_3 = tf.layers.conv2d(conv1_2, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv1_3')
# 16 * 16
pooling1 = tf.layers.max_pooling2d(conv1_3,
                                   (2, 2),  # kernel size
                                   (2, 2),  # stride
                                   name='pool1')

conv2_1 = tf.layers.conv2d(pooling1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv2_1')
conv2_2 = tf.layers.conv2d(conv2_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv2_2')
conv2_3 = tf.layers.conv2d(conv2_2, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv2_3')
# 8 * 8
pooling2 = tf.layers.max_pooling2d(conv2_3, (2, 2), (2, 2), name='pool2')

conv3_1 = tf.layers.conv2d(pooling2, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv3_1')
conv3_2 = tf.layers.conv2d(conv3_1, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv3_2')
conv3_3 = tf.layers.conv2d(conv3_2, 32, (3, 3), padding='same',
                           activation=tf.nn.relu, name='conv3_3')
# 4 * 4 * 32
pooling3 = tf.layers.max_pooling2d(conv3_3, (2, 2), (2, 2), name='pool3')
# [None, 4 * 4 * 32]
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# y_ -> softmax
# y -> one_hot
# loss = ylogy_

# indices
predict = tf.argmax(y_, 1)
# [1,0,1,1,1,0,0,0]
correct_prediction = tf.equal(predict, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

init = tf.global_variables_initializer()
train_steps = 10000
test_steps = 100

with tf.Session() as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels})
        if (i+1) % 100 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f'
                  % (i+1, loss_val, acc_val))
        if (i+1) % 1000 == 0:
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict={
                        x: test_batch_data,
                        y: test_batch_labels
                    })
                all_test_acc_val.append(test_acc_val)
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

In theory, accuracy should rise from 82.6% to about 83.4%.

Deepening the network is another way to improve model accuracy.

 

My own training results were not that good, though; 10k steps may simply not be enough, and I will add results for a 100k-step run when I have time.

 

 

Batch Normalization

We build on the code of the deepened network.

 

First, let's simplify how the layers are written: spelling out the parameters that many layers share over and over is time-consuming.

import tensorflow as tf
import os
import pickle
import numpy as np

CIFAR_DIR = "dataset/cifar-10-batches-py"
# print(os.listdir(CIFAR_DIR))


def load_data(filename):
    """read data from data file."""
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        return data[b'data'], data[b'labels']


# tensorflow.Dataset.
class CifarData:
    def __init__(self, filenames, need_shuffle):
        all_data = []
        all_labels = []
        for filename in filenames:
            data, labels = load_data(filename)
            all_data.append(data)
            all_labels.append(labels)
        self._data = np.vstack(all_data)
        self._labels = np.hstack(all_labels)
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        # [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        p = np.random.permutation(self._num_examples)
        self._data = self._data[p]
        self._labels = self._labels[p]

    def next_batch(self, batch_size):
        """return batch_size examples as a batch."""
        end_indicator = self._indicator + batch_size
        if end_indicator > self._num_examples:
            if self._need_shuffle:
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception("have no more examples")
        if end_indicator > self._num_examples:
            raise Exception("batch size is larger than all examples")
        batch_data = self._data[self._indicator: end_indicator]
        batch_labels = self._labels[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_labels


train_filenames = [os.path.join(CIFAR_DIR, 'data_batch_%d' % i) for i in range(1, 6)]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

train_data = CifarData(train_filenames, True)
test_data = CifarData(test_filenames, False)

batch_size = 20
x = tf.placeholder(tf.float32, [batch_size, 3072])
y = tf.placeholder(tf.int64, [batch_size])  # [None], eg: [0,5,6,3]
x_image = tf.reshape(x, [-1, 3, 32, 32])
# 32*32
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# x_image is now (20, 32, 32, 3)

x_image_arr = tf.split(x_image, num_or_size_splits=batch_size, axis=0)
result_x_image_arr = []

for x_single_image in x_image_arr:
    # x_single_image: [1,32,32,3] => [32,32,3]
    x_single_image = tf.reshape(x_single_image, [32, 32, 3])
    data_aug_1 = tf.image.random_flip_left_right(x_single_image)
    data_aug_2 = tf.image.random_brightness(data_aug_1, max_delta=63)
    # contrast factor is sampled between 0.2 and 1.8
    data_aug_3 = tf.image.random_contrast(data_aug_2, lower=0.2, upper=1.8)
    x_single_image = tf.reshape(data_aug_3, [1, 32, 32, 3])
    result_x_image_arr.append(x_single_image)

result_x_images = tf.concat(result_x_image_arr, axis=0)
normal_result_x_images = result_x_images / 127.5 - 1


def conv_wrapper(inputs,
                 name,
                 output_channel=32,
                 kernel_size=(3, 3),
                 activation=tf.nn.relu,
                 padding='same'):
    return tf.layers.conv2d(inputs,
                            output_channel,
                            kernel_size,
                            padding=padding,
                            activation=activation,
                            name=name)


def pooling_wrapper(inputs, name):
    return tf.layers.max_pooling2d(inputs, (2, 2), (2, 2), name=name)


# conv1: feature maps (the layer's output images)
conv1_1 = conv_wrapper(normal_result_x_images, 'conv1_1')
conv1_2 = conv_wrapper(conv1_1, 'conv1_2')
conv1_3 = conv_wrapper(conv1_2, 'conv1_3')
# 16 * 16
pooling1 = pooling_wrapper(conv1_3, 'pool1')
conv2_1 = conv_wrapper(pooling1, 'conv2_1')
conv2_2 = conv_wrapper(conv2_1, 'conv2_2')
conv2_3 = conv_wrapper(conv2_2, 'conv2_3')
# 8 * 8
pooling2 = pooling_wrapper(conv2_3, 'pool2')
conv3_1 = conv_wrapper(pooling2, 'conv3_1')
conv3_2 = conv_wrapper(conv3_1, 'conv3_2')
conv3_3 = conv_wrapper(conv3_2, 'conv3_3')
# 4 * 4 * 32
pooling3 = pooling_wrapper(conv3_3, 'pool3')
# [None, 4 * 4 * 32]
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# y_ -> softmax
# y -> one_hot
# loss = ylogy_

# indices
predict = tf.argmax(y_, 1)
# [1,0,1,1,1,0,0,0]
correct_prediction = tf.equal(predict, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

init = tf.global_variables_initializer()
train_steps = 10000
test_steps = 100

with tf.Session() as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels})
        if (i+1) % 100 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f'
                  % (i+1, loss_val, acc_val))
        if (i+1) % 1000 == 0:
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict={
                        x: test_batch_data,
                        y: test_batch_labels
                    })
                all_test_acc_val.append(test_acc_val)
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

Comparison of the layer definitions before and after wrapping
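As a rough side-by-side (taken from the two scripts above): a verbose tf.layers.conv2d call with repeated arguments becomes a one-line conv_wrapper call whose defaults carry the shared settings.

# before: every layer repeats the same arguments
conv1_1 = tf.layers.conv2d(normal_result_x_images,
                           32, (3, 3),
                           padding='same',
                           activation=tf.nn.relu,
                           name='conv1_1')

# after: the shared arguments live in conv_wrapper's defaults
conv1_1 = conv_wrapper(normal_result_x_images, 'conv1_1')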

We have come across many wrapped libraries before, such as SLAM libraries and Keras: they package the underlying API in just this way, offering a more elegant interface, though that can make them a bit harder for beginners to follow.

 

Next, let's add batch norm.

The reason for this wrapping is to make it convenient to add batch_norm.

Now all we need to do is add a batch-norm step inside conv_wrapper.

The batch normalization layer sits between the conv layer and the activation:

Without BN: conv -> activation

With batch normalization: conv -> bn -> activation

tf.layers.batch_normalization takes a training argument: batch normalization maintains a (moving) mean and variance, and these are handled differently during training and during testing.
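The change to conv_wrapper is therefore small; this excerpt from the full script below shows the core of it: run the convolution with no activation, apply batch normalization with the training flag, then apply the activation.

def conv_wrapper(inputs, name, is_training,
                 output_channel=32, kernel_size=(3, 3),
                 activation=tf.nn.relu, padding='same'):
    with tf.name_scope(name):
        # conv -> bn -> activation
        conv2d = tf.layers.conv2d(inputs, output_channel, kernel_size,
                                  padding=padding,
                                  activation=None,  # activation is applied after bn
                                  name=name + '/conv2d')
        bn = tf.layers.batch_normalization(conv2d, training=is_training)
        return activation(bn)

# is_training is a tf.bool placeholder, fed True while training and False while testing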

import tensorflow as tf
import os
import pickle
import numpy as np

CIFAR_DIR = "dataset/cifar-10-batches-py"
# print(os.listdir(CIFAR_DIR))


def load_data(filename):
    """read data from data file."""
    with open(filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        return data[b'data'], data[b'labels']


# tensorflow.Dataset.
class CifarData:
    def __init__(self, filenames, need_shuffle):
        all_data = []
        all_labels = []
        for filename in filenames:
            data, labels = load_data(filename)
            all_data.append(data)
            all_labels.append(labels)
        self._data = np.vstack(all_data)
        self._labels = np.hstack(all_labels)
        print(self._data.shape)
        print(self._labels.shape)

        self._num_examples = self._data.shape[0]
        self._need_shuffle = need_shuffle
        self._indicator = 0
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        # [0,1,2,3,4,5] -> [5,3,2,4,0,1]
        p = np.random.permutation(self._num_examples)
        self._data = self._data[p]
        self._labels = self._labels[p]

    def next_batch(self, batch_size):
        """return batch_size examples as a batch."""
        end_indicator = self._indicator + batch_size
        if end_indicator > self._num_examples:
            if self._need_shuffle:
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception("have no more examples")
        if end_indicator > self._num_examples:
            raise Exception("batch size is larger than all examples")
        batch_data = self._data[self._indicator: end_indicator]
        batch_labels = self._labels[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_data, batch_labels


train_filenames = [os.path.join(CIFAR_DIR, 'data_batch_%d' % i) for i in range(1, 6)]
test_filenames = [os.path.join(CIFAR_DIR, 'test_batch')]

train_data = CifarData(train_filenames, True)
test_data = CifarData(test_filenames, False)

batch_size = 20
x = tf.placeholder(tf.float32, [batch_size, 3072])
y = tf.placeholder(tf.int64, [batch_size])  # [None], eg: [0,5,6,3]
is_training = tf.placeholder(tf.bool, [])
x_image = tf.reshape(x, [-1, 3, 32, 32])
# 32*32
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])
# x_image is now (20, 32, 32, 3)

x_image_arr = tf.split(x_image, num_or_size_splits=batch_size, axis=0)
result_x_image_arr = []

for x_single_image in x_image_arr:
    # x_single_image: [1,32,32,3] => [32,32,3]
    x_single_image = tf.reshape(x_single_image, [32, 32, 3])
    data_aug_1 = tf.image.random_flip_left_right(x_single_image)
    data_aug_2 = tf.image.random_brightness(data_aug_1, max_delta=63)
    # contrast factor is sampled between 0.2 and 1.8
    data_aug_3 = tf.image.random_contrast(data_aug_2, lower=0.2, upper=1.8)
    x_single_image = tf.reshape(data_aug_3, [1, 32, 32, 3])
    result_x_image_arr.append(x_single_image)

result_x_images = tf.concat(result_x_image_arr, axis=0)
normal_result_x_images = result_x_images / 127.5 - 1


def conv_wrapper(inputs,
                 name,
                 is_training,
                 output_channel=32,
                 kernel_size=(3, 3),
                 activation=tf.nn.relu,
                 padding='same'):
    with tf.name_scope(name):
        conv2d = tf.layers.conv2d(inputs,
                                  output_channel,
                                  kernel_size,
                                  padding=padding,
                                  activation=None,
                                  name=name + '/conv2d')
        bn = tf.layers.batch_normalization(conv2d,
                                           training=is_training)
        return activation(bn)


def pooling_wrapper(inputs, name):
    return tf.layers.max_pooling2d(inputs, (2, 2), (2, 2), name=name)


# conv1: feature maps (the layer's output images)
conv1_1 = conv_wrapper(normal_result_x_images, 'conv1_1', is_training)
conv1_2 = conv_wrapper(conv1_1, 'conv1_2', is_training)
conv1_3 = conv_wrapper(conv1_2, 'conv1_3', is_training)
# 16 * 16
pooling1 = pooling_wrapper(conv1_3, 'pool1')
conv2_1 = conv_wrapper(pooling1, 'conv2_1', is_training)
conv2_2 = conv_wrapper(conv2_1, 'conv2_2', is_training)
conv2_3 = conv_wrapper(conv2_2, 'conv2_3', is_training)
# 8 * 8
pooling2 = pooling_wrapper(conv2_3, 'pool2')
conv3_1 = conv_wrapper(pooling2, 'conv3_1', is_training)
conv3_2 = conv_wrapper(conv3_1, 'conv3_2', is_training)
conv3_3 = conv_wrapper(conv3_2, 'conv3_3', is_training)
# 4 * 4 * 32
pooling3 = pooling_wrapper(conv3_3, 'pool3')
# [None, 4 * 4 * 32]
flatten = tf.layers.flatten(pooling3)
y_ = tf.layers.dense(flatten, 10)

loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# y_ -> softmax
# y -> one_hot
# loss = ylogy_

# indices
predict = tf.argmax(y_, 1)
# [1,0,1,1,1,0,0,0]
correct_prediction = tf.equal(predict, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

with tf.name_scope('train_op'):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

init = tf.global_variables_initializer()
train_steps = 10000
test_steps = 100

with tf.Session() as sess:
    sess.run(init)
    for i in range(train_steps):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={
                x: batch_data,
                y: batch_labels,
                is_training: True
            })
        if (i+1) % 100 == 0:
            print('[Train] Step: %d, loss: %4.5f, acc: %4.5f'
                  % (i+1, loss_val, acc_val))
        if (i+1) % 1000 == 0:
            test_data = CifarData(test_filenames, False)
            all_test_acc_val = []
            for j in range(test_steps):
                test_batch_data, test_batch_labels \
                    = test_data.next_batch(batch_size)
                test_acc_val = sess.run(
                    [accuracy],
                    feed_dict={
                        x: test_batch_data,
                        y: test_batch_labels,
                        is_training: False
                    })
                all_test_acc_val.append(test_acc_val)
            test_acc = np.mean(all_test_acc_val)
            print('[Test ] Step: %d, acc: %4.5f' % (i+1, test_acc))

 

The final result: accuracy rises from 83.4% to 85.6%.

In my run, however, the final test accuracy was poor even though the training accuracy was high; something is probably wrong on the test side (one likely culprit is sketched below), and I will come back to fix it when I get the chance.
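One plausible cause, if the script is run exactly as above: tf.layers.batch_normalization only updates its moving mean and variance through the ops it registers in the tf.GraphKeys.UPDATE_OPS collection, and those ops are not run unless they are explicitly attached to the train step. If they never run, the moving statistics stay near their initial values, so evaluation with training=False can look much worse than the training accuracy. A minimal sketch of the usual fix:

# make the train step also run the batch-norm moving-average updates
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.name_scope('train_op'):
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)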

 

 

