欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 编程资源 > 编程问答 >内容正文

编程问答

5-RNN-03_双向rnn_英文小短文

发布时间:2024/3/26 编程问答 52 豆豆
本文由生活随笔收集整理,主要介绍《5-RNN-03_双向rnn_英文小短文》。小编觉得挺不错的,现在分享给大家,供大家参考。
import os

import numpy as np
import tensorflow as tf


def load_data(file_path):
    """Read the raw corpus and return it as a list of lines.

    :param file_path: path of the text file to read
    :return: list of lines; each line keeps its trailing newline
    """
    # An explicit encoding keeps the result independent of the platform default.
    with open(file_path, 'r', encoding='utf-8') as reader:
        return reader.readlines()


def create_lookup_table(text):
    """Build the two vocabulary lookup tables.

    :param text: iterable of word tokens (duplicates allowed)
    :return: tuple ``(word2int, int2word)``; ids follow the sorted order of
        the distinct tokens, starting at 0
    """
    words = sorted(set(text))
    int2word = dict(enumerate(words))
    word2int = {word: idx for idx, word in int2word.items()}
    return word2int, int2word


def create_X_and_Y(data, word2int, number_time_steps=3):
    """Turn raw lines into sliding-window training pairs.

    Each line is stripped and split on single spaces. Every run of
    ``number_time_steps`` consecutive words becomes one row of X and the word
    that follows it becomes the matching entry of Y. Windows never cross line
    boundaries.

    :param data: iterable of text lines
    :param word2int: mapping word -> integer id
    :param number_time_steps: window length (number of input words)
    :return: ``X`` of shape [N, number_time_steps] and ``Y`` of shape [N],
        both int32
    """
    X, Y = [], []
    for content in data:
        # strip() removes leading/trailing whitespace, including the newline.
        words = content.strip().split(' ')
        if len(words) <= number_time_steps:
            # Too short to yield a window; skipping here also means its words
            # are never looked up, exactly as before.
            continue
        # Convert once per line instead of once per overlapping window.
        ids = [word2int[word] for word in words]
        for offset in range(len(ids) - number_time_steps):
            X.append(ids[offset: offset + number_time_steps])
            Y.append(ids[offset + number_time_steps])
    # A fixed integer dtype keeps the arrays integral even when no window was
    # produced (an empty list would otherwise become float64).
    X = np.asarray(X, dtype=np.int32).reshape([-1, number_time_steps])
    Y = np.asarray(Y, dtype=np.int32).reshape(-1)
    return X, Y


def create_model(vocab_size, num_units=32, number_time_steps=3):
    """Build the bidirectional LSTM next-word classifier.

    :param vocab_size: vocabulary size (number of output classes)
    :param num_units: number of hidden units of each LSTM cell
    :param number_time_steps: number of time steps (input words)
    :return: ``(_x, _y, logits, predictions)`` where ``_x`` / ``_y`` are the
        input and label placeholders
    """
    with tf.variable_scope('Network', initializer=tf.truncated_normal_initializer(stddev=0.1)):
        with tf.variable_scope('input'):
            # Example of the expected feed values:
            #   x: [[2, 3, 4], [7, 8, 9]]
            #   y: [5, 10]
            _x = tf.placeholder(tf.int32, shape=[None, number_time_steps], name='x')
            _y = tf.placeholder(tf.int32, shape=[None], name='y')
            # Keep the placeholder handle intact; the float copy gets its own name
            # so that the caller receives the placeholder, not the cast tensor.
            x_float = tf.cast(_x, tf.float32)
            # The static RNN API takes a list with one [N, 1] tensor per time step.
            # TODO: the raw word id is used as the feature; a real project
            # should use one-hot vectors or an embedding instead.
            input_x = tf.split(x_float, num_or_size_splits=number_time_steps, axis=1)

        with tf.variable_scope('rnn'):
            # a. define the forward and backward cells
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
            # b. run the static bidirectional RNN to get the hidden outputs;
            #    rnn_outputs is a list of [N, 2 * num_units] tensors, one per step
            rnn_outputs, _, _ = tf.nn.static_bidirectional_rnn(
                cell_fw=cell_fw, cell_bw=cell_bw, inputs=input_x, dtype=tf.float32)

        with tf.variable_scope('logits'):
            # a. take the output of the last time step
            rnn_output = rnn_outputs[-1]
            # b. output-layer variables
            softmax_w = tf.get_variable(
                'w', shape=[2 * num_units, vocab_size], dtype=tf.float32)
            softmax_b = tf.get_variable(
                'b', shape=[vocab_size], dtype=tf.float32,
                initializer=tf.zeros_initializer())
            logits = tf.nn.xw_plus_b(rnn_output, softmax_w, softmax_b)

        with tf.variable_scope('Predict'):
            predictions = tf.argmax(logits, axis=1)
    return _x, _y, logits, predictions


def create_loss(logits, labels):
    """Build the mean sparse softmax cross-entropy loss.

    :param logits: unnormalised class scores
    :param labels: integer class ids; flattened to 1-D before use
    :return: scalar loss tensor
    """
    with tf.name_scope('loss'):
        # Flatten the labels to one dimension.
        labels = tf.reshape(labels, shape=[-1])
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return loss


def create_optimizer(loss, lr=1e-3):
    """Build the Adam training op.

    :param loss: scalar loss tensor to minimise
    :param lr: learning rate
    :return: the training operation
    """
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        train_opt = optimizer.minimize(loss)
    return train_opt


def train(checkpoint_dir, max_steps=10000, batch_size=64, num_units=32,
          number_time_steps=10, data_path='../datas/belling_the_cat.txt'):
    """Train the bidirectional RNN and periodically checkpoint it.

    Every 200 steps the loss of the current batch and one random sample
    prediction are printed; every 1000 steps the model is saved.

    :param checkpoint_dir: existing directory that receives the checkpoints
    :param max_steps: number of training steps to run (inclusive)
    :param batch_size: number of samples per step
    :param num_units: number of hidden units of each LSTM cell
    :param number_time_steps: number of input words per sample
    :param data_path: path of the training corpus
    :raises ValueError: if the corpus yields no training sample
    """
    # Load the corpus and build the vocabulary from every word of every line.
    data = load_data(file_path=data_path)
    text = [word for line in data for word in line.strip().split(' ')]
    word2int, int2word = create_lookup_table(text)
    x, y = create_X_and_Y(data, word2int, number_time_steps=number_time_steps)

    total_samples = x.shape[0]
    if total_samples == 0:
        raise ValueError(
            'no training samples: every line has at most {} words'.format(number_time_steps))
    # At least one batch per epoch, even when the data set is smaller than
    # batch_size; otherwise the batch counter would never wrap around and
    # every later batch would be empty.
    n_batches = max(1, total_samples // batch_size)

    graph = tf.Graph()
    with graph.as_default():
        # 1. network
        _x, _y, logits, predictions = create_model(
            len(word2int), num_units=num_units, number_time_steps=number_time_steps)
        # 2. loss
        loss = create_loss(logits, _y)
        # 3. optimizer
        train_opt = create_optimizer(loss)

        saver = tf.train.Saver()
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())

            batch_idx = 0
            # A random permutation of the sample indices, i.e. a shuffle.
            random_index = np.random.permutation(total_samples)
            # Inclusive upper bound so that step == max_steps is trained,
            # logged and checkpointed as well.
            for step in range(1, max_steps + 1):
                # Slice out the current batch.
                start_idx = batch_idx * batch_size
                idx = random_index[start_idx: start_idx + batch_size]
                feed = {_x: x[idx], _y: y[idx]}
                sess.run(train_opt, feed)

                if step % 200 == 0:
                    train_loss = sess.run(loss, feed)
                    print('step:{} - Train loss:{}'.format(step, train_loss))

                    # Predict one random sample as a sanity check.
                    index = np.random.randint(low=0, high=total_samples)
                    sample_in = np.reshape(x[index], newshape=[-1, number_time_steps])
                    sample_out = sess.run(predictions, feed_dict={_x: sample_in})
                    print('输入:{} - 预测:{} VS 真实值:{}'.format(
                        x[index], int2word[sample_out[0]], int2word[y[index]]))

                if step % 1000 == 0:
                    # Persist the model.
                    files = 'model.ckpt'
                    save_files = os.path.join(checkpoint_dir, files)
                    saver.save(sess, save_path=save_files, global_step=step)
                    print('model saved!!')

                # Move on to the next batch; reshuffle at the end of each epoch.
                batch_idx += 1
                if batch_idx == n_batches:
                    batch_idx = 0
                    random_index = np.random.permutation(total_samples)


if __name__ == '__main__':
    checkpoint_dir = './models'
    os.makedirs(checkpoint_dir, exist_ok=True)
    train(checkpoint_dir, max_steps=10000, batch_size=64, num_units=32, number_time_steps=10)

# Console output of the original run (the log continues below):
#
# D:\Anaconda\python.exe D:/AI20/HJZ/04-深度学习/4-RNN/20191228___AI20_RNN/03_双向rnn_英文小短文.py
# 2020-02-18 10:42:51.076290: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
# step:200 - Train loss:3.9305477142333984
# 输入:[ 8 7 1 76 39 1 85 66 24 30] - 预测:, VS 真实值:consists
# step:400 - Train loss:3.4807467460632324
# 输入:[ 77 100 44 86 60 2 91 69 56 109] - 预测:, VS 真实值:general
# step:600 - Train loss:2.976116418838501
# 输入:[ 86 59 63 86 23 2 21 91 53 101] - 预测:the VS 真实值:should
# step:800 - Train loss:2.6706745624542236
# 输入:[65 12 11 61 83 2 88 86 64 58] - 预测:the VS 真实值:said
# step:1000 - Train loss:2.3042399883270264
# 输入:[ 39 92 110 55 86 22 2 111 108 8] - 预测:agree VS 真实值:agree
# model saved!!
step:1200 - Train loss:1.834552526473999 输入:[64 58 37 96 11 76 0 85 45 8] - 预测:very VS 真实值:very step:1400 - Train loss:1.6621453762054443 输入:[ 40 14 1 101 28 31 34 35 40 2] - 预测:i VS 真实值:i step:1600 - Train loss:1.3067286014556885 输入:[ 29 93 26 103 54 90 28 84 93 67] - 预测:their VS 真实值:their step:1800 - Train loss:1.2478928565979004 输入:[ 1 86 57 38 4 36 29 93 26 103] - 预测:measures VS 真实值:measures step:2000 - Train loss:0.9508646726608276 输入:[ 92 110 55 86 22 2 111 108 8 7] - 预测:, VS 真实值:, model saved!! step:2200 - Train loss:0.8384971618652344 输入:[61 83 2 88 86 64 58 76 46 45] - 预测:easy VS 真实值:easy step:2400 - Train loss:0.5928971171379089 输入:[64 58 37 96 11 76 0 85 45 8] - 预测:very VS 真实值:very step:2600 - Train loss:0.5592891573905945 输入:[31 34 35 40 2 41 98 1 89 1] - 预测:to VS 真实值:to step:2800 - Train loss:0.4942898452281952 输入:[ 14 1 101 28 31 34 35 40 2 41] - 预测:venture VS 真实值:venture step:3000 - Train loss:0.35549503564834595 输入:[ 77 100 44 86 60 2 91 69 56 109] - 预测:general VS 真实值:general model saved!! step:3200 - Train loss:0.3749193549156189 输入:[93 19 86 23 3 86 57 50 16 65] - 预测:another VS 真实值:another step:3400 - Train loss:0.29113298654556274 输入:[86 23 2 82 76 91 1 11 82 76] - 预测:that VS 真实值:that step:3600 - Train loss:0.25510722398757935 输入:[93 70 85 4 81 19 18 68 1 11] - 预测:attached VS 真实值:attached step:3800 - Train loss:0.2347201555967331 输入:[ 26 103 54 90 28 84 93 67 87 25] - 预测:enemy VS 真实值:enemy step:4000 - Train loss:0.18923482298851013 输入:[ 1 95 10 64 58 37 96 11 76 0] - 预测:that VS 真实值:that model saved!! 
step:4200 - Train loss:0.16195067763328552 输入:[23 3 86 57 50 16 65 12 11 61] - 预测:spoke VS 真实值:spoke step:4400 - Train loss:0.13978148996829987 输入:[ 86 59 63 86 23 2 21 91 53 101] - 预测:should VS 真实值:should step:4600 - Train loss:0.1490730196237564 输入:[103 54 90 28 84 93 67 87 25 33] - 预测:, VS 真实值:, step:4800 - Train loss:0.10961226373910904 输入:[ 1 11 82 76 85 20 16 48 4 112] - 预测:mouse VS 真实值:mouse step:5000 - Train loss:0.11105622351169586 输入:[ 86 22 2 111 108 8 7 1 76 39] - 预测:, VS 真实值:, model saved!! step:5200 - Train loss:0.0975622832775116 输入:[ 78 9 47 104 77 100 5 1 11 28] - 预测:easily VS 真实值:easily step:5400 - Train loss:0.0716937854886055 输入:[18 68 1 11 17 21 4 74 75 86] - 预测:neck VS 真实值:neck step:5600 - Train loss:0.07302534580230713 输入:[ 91 53 101 78 9 47 104 77 100 5] - 预测:, VS 真实值:, step:5800 - Train loss:0.05743904039263725 输入:[66 24 30 27 44 86 80 11 94 52] - 预测:in VS 真实值:in step:6000 - Train loss:0.05397602543234825 输入:[ 1 85 66 24 30 27 44 86 80 11] - 预测:treacherous VS 真实值:treacherous model saved!! step:6200 - Train loss:0.054213933646678925 输入:[87 25 33 1 86 23 2 82 76 91] - 预测:, VS 真实值:, step:6400 - Train loss:0.0373719185590744 输入:[23 2 82 76 91 1 11 82 76 85] - 预测:but VS 真实值:but step:6600 - Train loss:0.046218693256378174 输入:[ 42 101 28 71 82 79 63 40 14 1] - 预测:we VS 真实值:we step:6800 - Train loss:0.03185339272022247 输入:[ 20 16 48 4 112 58 37 96 11 76] - 预测:he VS 真实值:he step:7000 - Train loss:0.026730481535196304 输入:[ 1 11 17 21 4 74 75 86 59 63] - 预测:the VS 真实值:the model saved!! 
step:7200 - Train loss:0.02903711423277855 输入:[111 108 8 7 1 76 39 1 85 66] - 预测:chief VS 真实值:chief step:7400 - Train loss:0.026526065543293953 输入:[ 57 38 4 36 29 93 26 103 54 90] - 预测:could VS 真实值:could step:7600 - Train loss:0.02054942026734352 输入:[107 45 93 19 86 23 3 86 57 50] - 预测:at VS 真实值:at step:7800 - Train loss:0.01777688041329384 输入:[ 85 20 16 48 4 112 58 37 96 11] - 预测:said VS 真实值:said step:8000 - Train loss:0.014596270397305489 输入:[112 58 37 96 11 76 39 38 4 69] - 预测:to VS 真实值:to model saved!! step:8200 - Train loss:0.015546170994639397 输入:[107 45 93 19 86 23 3 86 57 50] - 预测:at VS 真实值:at step:8400 - Train loss:0.01338121946901083 输入:[16 65 12 11 61 83 2 88 86 64] - 预测:mouse VS 真实值:mouse step:8600 - Train loss:0.014673653990030289 输入:[ 1 85 66 24 30 27 44 86 80 11] - 预测:treacherous VS 真实值:treacherous step:8800 - Train loss:0.010602903552353382 输入:[75 86 59 63 86 23 2 21 91 53] - 预测:we VS 真实值:we step:9000 - Train loss:0.01917443238198757 输入:[ 1 20 107 45 93 19 86 23 3 86] - 预测:mice VS 真实值:mice model saved!! step:9200 - Train loss:0.012528151273727417 输入:[ 1 89 1 93 70 85 4 81 19 18] - 预测:procured VS 真实值:procured step:9400 - Train loss:0.009897150099277496 输入:[ 26 103 54 90 28 84 93 67 87 25] - 预测:enemy VS 真实值:enemy step:9600 - Train loss:0.007559692487120628 输入:[ 20 16 48 4 112 58 37 96 11 76] - 预测:he VS 真实值:he step:9800 - Train loss:0.008503235876560211 输入:[33 1 86 23 2 82 76 91 1 11] - 预测:some VS 真实值:someProcess finished with exit code 0

总结

以上是生活随笔为你收集整理的5-RNN-03_双向rnn_英文小短文的全部内容,希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。