keras 中数据预处理
所有的函数都在 keras.preprocessing 中,分别有 text、sequence、image 三个子模块。
# Text preprocessing: sample English sentence used by the examples below.
txt = "My name is maoli.maoli don't like coding."
文字预处理
文字拆分
建立索引
padding(序列补齐)
标注
# text_to_word_sequence converts a text into a sequence (list) of word tokens.
from keras.preprocessing.text import text_to_word_sequence

out = text_to_word_sequence(txt)  # lower=True by default
print(out)  # plays the same role for English that jieba plays for Chinese
['my', 'name', 'is', 'maoli', 'maoli', "don't", 'like', 'coding']
# `filters` is a set of characters to strip: here every m, a, o, l and i
# is removed, which also cuts the words apart at those positions.
out1 = text_to_word_sequence(txt, filters='maoli')
print(out1)
['y', 'n', 'e', 's', '.', 'd', "n't", 'ke', 'c', 'd', 'ng.']
# Word splitting applied to a Chinese sentence.
chn = '我的名字叫毛利。我不喜欢写码'
out2 = text_to_word_sequence(chn)                # default filters
out3 = text_to_word_sequence(chn, filters='。')  # split only on the full stop
print(out2)
print(out3)
['我的名字叫毛利。我不喜欢写码']
['我的名字叫毛利', '我不喜欢写码']
# text_to_word_sequence is of no use for Chinese; jieba has to be used instead.
import jieba

# cut() returns a generator, lcut() returns a list.
# cut_all=False is jieba's accurate mode; cut_all=True is its full mode, which
# lists every word it can find (see the jieba README) -- it is not a
# punctuation filter like filters='。'.
out4 = jieba.lcut(chn, cut_all=False)
out5 = jieba.lcut(chn, cut_all=True)
print(out4)
print(out5)
['我', '的', '名字', '叫', '毛利', '。', '我', '不', '喜欢', '写码']
['我', '的', '名字', '叫', '毛利', '', '', '我', '不', '喜欢', '写', '码']
# Show the English token list `out` again.
print(out)
['my', 'name', 'is', 'maoli', 'maoli', "don't", 'like', 'coding']
# Reverse the order: sort the tokens in place, descending.
out.sort(reverse=True)
print(out)
['name', 'my', 'maoli', 'maoli', 'like', 'is', "don't", 'coding']
import numpy as np

# Build the index: map every token to its position in `out`.
# A token that occurs twice (here 'maoli') keeps the position of its last
# occurrence, because later dict keys overwrite earlier ones.
word_index = dict(zip(out, np.arange(len(out))))
word_index
{'name': 0, 'my': 1, 'maoli': 3, 'like': 4, 'is': 5, "don't": 6, 'coding': 7}
# pad_sequences: pad sequences so that they all have the same length.
from keras.preprocessing.sequence import pad_sequences

x = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
y0 = pad_sequences(x)            # no maxlen given
y1 = pad_sequences(x, maxlen=5)  # every row padded to length 5
print(y0)
print('-------------' * 10)
print(y1)
[[0 1 2 3]
 [0 0 4 5]
 [6 7 8 9]]
----------------------------------------------------------------------------------------------------------------------------------
[[0 0 1 2 3]
 [0 0 0 4 5]
 [0 6 7 8 9]]
# Tokenize with Tokenizer.
from keras.preprocessing.text import Tokenizer

somestr = ['i am dalao,my name is maoli', 'maoli is very cool']
tok = Tokenizer()
tok.fit_on_texts(somestr)  # build the vocabulary from the sample sentences
tok.word_index
{'is': 1,
 'maoli': 2,
 'i': 3,
 'am': 4,
 'dalao': 5,
 'my': 6,
 'name': 7,
 'very': 8,
 'cool': 9}
# Image preprocessing
# The ImageDataGenerator class
from keras.preprocessing.image import ImageDataGenerator

# The CIFAR-10 example from the official documentation.
# NOTE: `cifar10`, `np_utils`, `num_classes`, `model` and `epochs` are not
# defined in this snippet; they must be provided before running it.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# Compute the quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(x_train)

# Fit the model on batches with real-time data augmentation:
model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                    steps_per_epoch=len(x_train) / 32, epochs=epochs)

# Here is a more "manual" example
for e in range(epochs):
    print('Epoch', e)
    batches = 0
    for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32):
        model.fit(x_batch, y_batch)
        batches += 1
        if batches >= len(x_train) / 32:
            # We need to break the loop by hand,
            # because the generator loops indefinitely.
            break
Keras 模型
在 Keras 中有两类主要的模型:Sequential 顺序模型 和 使用函数式 API 的 Model 类模型。
# This part focuses on the functional API.
from keras.models import Model
from keras.layers import Input, Dense

a = Input(shape=(32,))
b = Dense(32)(a)
model = Model(inputs=a, outputs=b)
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_2 (InputLayer)         (None, 32)                0
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056
=================================================================
Total params: 1,056
Trainable params: 1,056
Non-trainable params: 0
_________________________________________________________________
模型可视化
from keras.utils import plot_model

# Write a diagram of the model to model.png.
plot_model(model, to_file='model.png')
# Training visualization
import matplotlib.pyplot as plt

# NOTE: `x` and `y` are the training inputs and targets; they are not defined
# in this snippet.
history = model.fit(x, y, validation_split=0.25, epochs=50, batch_size=16, verbose=1)

# Plot training & validation accuracy values
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
实战手写字体
import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython/Jupyter magic: uncomment only inside a notebook
from keras.datasets import mnist

(train_image, train_label), (test_image, test_label) = mnist.load_data()

# Append a trailing axis so the images match input_shape=(28, 28, 1) below.
train_image = np.expand_dims(train_image, axis=-1)
test_image = np.expand_dims(test_image, axis=-1)

model = keras.Sequential()
model.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPool2D())
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))

# The labels are used as loaded (not one-hot encoded), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
model.fit(train_image, train_label, epochs=5, batch_size=512)
Epoch 1/5
60000/60000 [==============================] - 237s 4ms/step - loss: 1.6534 - acc: 0.8529
Epoch 2/5
60000/60000 [==============================] - 237s 4ms/step - loss: 0.0784 - acc: 0.9762
Epoch 3/5
60000/60000 [==============================] - 241s 4ms/step - loss: 0.0521 - acc: 0.9840
Epoch 4/5
60000/60000 [==============================] - 240s 4ms/step - loss: 0.0399 - acc: 0.9879
Epoch 5/5
60000/60000 [==============================] - 236s 4ms/step - loss: 0.0314 - acc: 0.9896
<keras.callbacks.History at 0x165e8383438>
最后,祝有所学习,有所成长
回复【 1024 】获取学习资料
转发,好看支持一下,感谢
你的转发,就是对我最大的支持
声明:本文来自网络,不代表【好得很程序员自学网】立场,转载请注明出处:http://www.haodehen.cn/did127906