使用keras.preprocessing进行文本序列化
使用 keras.preprocessing 进行文本序列化：先用 Tokenizer 将文本转换为整数索引序列，再用 pad_sequences 将所有序列填充或截断到统一的固定长度。
# Turn the raw "text" columns into fixed-length integer sequences:
# fit a Tokenizer on the training text, map both splits to index
# sequences, then pad them to a common length.
#
# NOTE(review): `train` and `test` are not defined in this snippet; they
# are presumably pandas DataFrames with a "text" column -- confirm against
# the data-loading code that runs before this cell.
from keras.preprocessing import text, sequence

max_features = 200000  # passed to the Tokenizer as `num_words`
maxlen = 500  # passed to `pad_sequences` as the target sequence length

# Replace missing entries with the placeholder string "[na]".
list_sentences_train = train["text"].fillna("[na]").values
list_sentences_test = test["text"].fillna("[na]").values

# The tokenizer is fitted on the training text only; the test text is
# encoded with that same fitted tokenizer below.
tokenizer = text.Tokenizer(num_words=max_features)
# fit_on_texts only iterates its argument, so the array is passed directly
# instead of being copied into a list first.
tokenizer.fit_on_texts(list_sentences_train)

list_tokenized_train = tokenizer.texts_to_sequences(list_sentences_train)
list_tokenized_test = tokenizer.texts_to_sequences(list_sentences_test)

train_sequence = sequence.pad_sequences(list_tokenized_train, maxlen=maxlen)
test_sequence = sequence.pad_sequences(list_tokenized_test, maxlen=maxlen)