使用 NLTK 获取 Gutenberg 语料,并用 gensim 生成词库和 one-hot 编码
nltk 获取 gutenberg 语料 gensim 生成词库和 onehot 编码 正在尝试基于 Tensorflow LSTM 模型开发另外一个项目,需要自然语言处理的工具和语料。 import nltk import numpy as np from nltk.corpus import gutenberg from gensim import corpora, models, similarities class Book2Array(object): sentences=None token2id_dic=None def __init__(self,sentences): self.sentences=sentences self.token2id_dic=self.get_token2id_dic() def get_sentences(self): #macbeth_sentences = gutenberg.sents('shakespeare-macbeth.txt') #print(macbeth_sentences) #print(type(macbeth_s...