data_loader = DataLoader(args) tok = lambda x: jieba.lcut(x, cut_all=False) train_x, train_y, dev_x, dev_y, test_x = data_loader.get_dateSet(tok) train_gen = DataSetGenerator(train_x, train_y) train_dataset = mds.GeneratorDataset(train_gen, shuffle=True, column_names=['text',...
'r',encoding='utf-8').read() 9 text = text.replace('\n','').replace('\u3000','') 10 text_cut = jieba.lcut(text) 11 text_cut = ' '.join(text_cut) 12 13 #过滤一些没有关系