# Load data
print("Loading data...")

# train data: each line is one note; the first word is the class label,
# the remaining words are the note text
# output data: the engineer name list
# train again with other data
output = "support_engineers.csv"
# vector: glove & word2vec
train = "dataset1_clean_1"
vector = "w2v.txt"
# seq_size...
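# Hedged sketch (not the project's actual loader): reading the train file in
# the format described above, where the first token is the class and the rest
# is the note. Reuses the `train` variable; the encoding and split logic are
# assumptions for illustration.
labels, notes = [], []
with open(train, encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split(maxsplit=1)
        if len(parts) == 2:
            labels.append(parts[0])  # first word: the class label
            notes.append(parts[1])   # remainder: the note text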
from gensim.models import KeyedVectors

def _get_embedding_matrix(self, tokenizer):
    # Load the pretrained word2vec vectors (binary format)
    model = KeyedVectors.load_word2vec_format(self.pretrained_filepath, binary=True)
    # Mean/std of the pretrained vectors, used to initialize unseen words
    # (gensim >= 4 renamed the old `syn0` attribute to `vectors`)
    emb_mean, emb_std = model.vectors.mean(), model.vectors.std()
    word_index = tokenizer.word_index
    # Cap the vocabulary at max_features
    nb_words = min(self.max_features, len(word_index))
    embedding_matrix...
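# Hedged sketch of how this pattern usually finishes (an assumption, not the
# author's exact continuation): initialize the matrix from the pretrained
# mean/std, then overwrite the rows for words the pretrained model knows.
import numpy as np

def build_embedding_matrix(model, word_index, max_features, embed_size):
    emb_mean, emb_std = model.vectors.mean(), model.vectors.std()
    nb_words = min(max_features, len(word_index))
    # Random-normal init keeps out-of-vocabulary rows plausibly scaled
    embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))
    for word, i in word_index.items():
        if i >= nb_words:
            continue
        if word in model:  # KeyedVectors supports `in` for vocabulary lookup
            embedding_matrix[i] = model[word]
    return embedding_matrix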
import os
import json

# OpenNRE-style pretrained-model loader (fragment)
ckpt = os.path.join(root_path, 'pretrain/nre/' + model_name + '.pth.tar')
if model_name == 'wiki80_cnn_softmax':
    download_pretrain(model_name, root_path=root_path)
    download('glove', root_path=root_path)
    download('wiki80', root_path=root_path)
    wordi2d = json.load(open(os.path.join(...
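# Hedged usage sketch for the loader above, following OpenNRE's documented
# entry point; the sentence and character-offset entity spans are example
# data, and the model weights download on first use.
import opennre
model = opennre.get_model('wiki80_cnn_softmax')
result = model.infer({
    'text': 'He was the son of Máel Dúin mac Máele Fithrich, and grandson of the high king Áed Uaridnach.',
    'h': {'pos': (18, 46)},  # head entity span
    't': {'pos': (78, 91)},  # tail entity span
})
print(result)  # (relation_label, confidence), e.g. ('father', 0.5...)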
import hanlp

# HanLP pretrained embedding identifiers; despite the variable name, these
# load word embeddings (GloVe / word2vec), not tokenizers
tokenizer = hanlp.load('SEMEVAL16_EMBEDDINGS_300_TEXT_CN')
tokenizer = hanlp.load('GLOVE_6B_ROOT')
tokenizer = hanlp.load('GLOVE_6B_50D')
tokenizer = hanlp.load('GLOVE_6B_100D')
tokenizer = hanlp.load('GLOVE_6B_200D')
tokenizer = hanlp.load('GLOVE_6B_300D')
tokenizer...
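# Hedged sketch: in HanLP 2.x a loaded word-embedding component is callable,
# mapping a word to its vector (behavior assumed from HanLP's word2vec usage;
# the query word and printed shape are illustrative).
glove = hanlp.load('GLOVE_6B_50D')
vector = glove('king')
print(vector.shape)  # expected: a 50-dimensional vector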