encoding='utf-8')words= english.words()# Calculate the frequency distribution of thewordsin the corpusword_frequency_distribution = FreqDist([word.lower()forwordinwords])# Get the sentences of the corpus, all in lower case, with infrequentwordsreplaced by the token "<unknown>"sentences = ...
self.nlp = spacy.load(spacy_model, parser=False, tagger=True, entity=False) Token.set_extension("replacement", default=None, force=True) 开发者ID:RTIInternational,项目名称:gobbli,代码行数:27,代码来源:wordnet.py 注:本文中的nltk.corpus方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理...
corpus_group.add_argument('--word-tokenizer', default='nltk.tokenize.WordPunctTokenizer', help='Full module path to a tokenizer class, defaults to %(default)s.') args = parser.parse_args()### corpus reader ###source_corpus =load_corpus_reader(args.source_corpus, reader=args.reader, file...
checked_conns = []forartinarticles: checked_conns.append(connParser.print_features(art, which, conn_feat_file)) conn_feat_file.close() conn_pred_name = FILE_PATH +'/../tmp/conn.pred'Corpus.test_with_opennlp(conn_feat_name, connParser.model_file, conn_pred_name) conn_res = [l.strip...
开发者ID:ongxuanhong,项目名称:jazzparser-master-thesis,代码行数:30,代码来源:knbc.py 示例3: test ▲点赞 3▼ # 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]# 或者: from nltk.corpus.util.LazyCorpusLoader importwords[as 别名]deftest():fromnltk.corpus.utilimportLazyCor...
开发者ID:jonpiffle,项目名称:ltag_parser,代码行数:14,代码来源:run_parser.py 示例9: main ▲点赞 1▼ defmain():# matplotlib.use('Qt5Agg')# import matplotlib.pyplot as pltdownload('punkt')# Download and load the english europarl corpusdownloader.download('europarl_raw') ...
开发者ID:jonpiffle,项目名称:ltag_parser,代码行数:16,代码来源:run_parser.py 示例4: LazyCorpusLoader ▲点赞 1▼ # 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]# 或者: from nltk.corpus.util.LazyCorpusLoader importfileids[as 别名]wordlist = LazyCorpusLoader('bamana/wor...
word = random.randrange(1, len(sentence)-1) sentence[word] = random.choice(vocabulary) word = random.choice(sentence[1:-2]) word = random.randrange(1, len(sentence) -1) letter = random.randrange(0, len(sentence[word])) sentence[word] = sentence[word][0:letter] + random.choice(lower...
开发者ID:ongxuanhong,项目名称:jazzparser-master-thesis,代码行数:11,代码来源:knbc.py 示例5: treebank_tagger_demo ▲点赞 1▼ # 需要导入模块: from nltk.corpus.util import LazyCorpusLoader [as 别名]# 或者: from nltk.corpus.util.LazyCorpusLoader importtagged_words[as 别名]deftreebank_tagger_...