from transformers import AutoTokenizer

# AutoTokenizer is assumed here (the loader class was cut off);
# BertTokenizer.from_pretrained would behave the same for this model.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

text = "foo 雲 bar"
tokens = tokenizer.tokenize(text)
print("tokens : ", tokens)

# Encode the text and request the special-tokens mask alongside the ids.
inputs = tokenizer(text, return_special_tokens_mask=True)
print("mask : ", inputs["special_tokens_mask"])

# Recompute the same mask directly from the input ids.
print("mask from input ids : ",
      tokenizer.get_special_tokens_mask(inputs["input_ids"],
                                        already_has_special_tokens=True))
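With already_has_special_tokens=True, get_special_tokens_mask treats the id list as an already-built sequence and only marks which positions hold special tokens, so both prints should produce the same mask: 1 at the [CLS] and [SEP] positions, 0 for the regular tokens.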