eos_token_id

# a transformer tokenizer was given with byte_decoder
elif hasattr(tokenizer, "convert_ids_to_tokens"):
    byte_tokens = [
        bytes(tokenizer.convert_tokens_to_string(['a', tokenizer.convert_ids_to_tokens(i)])[1:], encoding="utf8")
        for i in range(tokenizer.vocab_size)
    ]
    bos_...
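The `['a', token]` indirection in the comprehension above is the usual workaround for tokenizers that expose `convert_ids_to_tokens` but no `byte_decoder`: decoding the token next to a throwaway `'a'` makes SentencePiece markers such as `▁` render as real characters, and slicing off the `'a'` leaves the token's literal text. A minimal sketch of the same trick; the checkpoint name is illustrative and any SentencePiece-based tokenizer works:

```python
from transformers import AutoTokenizer

# assumption: illustrative checkpoint; any SentencePiece-based tokenizer behaves the same way
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

token_id = tokenizer.encode("Hello world", add_special_tokens=False)[1]   # a token with a leading space
token = tokenizer.convert_ids_to_tokens(token_id)                         # e.g. '▁world' (metasymbol, not a real space)
raw = tokenizer.convert_tokens_to_string(['a', token])[1:]                # decode next to 'a', then drop the 'a'
print(repr(token), "->", repr(raw))                                       # the '▁' is now a literal ' '
```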
"bos_token_id": self.tokenizer.bos_token_id, "pad_token_id": self.tokenizer.pad_token_id } generate_ids = self.model.generate(**generate_input) text = self.tokenizer.decode(generate_ids[0]) return text @property def _llm_type(self) -> str: return "Atom" 75 changes: 75 additions ...
Model metadata: {'tokenizer.ggml.add_eos_token': 'false', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.rope.freq_base': '10000.000000', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'tokenizer.ggml.add_bos_token': 'true', 'llama....
'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}
Using fallback chat format: None
llama...
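The metadata dump and the "Using fallback chat format" line above are what llama-cpp-python prints while loading a GGUF file with `verbose=True`; the `tokenizer.ggml.bos_token_id` / `tokenizer.ggml.eos_token_id` entries are the values the runtime then uses for BOS/EOS handling. A minimal sketch, with an illustrative model path:

```python
from llama_cpp import Llama  # assumption: llama-cpp-python is installed; the model path is illustrative

# verbose=True makes the loader print the "Model metadata: {...}" block shown above
llm = Llama(model_path="./llama-2-7b-chat.Q4_K_M.gguf", n_ctx=4096, verbose=True)

# the same metadata drives token handling at inference time
print(llm.token_bos(), llm.token_eos())  # -> 1 2 for this LLaMA v2 GGUF
```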
response = tokenizer(text=f"{example['output']}", add_special_tokens=False)
input_ids = [tokenizer.bos_token_id] + instruction["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
labels = [tokenizer.bos_token_id] + [-100] * len(instruction["input_ids"]) + response...
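The fragment above is the standard causal-LM SFT preprocessing: concatenate BOS + prompt + response + EOS, and mask the prompt portion of `labels` with -100 so the loss is computed only on the response. A minimal sketch of the whole function, mirroring that masking; the prompt template, field names, and `max_length` are illustrative:

```python
def process_func(example, tokenizer, max_length=512):
    """Minimal sketch of the label masking shown above; template and field names are illustrative."""
    instruction = tokenizer(f"User: {example['instruction']}\nAssistant: ", add_special_tokens=False)
    response = tokenizer(f"{example['output']}", add_special_tokens=False)

    input_ids = [tokenizer.bos_token_id] + instruction["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
    attention_mask = [1] * len(input_ids)
    # -100 is ignored by the cross-entropy loss, so only the response (and EOS) contributes to training
    labels = [tokenizer.bos_token_id] + [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.eos_token_id]

    return {
        "input_ids": input_ids[:max_length],
        "attention_mask": attention_mask[:max_length],
        "labels": labels[:max_length],
    }
```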
(-100) if token == self.tokenizer.pad_token_id else token for token in label] for label in y['input_ids']])
y['input_ids'] = inp
return {'input_ids': x['input_ids'][0], 'attention_mask': x['attention_mask'][0],
        'decoder_input_ids': y['input_ids'][0], 'decoder_attention_mask': y['attention_mask'][0...
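The de-spaced fragment above is the encoder-decoder counterpart: padding tokens in the target sequence are replaced with -100 so they are ignored by the loss. `transformers.DataCollatorForSeq2Seq` performs the same replacement via `label_pad_token_id`; a short sketch with an illustrative checkpoint:

```python
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

# assumption: the checkpoint name is illustrative; any encoder-decoder tokenizer works
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# label_pad_token_id=-100 reproduces the manual pad -> -100 replacement done in the snippet above
collator = DataCollatorForSeq2Seq(tokenizer, label_pad_token_id=-100, padding=True)

features = [
    {"input_ids": tokenizer("translate: hello").input_ids, "labels": tokenizer("bonjour").input_ids},
    {"input_ids": tokenizer("translate: good night").input_ids, "labels": tokenizer("bonne nuit").input_ids},
]
batch = collator(features)
print(batch["labels"])  # shorter label sequences are padded with -100, not pad_token_id
```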
base_model: ./mistral-7b-v0.1
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_mistral_derived_model: true

load_in_8bit: false
load_in_4bit: true
strict: false

sequence_len: 4096
bf16: true
fp16: false
tf32: false
flash_attention: true

special_tokens:
  bos_token: "<s>"
  ...
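In the Axolotl-style config above, `special_tokens` only pins which literal strings the `LlamaTokenizer` should use for its special tokens; the token ids still come from the checkpoint's SentencePiece vocabulary. A quick check, assuming the elided entries follow the usual Llama/Mistral convention (`</s>`, `<unk>`) and that `./mistral-7b-v0.1` is a local Hugging Face checkpoint:

```python
from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("./mistral-7b-v0.1")  # assumption: local HF checkpoint
tokenizer.add_special_tokens({"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>"})

print(tokenizer.bos_token, tokenizer.bos_token_id)  # '<s>' 1
print(tokenizer.eos_token, tokenizer.eos_token_id)  # '</s>' 2
```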
MAX_LENGTH = 384  # the Llama tokenizer splits a single Chinese character into several tokens, so allow a larger max length to keep examples intact
input_ids, attention_mask, labels = [], [], []
instruction = tokenizer(f"<bos><start_of_turn>user\n{example['instruction'] + example['input']}<end_of_turn>\n<start_of_turn>mode...
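The `<start_of_turn>user … <end_of_turn>` markup hand-written in the f-string above is Gemma's chat format; the same prompt can also be produced from the tokenizer's built-in chat template. A short sketch, with an illustrative (gated) checkpoint name:

```python
from transformers import AutoTokenizer

# assumption: illustrative checkpoint; Gemma tokenizers ship this chat template
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

messages = [{"role": "user", "content": "你好"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# roughly:
# <bos><start_of_turn>user
# 你好<end_of_turn>
# <start_of_turn>model
```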