tokens_to_add += [
    AddedToken(token, special=True) if isinstance(token, str) else token
    for token in self.all_special_tokens_extended
    if token not in encoder and token not in tokens_to_add
]

Reviewer comment (itazap, Sep 6, 2024): ensures tokens are added as special if they are in all_special_tokens_extended.
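To make the intent concrete, here is a minimal sketch of how a plain string wrapped in AddedToken(token, special=True) behaves once registered, assuming a recent tokenizers release where AddedToken accepts the special keyword; the token name <doc_sep> and the gpt2 checkpoint are placeholders, not part of the patch above.

from transformers import AutoTokenizer
from tokenizers import AddedToken

tok = AutoTokenizer.from_pretrained("gpt2")

# Wrapping the string mirrors what the list comprehension above does for
# plain strings found in all_special_tokens_extended.
tok.add_tokens([AddedToken("<doc_sep>", special=True)])

ids = tok("hello <doc_sep> world")["input_ids"]
# The added token is kept as a single id instead of being split by the BPE.
print(tok.convert_ids_to_tokens(ids))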
class TextEncoder(object):
    """Base class for converting from ints to/from human readable strings."""

    def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS):
        self._num_reserved_ids = num_reserved_ids

    @property
    def num_reserved_ids(self):
        return self._num_reserved_ids

    def encode(...
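A minimal sketch of what a concrete subclass might look like, assuming NUM_RESERVED_TOKENS reserves the low ids (conventionally PAD and EOS) so that real symbols must be shifted past them; the ByteEncoder name is illustrative, not part of the original API.

class ByteEncoder(TextEncoder):
    """Encodes UTF-8 bytes, shifted past the reserved id range."""

    def encode(self, s):
        # Offset every byte so ids 0..num_reserved_ids-1 stay free for PAD/EOS.
        return [b + self._num_reserved_ids for b in s.encode("utf-8")]

    def decode(self, ids):
        return bytes(
            i - self._num_reserved_ids for i in ids if i >= self._num_reserved_ids
        ).decode("utf-8", errors="replace")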
return getattr(self, self.model_name).embed_tokens(*args, **kwargs)

Reviewer comment (DarkLight1337, Sep 27, 2024): Is this supposed to be applicable to different models? If so, I suggest moving this to a common file (vllm/model_executor/models/module_mapping.py).
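For readers outside the thread, a minimal sketch of the delegation pattern under discussion: a wrapper stores the wrapped model under a configurable attribute name and forwards embed_tokens to it. WrapperModel and inner_model are illustrative names, not the actual vLLM classes.

import torch.nn as nn

class WrapperModel(nn.Module):
    def __init__(self, inner: nn.Module, model_name: str = "inner_model"):
        super().__init__()
        self.model_name = model_name
        setattr(self, model_name, inner)

    def embed_tokens(self, *args, **kwargs):
        # Look up the wrapped model by its attribute name, then delegate.
        return getattr(self, self.model_name).embed_tokens(*args, **kwargs)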
    ] = Path("config/prompts/ignore_tokens.txt"),
    out_dir: Annotated[
        Optional[Path],
        typer.Option(
            "--out-dir",
            "-o",
            path_type=Path,
            file_okay=False,
            help="output directory",
        ),
    ] = Path("stylize/"),
    fps: Annotated[
        int,
        typer.Option(
            "--fps",
            "-f",
            min=1,
            max=120,
            ...
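For context, a self-contained sketch of the same Annotated + typer.Option pattern, assuming Python 3.9+ and a recent typer; the command name, defaults, and option names here are illustrative rather than taken from the original CLI.

from pathlib import Path
from typing import Annotated, Optional

import typer

app = typer.Typer()

@app.command()
def stylize(
    out_dir: Annotated[
        Optional[Path],
        typer.Option("--out-dir", "-o", file_okay=False, help="output directory"),
    ] = Path("stylize/"),
    fps: Annotated[int, typer.Option("--fps", "-f", min=1, max=120)] = 24,
):
    # Echo the parsed options so the wiring is visible when run from a shell.
    typer.echo(f"writing to {out_dir} at {fps} fps")

if __name__ == "__main__":
    app()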
" EncoderRepetitionPenaltyLogitsProcessor,\n", " EpsilonLogitsWarper,\n", " EtaLogitsWarper,\n", " ExponentialDecayLengthPenalty,\n", " ForcedBOSTokenLogitsProcessor,\n", " ForcedEOSTokenLogitsProcessor,\n", " ForceTokensLogitsProcessor,\n", " HammingDiversityLogitsProcessor,\n", " InfNanRem...
import tiktoken

ENCODER = None


def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
    global ENCODER
    if ENCODER is None:
        # Lazily build the encoder for the requested model (reconstructed from context).
        ENCODER = tiktoken.encoding_for_model(model_name)
    content = ENCODER.decode(tokens)
    return content


def pack_user_ass_to_openai_messages(*args: str):
    roles = ["user", "assistant"]
    return [
        # Alternate user/assistant roles over the given strings (reconstructed from context).
        {"role": roles[i % 2], "content": content} for i, content in enumerate(args)
    ]
        self.sd.text_encoder.train()

    def hook_train_loop(self, batch):
        with torch.no_grad():
            dtype = get_torch_dtype(self.train_config.dtype)
            noisy_latents, noise, timesteps, conditioned_prompts, imgs = (
                self.process_general_training_batch(batch)
            )
            network_weight_list = batch.get_network_weight_...
output_tokenized_ids = tokenizer(
    output_prompt,
    padding="do_not_pad",
    return_tensors="pt",
    truncation=False,
    add_special_tokens=False,
)["input_ids"]

combined_embeds, atts, label_ids = model.encode(
    wav.cuda(),
    pre_tokenized_ids.cuda(),
    post_tokenized_ids.cuda(),
    output_tokenized_ids.cu...
        max_new_tokens=128,
        crop_question=True,
        num_samples=None,
        language=None,
    ) -> None:
        assert (
            base_model is not None or gt_data is not None
        )
        ...
        self._crop_question = crop_question
        self.num_samples = num_samples
        # Take language from the base...
        special_tokens: Dict[str, int],
        explicit_n_vocab: Optional[int] = None,
    ):
        self.name = name
        ...

    def __repr__(self) -> str:
        ...

    # Encoding
    # ===

    def encode_ordinary(self, text: str) -> List[int]:  # return annotation changed from list[int] to List[int] in this diff
        """...
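For orientation, a short usage sketch of the two encoding paths on a ready-made tiktoken encoding rather than an Encoding built by hand; cl100k_base is just an example name.

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

# encode_ordinary ignores special tokens entirely: the text is treated as plain text.
print(enc.encode_ordinary("<|endoftext|> hello"))

# encode() raises on special tokens unless they are explicitly allowed.
print(enc.encode("<|endoftext|> hello", allowed_special={"<|endoftext|>"}))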