logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>") sys.exit(1) # TODO: add models here, base models preferred models = [ {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", }, {"na...
# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function:
# TODO: auto-update convert-hf-to-gguf.py with the generated function
src_ifs = ""
for model in models:

@@ -224,11 +208,18 @@ def get_vocab_base_pre(self, tokenizer) -> str...
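Before the diff continues: for each model, the loop above appends one recognition branch to src_ifs. A sketch of roughly what each generated branch looks like (the exact indentation and comment text are assumptions):

chkhsh = ...  # hash computed for this model's tokenizer, as sketched earlier
src_ifs += f'        if chkhsh == "{chkhsh}":\n'
src_ifs += f"            # ref: {model['repo']}\n"
src_ifs += f'            res = "{model["name"]}"\n'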
2 changes: 1 addition & 1 deletion in convert_hf_to_gguf.py

@@ -2719,7 +2719,7 @@ def set_vocab(self):
    for line in lines:
        parts = line.split(' ')
        assert len(parts) >= 3
        _, token, token_len = int(parts[0]), ast...
I have searched through the documentation for an explanation, but nothing in the various limitations documents suggests that this shouldn't be possible. The flow was previously writing the base64 as a string to the relevant cell(s) in Excel without issue. It's only when I trie...
if 'NO_LOCAL_GGUF' not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
import gguf

NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}

def write_file_header(fout: BinaryIO, params: dict[str, Any]) -> None: ...
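The body of write_file_header is truncated above. As a rough illustration of what a GGML-style header writer does (the field order and the exact hyperparameter set here are assumptions, not the script's actual layout):

import struct
from typing import Any, BinaryIO

def write_file_header_sketch(fout: BinaryIO, params: dict[str, Any]) -> None:
    # magic number first, then a few little-endian int32 hyperparameters
    fout.write(struct.pack("<I", 0x67676D6C))           # 'ggml' tag (assumed)
    fout.write(struct.pack("<i", params["n_vocab"]))    # vocabulary size
    fout.write(struct.pack("<i", params["n_embd"]))     # embedding width
    fout.write(struct.pack("<i", params["n_layer"]))    # layer count
    fout.write(struct.pack("<i", params["ftype"]))      # tensor data type id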
teespolyglot replied to SergeiBaklan, Sep 10, 2024:

Thanks SergeiBaklan, your reply essentially pointed me in the right direction. Power Automate clearly didn't like me passing the base64 string directly from the PowerApps input, so I've initialised a string variable and ...
from torch import Tensor

if 'NO_LOCAL_GGUF' not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

logger = logging.getLogger("hf-to-gguf")

### MODEL DEFINITIONS ###

class SentencePieceTokenTypes(IntEnum):
    NORMAL = 1
    UNKNOWN = 2
    CONTROL =...
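These token types mirror sentencepiece's own piece classification. A hedged usage sketch, assuming a sentencepiece.SentencePieceProcessor and only the three types visible above (the real enum continues past CONTROL):

from sentencepiece import SentencePieceProcessor

sp = SentencePieceProcessor(model_file="tokenizer.model")  # path is an assumption
toktypes = []
for i in range(sp.vocab_size()):
    if sp.is_unknown(i):
        toktypes.append(SentencePieceTokenTypes.UNKNOWN)
    elif sp.is_control(i):
        toktypes.append(SentencePieceTokenTypes.CONTROL)
    else:
        toktypes.append(SentencePieceTokenTypes.NORMAL)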
python llama.cpp/convert.py -h

Convert the HF model to GGUF model:

python llama.cpp/convert.py vicuna-hf \
    --outfile vicuna-13b-v1.5.gguf \
    --outtype q8_0

In this case we're also quantizing the model to 8 bit by setting --outtype q8_0. Quantizing helps improve inference speed,...
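To make the size saving concrete: q8_0 stores one int8 weight per parameter plus one f16 scale per 32-weight block, roughly 1.06 bytes per weight versus 2 bytes for f16. A quick back-of-the-envelope check for a 13B-parameter model:

n_params = 13e9
f16_gb = n_params * 2 / 1e9               # 2 bytes per weight
q8_0_gb = n_params * (1 + 2 / 32) / 1e9   # int8 weight + shared f16 scale per 32-block
print(f"f16:  ~{f16_gb:.1f} GB")   # ~26.0 GB
print(f"q8_0: ~{q8_0_gb:.1f} GB")  # ~13.8 GB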
if 'NO_LOCAL_GGUF' not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf

logger = logging.getLogger("ggml-to-gguf")

class GGMLFormat(IntEnum):
    GGML = 0
    GGMF = 1
    GGJT = 2

class GGMLFType(IntEnum):
    ALL_F32 = 0
    MO...
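The three GGMLFormat variants can be told apart by the magic bytes at the start of the file. A sketch of how such detection could look (the magic constants are assumed from the 'ggml'/'ggmf'/'ggjt' format tags; the script's actual loader may differ):

import struct

GGML_MAGIC = 0x67676D6C  # 'ggml' (unversioned)
GGMF_MAGIC = 0x67676D66  # 'ggmf' (versioned)
GGJT_MAGIC = 0x67676A74  # 'ggjt' (mmap-aligned)

def detect_format(data: bytes) -> GGMLFormat:
    (magic,) = struct.unpack("<I", data[:4])
    if magic == GGML_MAGIC:
        return GGMLFormat.GGML
    if magic == GGMF_MAGIC:
        return GGMLFormat.GGMF
    if magic == GGJT_MAGIC:
        return GGMLFormat.GGJT
    raise ValueError(f"unrecognised magic: {magic:#010x}")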
def _hf_permute_qk(self, weights, n_head: int, n_head_kv: int):
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])...
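The return expression is cut off above. For reference, the permute helpers used across the llama.cpp convert scripts complete it by swapping the interleaved rotary halves and restoring the original shape; a standalone sketch assuming the same semantics as the truncated method:

import torch

def permute_qk(weights: torch.Tensor, n_head: int, n_head_kv: int) -> torch.Tensor:
    # GQA: permute per KV head when the head counts differ
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    # split each head's rows into (2, head_dim/2), swap the interleaved
    # rotary halves, then flatten back to the original weight shape
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))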