-
Notifications
You must be signed in to change notification settings - Fork 240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
How to convert model to GGUF after fine-tuned? #64
Comments
I have the same question.
I am using llama.cpp to convert the model to GGUF, but its converter seems to support only the Mistral v0.1 format. Therefore, I wrote a script that renames the tensors:
from mistral_inference.model import Transformer
from safetensors.torch import save_file, load_file
# Define the reverse layer name conversion rules
# Tensors that live outside the per-layer stack.
_TOP_LEVEL_NAMES = {
    "tok_embeddings.weight": "model.embed_tokens.weight",
    "norm.weight": "model.norm.weight",
    "output.weight": "lm_head.weight",
}

# Per-layer RMSNorm modules. `attention_norm` runs *before* self-attention,
# so it is the Hugging Face `input_layernorm`; `ffn_norm` runs before the MLP,
# so it is `post_attention_layernorm`.
_NORM_NAMES = {
    "attention_norm": "input_layernorm",
    "ffn_norm": "post_attention_layernorm",
}

# Per-layer projection matrices, keyed by their parent sub-module.
_PROJECTION_NAMES = {
    "attention": (
        "self_attn",
        {"wq": "q_proj", "wk": "k_proj", "wv": "v_proj", "wo": "o_proj"},
    ),
    "feed_forward": (
        "mlp",
        {"w1": "gate_proj", "w2": "down_proj", "w3": "up_proj"},
    ),
}


def reverse_convert_layer_name(name: str) -> str:
    """Translate a Mistral-native tensor name to its Hugging Face name.

    Args:
        name: A state-dict key in the native layout, for example
            ``"layers.0.attention.wq.weight"``.

    Returns:
        The corresponding Hugging Face key, for example
        ``"model.layers.0.self_attn.q_proj.weight"``. Names that do not match
        any known pattern are returned unchanged.
    """
    if name in _TOP_LEVEL_NAMES:
        return _TOP_LEVEL_NAMES[name]

    parts = name.split(".")
    # Everything else must look like "layers.<index>.<module>[.<sub>...]".
    if len(parts) < 3 or parts[0] != "layers":
        return name

    layer_num, module = parts[1], parts[2]

    if module in _NORM_NAMES:
        return f"model.layers.{layer_num}.{_NORM_NAMES[module]}.weight"

    # Projections need a fourth component (wq, w1, ...); guard the index so a
    # short name falls through to "unchanged" instead of raising IndexError.
    if module in _PROJECTION_NAMES and len(parts) > 3:
        hf_module, projections = _PROJECTION_NAMES[module]
        if parts[3] in projections:
            return f"model.layers.{layer_num}.{hf_module}.{projections[parts[3]]}.weight"

    return name
# Load the base model in mistral_inference's native layout and keep it on the
# CPU so the merge and export do not need GPU memory.
model = Transformer.from_folder("/mnt/share/LLM/Breeze-7B-Instruct-v1_0")
model.to('cpu')

# Merge the fine-tuned LoRA adapter into the base weights.
# A mistral-finetune checkpoint stores low-rank factors under keys such as
# "...wq.lora_A.weight" / "...wq.lora_B.weight", so looking up the base
# parameter names in that file never matches and nothing would be merged.
# `load_lora` is the loader documented by mistral-finetune for exactly this
# file; it folds the scaled B @ A product into each base weight.
model.load_lora("/mnt/share/LLM/mistral_models/breeze-7b-lora/checkpoints/checkpoint_000100/consolidated/lora.safetensors")

# Rename every tensor of the merged model to the Hugging Face naming scheme.
# NOTE(review): only names are changed here. The official Hugging Face
# conversion script also permutes the q/k projection weights — confirm whether
# your GGUF converter expects that permutation before relying on the output.
new_state_dict = {
    reverse_convert_layer_name(name): param
    for name, param in model.state_dict().items()
}

# Write the renamed tensors out as a single safetensors file.
save_file(new_state_dict, "/mnt/share/LLM/Breeze-7B-z0.1/model.safetensors")
# Confirm the save was successful
print("Model has been successfully saved as a safetensors file.")
Config.json Changes
The `config.json`: {
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"output_router_logits": true,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.37.2",
"use_cache": false,
"vocab_size": xxxxxx
}
Please replace the `xxxxxx` placeholder for `vocab_size` with the actual vocabulary size of your model.
You can also use convert_mistral_weights_to_hf.py
|
How to convert model to GGUF after fine-tuned?
The text was updated successfully, but these errors were encountered: