-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathllama_models.yaml
29 lines (29 loc) · 955 Bytes
/
llama_models.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Huggingface.co models to use for the llama2 chatbot
[
{
# 0 => Poor: High mem footprint >20GB (GPU needed but still slow)
"architecture": "original",
"name": "meta-llama/Llama-2-7b-chat-hf",
"online" : True
},
{
# 1 => Medium/Good: Very fast and low mem footprint ~3GB (CPU/GPU)
"architecture": "ggml",
"name": "TheBloke/Llama-2-7B-Chat-GGML",
"file": "llama-2-7b-chat.ggmlv3.q4_K_M.bin",
"online" : True
},
{
# 2 => Best: Very low mem footprint and emojis! (GPU needed)
"architecture": "gptq",
"name": "TheBloke/Llama-2-7b-Chat-GPTQ",
"online" : True
},
{
# 3 => Poorest: Highest mem footprint >32 GB RAM (GPU needed)
# This requires you to run 'make train-original' to generate it
"architecture": "tlrsft",
"name": "olafrv/Llama-2-7b-chat-hf-trained",
"online": False
}
]