llama.cpp (llama-cli), Vulkan backend (AMD 5600G APU):
ls -la Qwen2.5-Coder-14B-Q6_K.gguf
-rw-r--r-- 1 root root 12124684320 lis 11 19:07 Qwen2.5-Coder-14B-Q6_K.gguf
print_info: file size = 11,29 GiB (6,56 BPW)
load_tensors: offloaded 49/49 layers to GPU
load_tensors: CPU_Mapped model buffer size = 609,08 MiB
load_tensors: Vulkan0 model buffer size = 10948,23 MiB
"fdinfo": {
"10049": {
"name": "llama-cli",
"usage": {
"CPU": {
"unit": "%",
"value": 0
},
"Compute": {
"unit": "%",
"value": 0
},
"DMA": {
"unit": "%",
"value": 0
},
"Decode": {
"unit": "%",
"value": 0
},
"Encode": {
"unit": "%",
"value": 0
},
"GFX": {
"unit": "%",
"value": 0
},
"GTT": {
"unit": "MiB",
"value": 12770
},
"Media": {
"unit": "%",
"value": 0
},
"VRAM": {
"unit": "MiB",
"value": 8
}
}
},