Qwen
Unsloth

Qwen3-4B-Instruct-2507 benchmark on an Apple M3 · 24 GB

<- Runs

Prompt tokens

122,880

Generation tokens

30,720

Trials passed

30/30

Verified

Decode: 11.2 tok/s

Prefill: 234.0 tok/s

Peak memory

0.71/24 GB

Runs poorly

Trials

Decode / Prefill Speeds

Metadata

metadata.json
{
"runId": "run_91a5f062-dc08-41c1-ae32-02a87692c401",
"bundleId": "llamacpp-qwen3-4b-instruct-2507-q8_0.gguf-4f29f4",
"status": "verified",
"promptTokens": 122880,
"completionTokens": 30720,
"contextLength": 5120,
"harness": {
"version": "0.1.10",
"gitSha": "unknown"
},
"runtime": {
"name": "llama.cpp",
"version": "b8480",
"buildFlags": "metal"
},
"model": {
"displayName": "Qwen3-4B-Instruct-2507",
"format": "gguf",
"quant": "q8_0",
"architecture": "qwen3",
"source": "unsloth/Qwen3-4B-Instruct-2507-GGUF:Qwen3-4B-Instruct-2507-Q8_0.gguf",
"fileSizeBytes": 4280405600,
"lab": {
"name": "Qwen",
"slug": "qwen"
},
"quantizedBy": {
"name": "Unsloth",
"slug": "unsloth"
}
},
"device": {
"cpu": "Apple M3",
"cpuCores": 8,
"gpu": "Apple M3",
"gpuCores": 10,
"gpuCount": 1,
"ramGb": 24,
"osName": "macOS",
"osVersion": "15.7.3"
},
"decodeTpsMean": 11.2,
"prefillTpsMean": 234,
"ttftP50Ms": 17463.45,
"idleTpsMean": 716,
"peakRssMb": 730,
"trialsPassed": 30,
"trialsTotal": 30,
"runnabilityScore": 0.35353333333333337,
"bundleSha256": "58830d9ec83fb05d342a0792c75a63b0c48436b9abb84d8389f9e732f895451e",
"createdAt": "2026-03-23T22:08:02.429Z"
}