Qwen
Unsloth

Qwen3-4B benchmark on an NVIDIA GeForce RTX 4060 Laptop GPU

<- Runs

Prompt tokens

40,960

Generation tokens

10,240

Trials passed

10/10

Verified

64.0 tok/s

1,754.9 tok/s

Peak memory

0.90/15 GB

Runs well

Trials

Decode / Prefill Speeds

Metadata

metadata.json
{
"runId": "run_47525418-26ed-4d51-bd2d-512d69baeebb",
"bundleId": "llamacpp-qwen3-4b-q4_k_m.gguf-1607a4",
"status": "verified",
"promptTokens": 40960,
"completionTokens": 10240,
"contextLength": 5120,
"harness": {
"version": "0.1.16",
"gitSha": "08f2145"
},
"runtime": {
"name": "llama.cpp",
"version": "b1",
"buildFlags": "metal"
},
"model": {
"displayName": "Qwen3-4B",
"format": "gguf",
"quant": "q4_k_m",
"architecture": "qwen3",
"source": "unsloth/Qwen3-4B-GGUF:Qwen3-4B-Q4_K_M.gguf",
"fileSizeBytes": 2497281312,
"lab": {
"name": "Qwen",
"slug": "qwen"
},
"quantizedBy": {
"name": "Unsloth",
"slug": "unsloth"
}
},
"device": {
"cpu": "AMD Ryzen 7 7840HS w/ Radeon 780M Graphics",
"cpuCores": 16,
"gpu": "NVIDIA GeForce RTX 4060 Laptop GPU",
"gpuCores": 1,
"gpuCount": 1,
"ramGb": 15,
"osName": "Arch Linux",
"osVersion": "Unknown"
},
"decodeTpsMean": 64,
"prefillTpsMean": 1754.9,
"ttftP50Ms": 2347.52,
"idleTpsMean": 882.2,
"peakRssMb": 921.6,
"trialsPassed": 10,
"trialsTotal": 10,
"runnabilityScore": 0.834843,
"bundleSha256": "39bfc33004364bb721fa3d054ca190fee6028ea7a0f12f4e73d0b8a0624396ab",
"createdAt": "2026-03-31T00:39:45.136Z"
}