0xSero
Gabe Garcia

Gemma 4 20B A4B IT REAP benchmark on an Apple M4 Pro · 64 GB

<- Runs

Prompt tokens

40,960

Generation tokens

10,240

Trials passed

10/10

Verified

Decode: 68.6 tok/s

Prefill: 754.3 tok/s

Peak memory

14.00/64 GB

Runs well

Trials

Decode / Prefill Speeds

Metadata

metadata.json
{
"runId": "run_b1a7e3ac-44ac-4018-98d2-10044747beb1",
"bundleId": "mlx-gemma-4-21b-reap-tool-calling-mlx-4bit-e9cf2a",
"status": "verified",
"promptTokens": 40960,
"completionTokens": 10240,
"contextLength": 5120,
"harness": {
"version": "0.1.20",
"gitSha": "unknown"
},
"runtime": {
"name": "mlx_lm",
"version": "0.31.2",
"buildFlags": null
},
"model": {
"displayName": "Gemma 4 20B A4B IT REAP",
"format": "mlx",
"quant": "4bit",
"architecture": "gemma4",
"source": "deadbydawn101/gemma-4-21b-REAP-Tool-Calling-mlx-4bit",
"fileSizeBytes": 12825553410,
"lab": {
"name": "0xSero",
"slug": "0xsero"
},
"quantizedBy": {
"name": "Gabe Garcia",
"slug": "deadbydawn101"
}
},
"device": {
"cpu": "Apple M4 Pro",
"cpuCores": 14,
"gpu": "Apple M4 Pro",
"gpuCores": 20,
"gpuCount": 1,
"ramGb": 64,
"osName": "macOS",
"osVersion": "26.3"
},
"decodeTpsMean": 68.6,
"prefillTpsMean": 754.3,
"ttftP50Ms": 5328.72,
"idleTpsMean": 12288,
"peakRssMb": 14336,
"trialsPassed": 10,
"trialsTotal": 10,
"runnabilityScore": 0.73458,
"bundleSha256": "ca7421d446266d25b2398995fc5546c21f8458dd5d67ff27b55b4404724b148f",
"createdAt": "2026-04-16T03:25:27.533Z"
}