Nebius Token Factory — packaged models

id: nebius

npm: @ai-sdk/openai-compatible

env: NEBIUS_API_KEY

api: https://api.tokenfactory.nebius.com/v1

doc: https://docs.tokenfactory.nebius.com/

Models

DeepSeek V4 Pro

deepseek-ai/DeepSeek-V4-Pro

in $1.75/M

out $3.50/M

cache read $0.15/M

ctx: 1,000,000 max out: 384,000 in: text out: text

reasoning tools vision structured temp open weights

DeepSeek-V3.2

deepseek-ai/DeepSeek-V3.2

in $0.30/M

out $0.45/M

reason $0.45/M

cache read $0.03/M

cache write $0.38/M

ctx: 163,000 max out: 16,384 in: text out: text

reasoning tools vision structured temp open weights deprecated

DeepSeek-V3.2-fast

deepseek-ai/DeepSeek-V3.2-fast

in $0.40/M

out $2.00/M

cache read $0.04/M

cache write $0.50/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Gemma-3-27b-it

google/gemma-3-27b-it

in $0.10/M

out $0.30/M

cache read $0.01/M

cache write $0.13/M

ctx: 110,000 max out: 8,192 in: text, image out: text

reasoning tools vision structured temp open weights

GLM-5

zai-org/GLM-5

in $1.00/M

out $3.20/M

cache read $0.10/M

cache write $1.00/M

ctx: 200,000 max out: 16,384 in: text out: text

reasoning tools vision structured temp open weights deprecated

gpt-oss-120b

openai/gpt-oss-120b

in $0.15/M

out $0.60/M

reason $0.60/M

cache read $0.01/M

cache write $0.18/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

gpt-oss-120b-fast

openai/gpt-oss-120b-fast

in $0.10/M

out $0.50/M

cache read $0.01/M

cache write $0.13/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Hermes-4-405B

NousResearch/Hermes-4-405B

in $1.00/M

out $3.00/M

reason $3.00/M

cache read $0.10/M

cache write $1.25/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Hermes-4-70B

NousResearch/Hermes-4-70B

in $0.13/M

out $0.40/M

reason $0.40/M

cache read $0.01/M

cache write $0.16/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

INTELLECT-3

PrimeIntellect/INTELLECT-3

in $0.20/M

out $1.10/M

cache read $0.02/M

cache write $0.25/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Kimi-K2.5

moonshotai/Kimi-K2.5

in $0.50/M

out $2.50/M

reason $2.50/M

cache read $0.05/M

cache write $0.63/M

ctx: 256,000 max out: 8,192 in: text, image out: text

reasoning tools vision structured temp open weights deprecated

Kimi-K2.5-fast

moonshotai/Kimi-K2.5-fast

in $0.50/M

out $2.50/M

cache read $0.05/M

cache write $0.63/M

ctx: 256,000 max out: 8,192 in: text, image out: text

reasoning tools vision structured temp open weights deprecated

Llama-3.1-Nemotron-Ultra-253B-v1

nvidia/Llama-3_1-Nemotron-Ultra-253B-v1

in $0.60/M

out $1.80/M

cache read $0.06/M

cache write $0.75/M

ctx: 128,000 max out: 4,096 in: text out: text

reasoning tools vision structured temp open weights

Llama-3.3-70B-Instruct

meta-llama/Llama-3.3-70B-Instruct

in $0.13/M

out $0.40/M

cache read $0.01/M

cache write $0.16/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

MiniMax-M2.5

MiniMaxAI/MiniMax-M2.5

in $0.30/M

out $1.20/M

cache read $0.03/M

cache write $0.38/M

ctx: 196,608 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

MiniMax-M2.5-fast

MiniMaxAI/MiniMax-M2.5-fast

in $0.30/M

out $1.20/M

cache read $0.03/M

cache write $0.38/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Nemotron-3-Nano-30B-A3B

nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B

in $0.06/M

out $0.24/M

cache read $0.01/M

cache write $0.07/M

ctx: 32,000 max out: 4,096 in: text out: text

reasoning tools vision structured temp open weights

Nemotron-3-Nano-Omni

nvidia/Nemotron-3-Nano-Omni

in $0.06/M

out $0.24/M

cache read $0.01/M

cache write $0.07/M

ctx: 65,536 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Nemotron-3-Super-120B-A12B

nvidia/nemotron-3-super-120b-a12b

in $0.30/M

out $0.90/M

ctx: 256,000 max out: 32,768 in: text out: text

reasoning tools vision structured temp open weights

Qwen2.5-VL-72B-Instruct

Qwen/Qwen2.5-VL-72B-Instruct

in $0.25/M

out $0.75/M

cache read $0.03/M

cache write $0.31/M

ctx: 128,000 max out: 8,192 in: text, image out: text

reasoning tools vision structured temp open weights

Qwen3 235B A22B Instruct 2507

Qwen/Qwen3-235B-A22B-Instruct-2507

in $0.20/M

out $0.60/M

ctx: 262,144 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Qwen3-235B-A22B-Thinking-2507-fast

Qwen/Qwen3-235B-A22B-Thinking-2507-fast

in $0.50/M

out $2.00/M

cache read $0.05/M

cache write $0.63/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Qwen3-30B-A3B-Instruct-2507

Qwen/Qwen3-30B-A3B-Instruct-2507

in $0.10/M

out $0.30/M

cache read $0.01/M

cache write $0.13/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Qwen3-32B

Qwen/Qwen3-32B

in $0.10/M

out $0.30/M

cache read $0.01/M

cache write $0.13/M

ctx: 128,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Qwen3-Embedding-8B

Qwen/Qwen3-Embedding-8B

in $0.01/M

out $0.00/M

ctx: 32,768 max out: 0 in: text out: text

reasoning tools vision structured temp open weights

Qwen3-Next-80B-A3B-Thinking

Qwen/Qwen3-Next-80B-A3B-Thinking

in $0.15/M

out $1.20/M

reason $1.20/M

cache read $0.01/M

cache write $0.18/M

ctx: 128,000 max out: 16,384 in: text out: text

reasoning tools vision structured temp open weights

Qwen3-Next-80B-A3B-Thinking-fast

Qwen/Qwen3-Next-80B-A3B-Thinking-fast

in $0.15/M

out $1.20/M

cache read $0.01/M

cache write $0.19/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated

Qwen3.5-397B-A17B

Qwen/Qwen3.5-397B-A17B

in $0.60/M

out $3.60/M

cache read $0.06/M

cache write $0.75/M

ctx: 262,144 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights

Qwen3.5-397B-A17B-fast

Qwen/Qwen3.5-397B-A17B-fast

in $0.60/M

out $3.60/M

cache read $0.06/M

cache write $0.75/M

ctx: 8,000 max out: 8,192 in: text out: text

reasoning tools vision structured temp open weights deprecated