id: nebius
npm: @ai-sdk/openai-compatible
env: NEBIUS_API_KEY
api: https://api.tokenfactory.nebius.com/v1
Models
DeepSeek V4 Pro
deepseek-ai/DeepSeek-V4-Pro
in $1.75/M
out $3.50/M
cache read $0.15/M
ctx: 1,000,000
max out: 384,000
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
DeepSeek-V3.2
deepseek-ai/DeepSeek-V3.2
in $0.30/M
out $0.45/M
reason $0.45/M
cache read $0.03/M
cache write $0.38/M
ctx: 163,000
max out: 16,384
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
DeepSeek-V3.2-fast
deepseek-ai/DeepSeek-V3.2-fast
in $0.40/M
out $2.00/M
cache read $0.04/M
cache write $0.50/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Gemma-3-27b-it
google/gemma-3-27b-it
in $0.10/M
out $0.30/M
cache read $0.01/M
cache write $0.13/M
ctx: 110,000
max out: 8,192
in: text, image
out: text
reasoning
tools
vision
structured
temp
open weights
GLM-5
zai-org/GLM-5
in $1.00/M
out $3.20/M
cache read $0.10/M
cache write $1.00/M
ctx: 200,000
max out: 16,384
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
gpt-oss-120b
openai/gpt-oss-120b
in $0.15/M
out $0.60/M
reason $0.60/M
cache read $0.01/M
cache write $0.18/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
gpt-oss-120b-fast
openai/gpt-oss-120b-fast
in $0.10/M
out $0.50/M
cache read $0.01/M
cache write $0.13/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Hermes-4-405B
NousResearch/Hermes-4-405B
in $1.00/M
out $3.00/M
reason $3.00/M
cache read $0.10/M
cache write $1.25/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Hermes-4-70B
NousResearch/Hermes-4-70B
in $0.13/M
out $0.40/M
reason $0.40/M
cache read $0.01/M
cache write $0.16/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
INTELLECT-3
PrimeIntellect/INTELLECT-3
in $0.20/M
out $1.10/M
cache read $0.02/M
cache write $0.25/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Kimi-K2.5
moonshotai/Kimi-K2.5
in $0.50/M
out $2.50/M
reason $2.50/M
cache read $0.05/M
cache write $0.63/M
ctx: 256,000
max out: 8,192
in: text, image
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Kimi-K2.5-fast
moonshotai/Kimi-K2.5-fast
in $0.50/M
out $2.50/M
cache read $0.05/M
cache write $0.63/M
ctx: 256,000
max out: 8,192
in: text, image
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Llama-3.1-Nemotron-Ultra-253B-v1
nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
in $0.60/M
out $1.80/M
cache read $0.06/M
cache write $0.75/M
ctx: 128,000
max out: 4,096
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Llama-3.3-70B-Instruct
meta-llama/Llama-3.3-70B-Instruct
in $0.13/M
out $0.40/M
cache read $0.01/M
cache write $0.16/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
MiniMax-M2.5
MiniMaxAI/MiniMax-M2.5
in $0.30/M
out $1.20/M
cache read $0.03/M
cache write $0.38/M
ctx: 196,608
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
MiniMax-M2.5-fast
MiniMaxAI/MiniMax-M2.5-fast
in $0.30/M
out $1.20/M
cache read $0.03/M
cache write $0.38/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Nemotron-3-Nano-30B-A3B
nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B
in $0.06/M
out $0.24/M
cache read $0.01/M
cache write $0.07/M
ctx: 32,000
max out: 4,096
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Nemotron-3-Nano-Omni
nvidia/Nemotron-3-Nano-Omni
in $0.06/M
out $0.24/M
cache read $0.01/M
cache write $0.07/M
ctx: 65,536
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Nemotron-3-Super-120B-A12B
nvidia/nemotron-3-super-120b-a12b
in $0.30/M
out $0.90/M
ctx: 256,000
max out: 32,768
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen2.5-VL-72B-Instruct
Qwen/Qwen2.5-VL-72B-Instruct
in $0.25/M
out $0.75/M
cache read $0.03/M
cache write $0.31/M
ctx: 128,000
max out: 8,192
in: text, image
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3 235B A22B Instruct 2507
Qwen/Qwen3-235B-A22B-Instruct-2507
in $0.20/M
out $0.60/M
ctx: 262,144
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3-235B-A22B-Thinking-2507-fast
Qwen/Qwen3-235B-A22B-Thinking-2507-fast
in $0.50/M
out $2.00/M
cache read $0.05/M
cache write $0.63/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Qwen3-30B-A3B-Instruct-2507
Qwen/Qwen3-30B-A3B-Instruct-2507
in $0.10/M
out $0.30/M
cache read $0.01/M
cache write $0.13/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3-32B
Qwen/Qwen3-32B
in $0.10/M
out $0.30/M
cache read $0.01/M
cache write $0.13/M
ctx: 128,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3-Embedding-8B
Qwen/Qwen3-Embedding-8B
in $0.01/M
out $0.00/M
ctx: 32,768
max out: 0
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3-Next-80B-A3B-Thinking
Qwen/Qwen3-Next-80B-A3B-Thinking
in $0.15/M
out $1.20/M
reason $1.20/M
cache read $0.01/M
cache write $0.18/M
ctx: 128,000
max out: 16,384
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3-Next-80B-A3B-Thinking-fast
Qwen/Qwen3-Next-80B-A3B-Thinking-fast
in $0.15/M
out $1.20/M
cache read $0.01/M
cache write $0.19/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated
Qwen3.5-397B-A17B
Qwen/Qwen3.5-397B-A17B
in $0.60/M
out $3.60/M
cache read $0.06/M
cache write $0.75/M
ctx: 262,144
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
Qwen3.5-397B-A17B-fast
Qwen/Qwen3.5-397B-A17B-fast
in $0.60/M
out $3.60/M
cache read $0.06/M
cache write $0.75/M
ctx: 8,000
max out: 8,192
in: text
out: text
reasoning
tools
vision
structured
temp
open weights
deprecated