Hugging Face Inference

Verified Truly Free

The Hugging Face Serverless Inference API gives you access to over 100,000 publicly available machine learning models. It is designed for prototyping and testing: you can run inference against hosted models without managing any infrastructure. While it is not intended for heavy production workloads, it offers a generous free tier for experimentation.

Truly Free · Community Pick · 100k+ Models · Open Source

Overview

Provider Type: API

API Endpoint: https://api-inference.huggingface.co/models

Free Tier Highlights: 300 requests / hour
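
The endpoint pattern is worth spelling out: requests go to the base URL above with a model ID from the Hub appended to the path. A minimal sketch in Python; the model ID and the token placeholder below are just examples.

endpoint_example.py
import requests

BASE_URL = "https://api-inference.huggingface.co/models"
MODEL_ID = "google/gemma-2-9b-it"  # any supported model ID from the Hub

# The per-model endpoint is simply the base URL plus the model ID.
response = requests.post(
    f"{BASE_URL}/{MODEL_ID}",
    headers={"Authorization": "Bearer YOUR_HF_TOKEN"},
    json={"inputs": "Hello!"},
)
print(response.status_code)
print(response.json())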

Why Choose Hugging Face Inference?

Hugging Face Inference stands out for its transparent, open-source approach. With a developer-friendly API and comprehensive documentation, you can integrate AI capabilities into your applications within minutes.

Quick Start Guide

1. Create Account: Sign up for a free account at HuggingFace.co.

2. Get Access Token: Go to Settings > Access Tokens and create a new 'Read' token.

3. Pick a Model: Browse the model hub and click 'Deploy > Inference API' to get the URL for any supported model.
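
Once the token exists, a quick way to confirm it works is to run a small request through the official huggingface_hub client library (an optional dependency, installed with pip install huggingface_hub; plain requests calls, as shown further down, work just as well). A minimal sketch, assuming the Qwen model listed below is available to your account:

check_token.py
from huggingface_hub import InferenceClient

# The token created in step 2; a 'Read' token is sufficient for inference.
client = InferenceClient(token="YOUR_HF_TOKEN")

output = client.text_generation(
    "Say hello in one short sentence.",
    model="Qwen/Qwen2.5-72B-Instruct",
    max_new_tokens=30,
)
print(output)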

Available Models

Model Name                   | ID                                       | Context | Capabilities
Llama 3.2 11B Vision (Free)  | meta-llama/Llama-3.2-11B-Vision-Instruct | 128,000 | Text, Vision
Llama 3.1 8B Instruct (Free) | meta-llama/Meta-Llama-3.1-8B-Instruct    | 128,000 | -
Qwen 2.5 72B Instruct (Free) | Qwen/Qwen2.5-72B-Instruct                | 32,000  | -
Gemma 2 9B Instruct (Free)   | google/gemma-2-9b-it                     | 8,000   | -
Flux.1 Dev (Free)            | black-forest-labs/FLUX.1-dev             | -       | Image
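
Text models return JSON, but the image model in the table (FLUX.1-dev) returns raw image bytes in the response body. A minimal sketch assuming the same token; the file extension is a guess at the returned format, and this model may also require accepting its license on the Hub first.

image_example.py
import requests

API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"
headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}

# Text-to-image endpoints return the generated image as raw bytes, not JSON.
response = requests.post(
    API_URL,
    headers=headers,
    json={"inputs": "An astronaut riding a horse"},
)
response.raise_for_status()

with open("flux_output.png", "wb") as f:
    f.write(response.content)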

Integration Examples

Ready-to-use code snippets for your applications.

main.py
import requests

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct"
headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}

def query(payload):
    """Send a JSON payload to the model endpoint and return the decoded JSON response."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Example call; error responses (rate limits, loading models) also come back as JSON.
output = query({
    "inputs": "Can you please let us know more details about your",
})
print(output)
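
For text-generation models, the response is typically a JSON list whose first element carries a generated_text field, and the payload accepts an optional parameters object for settings such as max_new_tokens and temperature. A small sketch along the same lines as main.py:

parameters_example.py
import requests

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct"
headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}

payload = {
    "inputs": "Write a haiku about open-source AI.",
    # Optional generation settings; names follow the text-generation task schema.
    "parameters": {"max_new_tokens": 64, "temperature": 0.7},
}
output = requests.post(API_URL, headers=headers, json=payload).json()

# Typical shape for text generation: [{"generated_text": "..."}]
if isinstance(output, list) and output and "generated_text" in output[0]:
    print(output[0]["generated_text"])
else:
    print(output)  # error payloads come back as a dict, e.g. {"error": "..."}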

Free Tier Pricing & Limits

Limit         | Basis              | Free Tier Value
Rate Limit    | Requests per hour  | 300 requests / hour
Daily Quota   | Requests per day   | Dependent on global load
Token Limit   | Tokens per request | Max context of the model
Monthly Quota | Per-month limit    | Free Forever (Rate Limited)
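
Requests beyond the hourly allowance come back with HTTP 429. A minimal back-off sketch, assuming a Retry-After header may or may not be present on the response:

rate_limit_retry.py
import time
import requests

API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct"
headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}

def query_with_backoff(payload, max_retries=5):
    """POST to the Inference API, sleeping and retrying on HTTP 429."""
    delay = 2
    for _ in range(max_retries):
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code != 429:
            return response.json()
        # Honour Retry-After if the server sends it, otherwise back off exponentially.
        wait = int(response.headers.get("Retry-After", delay))
        time.sleep(wait)
        delay *= 2
    raise RuntimeError("Still rate limited after retries")

print(query_with_backoff({"inputs": "Hello!"}))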

Use Cases

Prototyping & Testing

Learning NLP / ML

Lightweight Apps

Hackathons

Model Evaluation

Limitations & Considerations

Rate limited to ~300 requests/hour for free users

Models larger than 10GB may not load

Cold starts can occur (see the sketch after this list)

No SLA on free tier
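
Cold starts surface as an HTTP 503 whose JSON body usually includes an estimated_time in seconds; the payload also accepts an options object, where wait_for_model asks the server to hold the request until the model has loaded. A hedged sketch of both approaches:

cold_start.py
import time
import requests

API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-9b-it"
headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}

payload = {
    "inputs": "Hello!",
    # Ask the server to queue the request until the model has loaded.
    "options": {"wait_for_model": True},
}

response = requests.post(API_URL, headers=headers, json=payload)
if response.status_code == 503:
    # Fallback: wait the advertised loading time (or 30 s) and retry once.
    wait = response.json().get("estimated_time", 30)
    time.sleep(wait)
    response = requests.post(API_URL, headers=headers, json=payload)

print(response.json())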

