diff --git a/openapi.yaml b/openapi.yaml
index dea4b15..13d9f86 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1192,11 +1192,12 @@ paths:
)
response = client.chat.completions.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"},
- ]
+ ],
+ reasoning={"enabled": False}
)
- lang: Python
label: Together AI SDK (v1)
@@ -1209,11 +1210,12 @@ paths:
)
response = client.chat.completions.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"},
- ]
+ ],
+ reasoning={"enabled": False}
)
print(response.choices[0].message.content)
@@ -1227,11 +1229,12 @@ paths:
});
const response = await client.chat.completions.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B",
messages: [
{ role: "system", content: "You are a helpful assistant." },
- { role: "user", "content": "What are some fun things to do in New York?" },
+ { role: "user", content: "What are some fun things to do in New York?" },
],
+ reasoning: { enabled: false },
});
console.log(response.choices[0].message?.content);
@@ -1245,11 +1248,12 @@ paths:
});
const response = await client.chat.completions.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", "content": "What are some fun things to do in New York?" },
],
+ reasoning: { enabled: false },
});
console.log(response.choices[0].message?.content);
@@ -1260,11 +1264,12 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
- "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ "model": "Qwen/Qwen3.5-9B",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"}
- ]
+ ],
+ "reasoning": {"enabled": false}
}'
operationId: chat-completions
requestBody:
@@ -1337,7 +1342,7 @@ paths:
)
response = client.completions.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B",
prompt="The largest city in France is",
max_tokens=1
)
@@ -1354,7 +1359,7 @@ paths:
)
response = client.completions.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B",
prompt="The largest city in France is",
max_tokens=1
)
@@ -1370,7 +1375,7 @@ paths:
});
const response = await client.completions.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B",
prompt: "The largest city in France is",
max_tokens: 1,
});
@@ -1386,7 +1391,7 @@ paths:
});
const response = await client.completions.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B",
prompt: "The largest city in France is",
max_tokens: 1
});
@@ -1399,7 +1404,7 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
- "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ "model": "Qwen/Qwen3.5-9B",
"prompt": "The largest city in France is",
"max_tokens": 1
}'
@@ -5126,7 +5131,7 @@ paths:
)
endpoint = client.endpoints.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B-FP8",
hardware="1x_nvidia_a100_80gb_sxm",
autoscaling={
"min_replicas": 2,
@@ -5146,7 +5151,7 @@ paths:
)
endpoint = client.endpoints.create(
- model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model="Qwen/Qwen3.5-9B-FP8",
hardware="1x_nvidia_a100_80gb_sxm",
min_replicas=2,
max_replicas=5,
@@ -5163,7 +5168,7 @@ paths:
});
const endpoint = await client.endpoints.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B-FP8",
hardware: "1x_nvidia_a100_80gb_sxm",
autoscaling: {
max_replicas: 5,
@@ -5182,7 +5187,7 @@ paths:
});
const endpoint = await client.endpoints.create({
- model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ model: "Qwen/Qwen3.5-9B-FP8",
hardware: "1x_nvidia_a100_80gb_sxm",
autoscaling: {
max_replicas: 5,
@@ -5198,7 +5203,7 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
- "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ "model": "Qwen/Qwen3.5-9B-FP8",
"hardware": "1x_nvidia_a100_80gb_sxm",
"autoscaling": {
"max_replicas": 5,
@@ -6277,14 +6282,14 @@ paths:
type="classify",
parameters=ParametersEvaluationClassifyParameters(
judge=ParametersEvaluationClassifyParametersJudge(
- model="meta-llama/Llama-3.1-70B-Instruct-Turbo",
+ model="openai/gpt-oss-120b",
model_source="serverless",
system_template="You are an expert evaluator...",
),
input_data_file_path="file-abc123",
labels=["good", "bad"],
pass_labels=["good"],
- model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
+ model_to_evaluate="Qwen/Qwen3.5-9B"
)
)
@@ -6301,12 +6306,12 @@ paths:
response = client.evaluation.create(
type="classify",
- judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
+ judge_model_name="openai/gpt-oss-120b",
judge_system_template="You are an expert evaluator...",
input_data_file_path="file-abc123",
labels=["good", "bad"],
pass_labels=["good"],
- model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
+ model_to_evaluate="Qwen/Qwen3.5-9B"
)
print(response.workflow_id)
@@ -6323,14 +6328,14 @@ paths:
type: 'classify',
parameters: {
judge: {
- model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+ model: 'openai/gpt-oss-120b',
model_source: 'serverless',
system_template: 'You are an expert evaluator...',
},
input_data_file_path: 'file-abc123',
labels: ['good', 'bad'],
pass_labels: ['good'],
- model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+ model_to_evaluate: 'Qwen/Qwen3.5-9B',
},
});
@@ -6348,14 +6353,14 @@ paths:
type: 'classify',
parameters: {
judge: {
- model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+ model: 'openai/gpt-oss-120b',
model_source: 'serverless',
system_template: 'You are an expert evaluator...',
},
input_data_file_path: 'file-abc123',
labels: ['good', 'bad'],
pass_labels: ['good'],
- model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+ model_to_evaluate: 'Qwen/Qwen3.5-9B',
},
});
@@ -9096,15 +9101,15 @@ components:
The name of the model to query.
[See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
- example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+ example: Qwen/Qwen3.5-9B
anyOf:
- type: string
enum:
- Qwen/Qwen2.5-72B-Instruct-Turbo
- Qwen/Qwen2.5-7B-Instruct-Turbo
+ - Qwen/Qwen3.5-9B
- meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
- meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
- - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
- type: string
max_tokens:
type: integer
@@ -11879,7 +11884,7 @@ components:
model_id:
type: string
description: 'Model to use for processing batch requests'
- example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+ example: 'Qwen/Qwen3.5-9B'
BatchErrorResponse:
type: object
properties:
@@ -11930,7 +11935,7 @@ components:
description: 'Completion progress (0.0 to 100)'
model_id:
type: string
- example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+ example: 'Qwen/Qwen3.5-9B'
description: 'Model used for processing requests'
output_file_id:
type: string