Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 35 additions & 30 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1192,11 +1192,12 @@ paths:
)

response = client.chat.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"},
]
],
reasoning={"enabled": False}
)
- lang: Python
label: Together AI SDK (v1)
Expand All @@ -1209,11 +1210,12 @@ paths:
)

response = client.chat.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"},
]
],
reasoning={"enabled": False}
)

print(response.choices[0].message.content)
Expand All @@ -1227,11 +1229,12 @@ paths:
});

const response = await client.chat.completions.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", "content": "What are some fun things to do in New York?" },
],
reasoning: { enabled: false },
});

console.log(response.choices[0].message?.content);
Expand All @@ -1245,11 +1248,12 @@ paths:
});

const response = await client.chat.completions.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", "content": "What are some fun things to do in New York?" },
],
reasoning: { enabled: false },
});

console.log(response.choices[0].message?.content);
Expand All @@ -1260,11 +1264,12 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
"model": "Qwen/Qwen3.5-9B",
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What are some fun things to do in New York?"}
]
],
"reasoning": {"enabled": false}
}'
operationId: chat-completions
requestBody:
Expand Down Expand Up @@ -1337,7 +1342,7 @@ paths:
)

response = client.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B",
prompt="The largest city in France is",
max_tokens=1
)
Expand All @@ -1354,7 +1359,7 @@ paths:
)

response = client.completions.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B",
prompt="The largest city in France is",
max_tokens=1
)
Expand All @@ -1370,7 +1375,7 @@ paths:
});

const response = await client.completions.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B",
prompt: "The largest city in France is",
max_tokens: 1,
});
Expand All @@ -1386,7 +1391,7 @@ paths:
});

const response = await client.completions.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B",
prompt: "The largest city in France is",
max_tokens: 1
});
Expand All @@ -1399,7 +1404,7 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
"model": "Qwen/Qwen3.5-9B",
"prompt": "The largest city in France is",
"max_tokens": 1
}'
Expand Down Expand Up @@ -5126,7 +5131,7 @@ paths:
)

endpoint = client.endpoints.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B-FP8",
hardware="1x_nvidia_a100_80gb_sxm",
autoscaling={
"min_replicas": 2,
Expand All @@ -5146,7 +5151,7 @@ paths:
)

endpoint = client.endpoints.create(
model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model="Qwen/Qwen3.5-9B-FP8",
hardware="1x_nvidia_a100_80gb_sxm",
min_replicas=2,
max_replicas=5,
Expand All @@ -5163,7 +5168,7 @@ paths:
});

const endpoint = await client.endpoints.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B-FP8",
hardware: "1x_nvidia_a100_80gb_sxm",
autoscaling: {
max_replicas: 5,
Expand All @@ -5182,7 +5187,7 @@ paths:
});

const endpoint = await client.endpoints.create({
model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
model: "Qwen/Qwen3.5-9B-FP8",
hardware: "1x_nvidia_a100_80gb_sxm",
autoscaling: {
max_replicas: 5,
Expand All @@ -5198,7 +5203,7 @@ paths:
-H "Authorization: Bearer $TOGETHER_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
"model": "Qwen/Qwen3.5-9B-FP8",
"hardware": "1x_nvidia_a100_80gb_sxm",
"autoscaling": {
"max_replicas": 5,
Expand Down Expand Up @@ -6277,14 +6282,14 @@ paths:
type="classify",
parameters=ParametersEvaluationClassifyParameters(
judge=ParametersEvaluationClassifyParametersJudge(
model="meta-llama/Llama-3.1-70B-Instruct-Turbo",
model="openai/gpt-oss-120b",
model_source="serverless",
system_template="You are an expert evaluator...",
),
input_data_file_path="file-abc123",
labels=["good", "bad"],
pass_labels=["good"],
model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
model_to_evaluate="Qwen/Qwen3.5-9B"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change the judge model in all these from model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo' to model: 'openai/gpt-oss-120b'

)
)

Expand All @@ -6301,12 +6306,12 @@ paths:

response = client.evaluation.create(
type="classify",
judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
judge_model_name="openai/gpt-oss-120b",
judge_system_template="You are an expert evaluator...",
input_data_file_path="file-abc123",
labels=["good", "bad"],
pass_labels=["good"],
model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
model_to_evaluate="Qwen/Qwen3.5-9B"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change the judge model in all these from model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo' to model: 'openai/gpt-oss-120b'

)

print(response.workflow_id)
Expand All @@ -6323,14 +6328,14 @@ paths:
type: 'classify',
parameters: {
judge: {
model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
model: 'openai/gpt-oss-120b',
model_source: 'serverless',
system_template: 'You are an expert evaluator...',
},
input_data_file_path: 'file-abc123',
labels: ['good', 'bad'],
pass_labels: ['good'],
model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
model_to_evaluate: 'Qwen/Qwen3.5-9B',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change the judge model in all these from model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo' to model: 'openai/gpt-oss-120b'

},
});

Expand All @@ -6348,14 +6353,14 @@ paths:
type: 'classify',
parameters: {
judge: {
model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
model: 'openai/gpt-oss-120b',
model_source: 'serverless',
system_template: 'You are an expert evaluator...',
},
input_data_file_path: 'file-abc123',
labels: ['good', 'bad'],
pass_labels: ['good'],
model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
model_to_evaluate: 'Qwen/Qwen3.5-9B',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should change the judge model in all these from model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo' to model: 'openai/gpt-oss-120b'

},
});

Expand Down Expand Up @@ -9096,15 +9101,15 @@ components:
The name of the model to query.<br>
<br>
[See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
example: Qwen/Qwen3.5-9B
anyOf:
- type: string
enum:
- Qwen/Qwen2.5-72B-Instruct-Turbo
- Qwen/Qwen2.5-7B-Instruct-Turbo
- Qwen/Qwen3.5-9B
- meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
- meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
- meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
- type: string
max_tokens:
type: integer
Expand Down Expand Up @@ -11879,7 +11884,7 @@ components:
model_id:
type: string
description: 'Model to use for processing batch requests'
example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
example: 'Qwen/Qwen3.5-9B'
BatchErrorResponse:
type: object
properties:
Expand Down Expand Up @@ -11930,7 +11935,7 @@ components:
description: 'Completion progress (0.0 to 100)'
model_id:
type: string
example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
example: 'Qwen/Qwen3.5-9B'
description: 'Model used for processing requests'
output_file_id:
type: string
Expand Down
Loading