diff --git a/openapi.yaml b/openapi.yaml index dea4b15..13d9f86 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -1192,11 +1192,12 @@ paths: ) response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B", messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What are some fun things to do in New York?"}, - ] + ], + reasoning={"enabled": False} ) - lang: Python label: Together AI SDK (v1) @@ -1209,11 +1210,12 @@ paths: ) response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B", messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What are some fun things to do in New York?"}, - ] + ], + reasoning={"enabled": False} ) print(response.choices[0].message.content) @@ -1227,11 +1229,12 @@ paths: }); const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B", messages: [ { role: "system", content: "You are a helpful assistant." }, { role: "user", "content": "What are some fun things to do in New York?" }, ], + reasoning: { enabled: false }, }); console.log(response.choices[0].message?.content); @@ -1245,11 +1248,12 @@ paths: }); const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B", messages: [ { role: "system", content: "You are a helpful assistant." }, { role: "user", "content": "What are some fun things to do in New York?" }, ], + reasoning: { enabled: false }, }); console.log(response.choices[0].message?.content); @@ -1260,11 +1264,12 @@ paths: -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "model": "Qwen/Qwen3.5-9B", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "What are some fun things to do in New York?"} - ] + ], + "reasoning": {"enabled": false} }' operationId: chat-completions requestBody: @@ -1337,7 +1342,7 @@ paths: ) response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B", prompt="The largest city in France is", max_tokens=1 ) @@ -1354,7 +1359,7 @@ paths: ) response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B", prompt="The largest city in France is", max_tokens=1 ) @@ -1370,7 +1375,7 @@ paths: }); const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B", prompt: "The largest city in France is", max_tokens: 1, }); @@ -1386,7 +1391,7 @@ paths: }); const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B", prompt: "The largest city in France is", max_tokens: 1 }); @@ -1399,7 +1404,7 @@ paths: -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "model": "Qwen/Qwen3.5-9B", "prompt": "The largest city in France is", "max_tokens": 1 }' @@ -5126,7 +5131,7 @@ paths: ) endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B-FP8", hardware="1x_nvidia_a100_80gb_sxm", autoscaling={ "min_replicas": 2, @@ -5146,7 +5151,7 @@ paths: ) endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model="Qwen/Qwen3.5-9B-FP8", hardware="1x_nvidia_a100_80gb_sxm", min_replicas=2, max_replicas=5, @@ -5163,7 +5168,7 @@ paths: }); const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B-FP8", hardware: "1x_nvidia_a100_80gb_sxm", autoscaling: { max_replicas: 5, @@ -5182,7 +5187,7 @@ paths: }); const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + model: "Qwen/Qwen3.5-9B-FP8", hardware: "1x_nvidia_a100_80gb_sxm", autoscaling: { max_replicas: 5, @@ -5198,7 +5203,7 @@ paths: -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "model": "Qwen/Qwen3.5-9B-FP8", "hardware": "1x_nvidia_a100_80gb_sxm", "autoscaling": { "max_replicas": 5, @@ -6277,14 +6282,14 @@ paths: type="classify", parameters=ParametersEvaluationClassifyParameters( judge=ParametersEvaluationClassifyParametersJudge( - model="meta-llama/Llama-3.1-70B-Instruct-Turbo", + model="openai/gpt-oss-120b", model_source="serverless", system_template="You are an expert evaluator...", ), input_data_file_path="file-abc123", labels=["good", "bad"], pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + model_to_evaluate="Qwen/Qwen3.5-9B" ) ) @@ -6301,12 +6306,12 @@ paths: response = client.evaluation.create( type="classify", - judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo", + judge_model_name="openai/gpt-oss-120b", judge_system_template="You are an expert evaluator...", input_data_file_path="file-abc123", labels=["good", "bad"], pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + model_to_evaluate="Qwen/Qwen3.5-9B" ) print(response.workflow_id) @@ -6323,14 +6328,14 @@ paths: type: 'classify', parameters: { judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model: 'openai/gpt-oss-120b', model_source: 'serverless', system_template: 'You are an expert evaluator...', }, input_data_file_path: 'file-abc123', labels: ['good', 'bad'], pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + model_to_evaluate: 'Qwen/Qwen3.5-9B', }, }); @@ -6348,14 +6353,14 @@ paths: type: 'classify', parameters: { judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model: 'openai/gpt-oss-120b', model_source: 'serverless', system_template: 'You are an expert evaluator...', }, input_data_file_path: 'file-abc123', labels: ['good', 'bad'], pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + model_to_evaluate: 'Qwen/Qwen3.5-9B', }, }); @@ -9096,15 +9101,15 @@ components: The name of the model to query.

[See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) - example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + example: Qwen/Qwen3.5-9B anyOf: - type: string enum: - Qwen/Qwen2.5-72B-Instruct-Turbo - Qwen/Qwen2.5-7B-Instruct-Turbo + - Qwen/Qwen3.5-9B - meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - type: string max_tokens: type: integer @@ -11879,7 +11884,7 @@ components: model_id: type: string description: 'Model to use for processing batch requests' - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + example: 'Qwen/Qwen3.5-9B' BatchErrorResponse: type: object properties: @@ -11930,7 +11935,7 @@ components: description: 'Completion progress (0.0 to 100)' model_id: type: string - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + example: 'Qwen/Qwen3.5-9B' description: 'Model used for processing requests' output_file_id: type: string