diff --git a/src/together/cli/api/chat.py b/src/together/cli/api/chat.py
index d95b760..d778d7f 100644
--- a/src/together/cli/api/chat.py
+++ b/src/together/cli/api/chat.py
@@ -2,7 +2,7 @@
 
 import cmd
 import json
-from typing import List, Tuple
+from typing import Any, Dict, List, Tuple
 
 import click
 
@@ -181,6 +181,12 @@ def interactive(
     "--frequency-penalty", type=float, help="Frequency penalty sampling method"
 )
 @click.option("--min-p", type=float, help="Min p sampling")
+@click.option(
+    "--audio-url",
+    type=str,
+    multiple=True,
+    help="Audio URL to attach to the last user message",
+)
 @click.option("--no-stream", is_flag=True, help="Disable streaming")
 @click.option("--logprobs", type=int, help="Return logprobs. Only works with --raw.")
 @click.option("--echo", is_flag=True, help="Echo prompt. Only works with --raw.")
@@ -200,6 +206,7 @@ def chat(
     presence_penalty: float | None = None,
     frequency_penalty: float | None = None,
     min_p: float | None = None,
+    audio_url: List[str] | None = None,
     no_stream: bool = False,
     logprobs: int | None = None,
     echo: bool | None = None,
@@ -210,7 +217,27 @@ def chat(
     """Generate chat completions from messages"""
     client: Together = ctx.obj
 
-    messages = [{"role": msg[0], "content": msg[1]} for msg in message]
+    messages: List[Dict[str, Any]] = [
+        {"role": msg[0], "content": msg[1]} for msg in message
+    ]
+
+    if audio_url:
+        # Audio parts are attached to the trailing user message only. Fail
+        # loudly rather than silently sending the request without the audio.
+        if not messages or messages[-1]["role"] != "user":
+            raise click.UsageError(
+                "--audio-url requires the last message to have the 'user' role"
+            )
+
+        last_msg = messages[-1]
+        # Wrap plain text as a text part so audio parts can be appended to it.
+        if isinstance(last_msg["content"], str):
+            last_msg["content"] = [{"type": "text", "text": last_msg["content"]}]
+
+        # Append one audio_url part per URL, preserving command-line order.
+        last_msg["content"].extend(
+            {"type": "audio_url", "audio_url": {"url": url}} for url in audio_url
+        )
 
     response = client.chat.completions.create(
         model=model,
diff --git a/src/together/types/chat_completions.py b/src/together/types/chat_completions.py
index 8e5132f..0efd34f 100644
--- a/src/together/types/chat_completions.py
+++ b/src/together/types/chat_completions.py
@@ -46,6 +46,7 @@ class ChatCompletionMessageContentType(str, Enum):
     TEXT = "text"
     IMAGE_URL = "image_url"
     VIDEO_URL = "video_url"
+    AUDIO_URL = "audio_url"
 
 
 class ChatCompletionMessageContentImageURL(BaseModel):
@@ -56,11 +57,16 @@ class ChatCompletionMessageContentVideoURL(BaseModel):
     url: str
 
 
+class ChatCompletionMessageContentAudioURL(BaseModel):
+    url: str  # audio URL carried by an "audio_url" content part
+
+
 class ChatCompletionMessageContent(BaseModel):
     type: ChatCompletionMessageContentType
     text: str | None = None
     image_url: ChatCompletionMessageContentImageURL | None = None
     video_url: ChatCompletionMessageContentVideoURL | None = None
+    audio_url: ChatCompletionMessageContentAudioURL | None = None  # audio URL wrapper
 
 
 class ChatCompletionMessage(BaseModel):