Add default implementation for generated_image_to_input

jiwon-oai · jiwon-oai · commit 876c31bbff88 · 2025-12-18T11:36:40.000-08:00
diff --git a/chatkit/agents.py b/chatkit/agents.py
@@ -25,6 +25,7 @@
     EasyInputMessageParam,
     ResponseFunctionToolCallParam,
     ResponseInputContentParam,
+    ResponseInputImageParam,
     ResponseInputMessageContentListParam,
     ResponseInputTextParam,
     ResponseOutputText,
@@ -377,7 +378,7 @@ class ResponseStreamConverter:
     assigned a progress value of 0.
     """
 
-    def __init__(self, partial_images: int | None = None):
+    def __init__(self, *, partial_images: int | None = None):
         """
         Args:
             partial_images: The expected number of partial image updates for image
@@ -419,10 +420,10 @@ def partial_image_index_to_progress(self, partial_image_index: int) -> float:
             A float between 0 and 1 representing progress for the image
             generation result.
         """
-        if self.partial_images is None:
-            return 0
+        if self.partial_images is None or self.partial_images <= 0:
+            return 0.0
 
-        return partial_image_index / self.partial_images
+        return min(1.0, partial_image_index / self.partial_images)
 
 
 _DEFAULT_RESPONSE_STREAM_CONVERTER = ResponseStreamConverter()
@@ -708,7 +709,10 @@ def end_workflow(item: WorkflowItem):
                     if not ctx.generated_image_item:
                         continue
 
-                    url = await converter.base64_image_to_url(item.id, item.result)
+                    url = await converter.base64_image_to_url(
+                        image_id=item.id,
+                        base64_image=item.result,
+                    )
                     image = GeneratedImage(id=item.id, url=url)
 
                     ctx.generated_image_item.image = image
@@ -720,9 +724,9 @@ def end_workflow(item: WorkflowItem):
                     continue
 
                 url = await converter.base64_image_to_url(
-                    event.item_id,
-                    event.partial_image_b64,
-                    event.partial_image_index,
+                    image_id=event.item_id,
+                    base64_image=event.partial_image_b64,
+                    partial_image_index=event.partial_image_index,
                 )
                 progress = converter.partial_image_index_to_progress(
                     event.partial_image_index
@@ -833,10 +837,27 @@ async def generated_image_to_input(
     ) -> TResponseInputItem | list[TResponseInputItem] | None:
         """
         Convert a GeneratedImageItem into input item(s) to send to the model.
-        Required when generated images are enabled.
+        Override this method to customize the conversion of generated images, such as when your
+        generated image url is not publicly reachable.
         """
-        raise NotImplementedError(
-            "A GeneratedImageItem was included in a UserMessageItem but Converter.generated_image_to_message_content was not implemented"
+        if not item.image:
+            return None
+
+        return Message(
+            type="message",
+            content=[
+                ResponseInputTextParam(
+                    type="input_text",
+                    text="The following image was generated by the agent.",
+                ),
+                ResponseInputImageParam(
+                    type="input_image",
+                    detail="auto",
+                    file_id=item.image.id,
+                    image_url=item.image.url,
+                ),
+            ],
+            role="user",
         )
 
     async def hidden_context_to_input(
diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -548,6 +548,45 @@ async def test_input_item_converter_user_input_with_tags_throws_by_default():
         await simple_to_agent_input(items)
 
 
+async def test_input_item_converter_generated_image_item():
+    items = [
+        GeneratedImageItem(
+            id="img_item_1",
+            thread_id=thread.id,
+            created_at=datetime.now(),
+            image=GeneratedImage(id="img_1", url="https://example.com/img.png"),
+        )
+    ]
+
+    input_items = await simple_to_agent_input(items)
+    assert len(input_items) == 1
+
+    message = cast(dict, input_items[0])
+    assert message.get("type") == "message"
+    assert message.get("role") == "user"
+
+    content = cast(list, message.get("content"))
+    assert content[0].get("type") == "input_text"
+    assert content[0].get("text") == "The following image was generated by the agent."
+    assert content[1].get("type") == "input_image"
+    assert content[1].get("file_id") == "img_1"
+    assert content[1].get("image_url") == "https://example.com/img.png"
+    assert content[1].get("detail") == "auto"
+
+
+async def test_input_item_converter_generated_image_item_without_image():
+    items = [
+        GeneratedImageItem(
+            id="img_item_1",
+            thread_id=thread.id,
+            created_at=datetime.now(),
+        )
+    ]
+
+    input_items = await simple_to_agent_input(items)
+    assert input_items == []
+
+
 async def test_input_item_converter_for_hidden_context_with_string_content():
     items = [
         HiddenContextItem(