Skip to content

Commit ca69910

Browse files
skeskinen authored and WyattBlue committed
Writable and copyable attachment and data streams
1 parent 61e9fc6 commit ca69910

File tree

5 files changed

+215
-39
lines changed

5 files changed

+215
-39
lines changed

av/container/output.py

Lines changed: 87 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def add_stream_from_template(
134134
self, template: Stream, opaque: bool | None = None, **kwargs
135135
):
136136
"""
137-
Creates a new stream from a template. Supports video, audio, and subtitle streams.
137+
Creates a new stream from a template. Supports video, audio, subtitle, data and attachment streams.
138138
139139
:param template: Copy codec from another :class:`~av.stream.Stream` instance.
140140
:param opaque: If True, copy opaque data from the template's codec context.
@@ -145,9 +145,7 @@ def add_stream_from_template(
145145
opaque = template.type != "video"
146146

147147
if template.codec_context is None:
148-
raise ValueError(
149-
f"template stream of type {template.type} has no codec context"
150-
)
148+
return self._add_stream_without_codec_from_template(template, **kwargs)
151149

152150
codec_obj: Codec
153151
if opaque: # Copy ctx from template.
@@ -196,6 +194,79 @@ def add_stream_from_template(
196194

197195
return py_stream
198196

197+
def _add_stream_without_codec_from_template(
    self, template: Stream, **kwargs
) -> Stream:
    """
    Clone a codec-less template stream (data or attachment) into this container.

    The codec parameters are copied with ``avcodec_parameters_copy`` and the
    time base, start time, duration, disposition and metadata are mirrored
    from the template.

    :param template: The :class:`~av.stream.Stream` to copy from.
    :param kwargs: Set as attributes on the new stream once it is created.
    :return: The newly added stream.
    :raises ValueError: If the template is neither a data nor an attachment stream.
    :raises MemoryError: If the new stream cannot be allocated.
    """
    media_type: cython.int = template.ptr.codecpar.codec_type
    if (
        media_type != lib.AVMEDIA_TYPE_ATTACHMENT
        and media_type != lib.AVMEDIA_TYPE_DATA
    ):
        raise ValueError(
            f"template stream of type {template.type} has no codec context"
        )

    new_stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(
        self.ptr, cython.NULL
    )
    if new_stream == cython.NULL:
        raise MemoryError("Could not allocate stream")

    err_check(
        lib.avcodec_parameters_copy(new_stream.codecpar, template.ptr.codecpar)
    )

    # These fields live on the AVStream itself, so there is no codec context
    # to derive them from; take them straight from the template.
    new_stream.time_base = template.ptr.time_base
    new_stream.start_time = template.ptr.start_time
    new_stream.duration = template.ptr.duration
    new_stream.disposition = template.ptr.disposition

    # No codec context exists for this stream, hence the ``None``.
    wrapped: Stream = wrap_stream(self, new_stream, None)
    self.streams.add_stream(wrapped)

    wrapped.metadata = dict(template.metadata)

    for attr_name, attr_value in kwargs.items():
        setattr(wrapped, attr_name, attr_value)

    return wrapped
229+
230+
def add_attachment(self, name: str, mimetype: str, data: bytes):
    """
    Create an attachment stream and embed its payload into the container header.

    - Only supported by formats that support attachments (e.g. Matroska).
    - No per-packet muxing is required; attachments are written at header time.

    :param name: Stored as the stream's ``filename`` metadata entry.
    :param mimetype: Stored as the stream's ``mimetype`` metadata entry.
    :param data: Payload bytes, copied into the stream's ``codecpar.extradata``.
    :return: The newly added attachment stream.
    :raises MemoryError: If the stream or the payload buffer cannot be allocated.
    """
    # Create stream with no codec (attachments are codec-less).
    stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(
        self.ptr, cython.NULL
    )
    if stream == cython.NULL:
        raise MemoryError("Could not allocate stream")

    stream.codecpar.codec_type = lib.AVMEDIA_TYPE_ATTACHMENT
    stream.codecpar.codec_id = lib.AV_CODEC_ID_NONE

    # Allocate and copy payload into codecpar.extradata.
    payload_size: cython.size_t = len(data)
    # FFmpeg requires extradata to be followed by AV_INPUT_BUFFER_PADDING_SIZE
    # zeroed bytes; a single trailing NUL is not enough.
    padding: cython.size_t = lib.AV_INPUT_BUFFER_PADDING_SIZE
    if payload_size:
        buf = cython.cast(cython.p_uchar, lib.av_malloc(payload_size + padding))
        if buf == cython.NULL:
            raise MemoryError("Could not allocate attachment data")
        # Copy bytes.
        for i in range(payload_size):
            buf[i] = data[i]
        # Zero the mandatory padding region.
        for i in range(padding):
            buf[payload_size + i] = 0
        # codecpar takes ownership of the buffer from here on.
        stream.codecpar.extradata = buf
        stream.codecpar.extradata_size = payload_size

    # Attachment name and MIME type are carried as stream metadata.
    meta_ptr = cython.address(stream.metadata)
    name_bytes = name.encode()
    err_check(lib.av_dict_set(meta_ptr, b"filename", name_bytes, 0))
    mime_bytes = mimetype.encode()
    err_check(lib.av_dict_set(meta_ptr, b"mimetype", mime_bytes, 0))

    # Wrap as user-land stream (no codec context, hence ``None``).
    py_stream: Stream = wrap_stream(self, stream, None)
    self.streams.add_stream(py_stream)
    return py_stream
269+
199270
def add_data_stream(self, codec_name=None, options: dict | None = None):
200271
"""add_data_stream(codec_name=None)
201272
@@ -270,21 +341,20 @@ def start_encoding(self):
270341
# Finalize and open all streams.
271342
for stream in self.streams:
272343
ctx = stream.codec_context
273-
# Skip codec context handling for data streams without codecs
344+
# Skip codec context handling for streams without codecs (e.g. data/attachments).
274345
if ctx is None:
275-
if stream.type != "data":
346+
if stream.type not in {"data", "attachment"}:
276347
raise ValueError(f"Stream {stream.index} has no codec context")
277-
continue
278-
279-
if not ctx.is_open:
280-
for k, v in self.options.items():
281-
ctx.options.setdefault(k, v)
282-
ctx.open()
283-
284-
# Track option consumption.
285-
for k in self.options:
286-
if k not in ctx.options:
287-
used_options.add(k)
348+
else:
349+
if not ctx.is_open:
350+
for k, v in self.options.items():
351+
ctx.options.setdefault(k, v)
352+
ctx.open()
353+
354+
# Track option consumption.
355+
for k in self.options:
356+
if k not in ctx.options:
357+
used_options.add(k)
288358

289359
stream._finalize_for_output()
290360

av/container/output.pyi

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@ from typing import Sequence, TypeVar, Union, overload
44
from av.audio import _AudioCodecName
55
from av.audio.stream import AudioStream
66
from av.packet import Packet
7-
from av.stream import DataStream
7+
from av.stream import AttachmentStream, DataStream, Stream
88
from av.subtitles.stream import SubtitleStream
99
from av.video import _VideoCodecName
1010
from av.video.stream import VideoStream
1111

1212
from .core import Container
1313

14-
_StreamT = TypeVar("_StreamT", bound=Union[VideoStream, AudioStream, SubtitleStream])
14+
_StreamT = TypeVar("_StreamT", bound=Stream)
1515

1616
class OutputContainer(Container):
1717
def __enter__(self) -> OutputContainer: ...
@@ -42,6 +42,9 @@ class OutputContainer(Container):
4242
def add_stream_from_template(
4343
self, template: _StreamT, opaque: bool | None = None, **kwargs
4444
) -> _StreamT: ...
45+
def add_attachment(
46+
self, name: str, mimetype: str, data: bytes
47+
) -> AttachmentStream: ...
4548
def add_data_stream(
4649
self, codec_name: str | None = None, options: dict[str, str] | None = None
4750
) -> DataStream: ...

av/stream.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ def _finalize_for_output(self):
148148
errors=self.container.metadata_errors,
149149
)
150150

151+
if self.codec_context is None:
152+
return
153+
151154
if not self.ptr.time_base.num:
152155
self.ptr.time_base = self.codec_context.ptr.time_base
153156

@@ -316,3 +319,17 @@ def mimetype(self):
316319
:rtype: str | None
317320
"""
318321
return self.metadata.get("mimetype")
322+
323+
@property
def data(self):
    """
    Return the raw attachment payload as bytes.

    The payload is read from the stream's ``codecpar.extradata``. An empty
    bytes object is returned when no extradata is present.

    :rtype: bytes
    """
    extradata: cython.p_uchar = self.ptr.codecpar.extradata
    size: cython.Py_ssize_t = self.ptr.codecpar.extradata_size
    if extradata == cython.NULL or size <= 0:
        return b""

    # Slicing a C byte pointer copies exactly ``size`` bytes into a new bytes
    # object in one step, so no per-byte loop or intermediate bytearray is needed.
    return extradata[:size]

av/stream.pyi

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,5 @@ class AttachmentStream(Stream):
5959
type: Literal["attachment"]
6060
@property
6161
def mimetype(self) -> str | None: ...
62+
@property
63+
def data(self) -> bytes: ...

tests/test_streams.py

Lines changed: 104 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import os
22
from fractions import Fraction
3-
from typing import Any, cast
43

54
import pytest
65

76
import av
7+
import av.datasets
88

99
from .common import fate_suite
1010

@@ -13,7 +13,14 @@ class TestStreams:
1313
@pytest.fixture(autouse=True)
1414
def cleanup(self):
1515
yield
16-
for file in ("data.ts", "out.mkv"):
16+
for file in (
17+
"data.ts",
18+
"data_source.ts",
19+
"data_copy.ts",
20+
"out.mkv",
21+
"video_with_attachment.mkv",
22+
"remuxed_attachment.mkv",
23+
):
1724
if os.path.exists(file):
1825
os.remove(file)
1926

@@ -149,21 +156,98 @@ def test_data_stream(self) -> None:
149156
container.close()
150157

151158
def test_data_stream_from_template(self) -> None:
152-
"""Test that adding a data stream from a template raises ValueError."""
153-
154-
# Open an existing container with a data stream
155-
input_container = av.open(fate_suite("mxf/track_01_v02.mxf"))
156-
input_data_stream = input_container.streams.data[0]
157-
158-
# Create a new container and ensure using a data stream as a template raises ValueError
159-
output_container = av.open("out.mkv", "w")
160-
with pytest.raises(ValueError):
161-
# input_data_stream is a DataStream at runtime; the test asserts that
162-
# using it as a template raises ValueError. The static type stubs
163-
# intentionally restrict which Stream subclasses are valid templates,
164-
# so cast to Any here to keep the runtime check while satisfying
165-
# the type checker.
166-
output_container.add_stream_from_template(cast(Any, input_data_stream))
167-
168-
input_container.close()
169-
output_container.close()
159+
source_path = "data_source.ts"
160+
payloads = [b"payload-a", b"payload-b", b"payload-c"]
161+
162+
with av.open(source_path, "w") as source:
163+
source_stream = source.add_data_stream()
164+
for i, payload in enumerate(payloads):
165+
packet = av.Packet(payload)
166+
packet.pts = i
167+
packet.stream = source_stream
168+
source.mux(packet)
169+
170+
copied_payloads: list[bytes] = []
171+
172+
with av.open(source_path) as input_container:
173+
input_data_stream = input_container.streams.data[0]
174+
175+
with av.open("data_copy.ts", "w") as output_container:
176+
output_data_stream = output_container.add_stream_from_template(
177+
input_data_stream
178+
)
179+
180+
for packet in input_container.demux(input_data_stream):
181+
payload = bytes(packet)
182+
if not payload:
183+
continue
184+
copied_payloads.append(payload)
185+
clone = av.Packet(payload)
186+
clone.pts = packet.pts
187+
clone.dts = packet.dts
188+
clone.time_base = packet.time_base
189+
clone.stream = output_data_stream
190+
output_container.mux(clone)
191+
192+
with av.open("data_copy.ts") as remuxed:
193+
output_stream = remuxed.streams.data[0]
194+
assert output_stream.codec_context is None
195+
196+
remuxed_payloads: list[bytes] = []
197+
for packet in remuxed.demux(output_stream):
198+
payload = bytes(packet)
199+
if payload:
200+
remuxed_payloads.append(payload)
201+
202+
assert remuxed_payloads == copied_payloads
203+
204+
def test_attachment_stream(self) -> None:
    """Write an attachment next to a video stream, then remux it via templates.

    The attachment's name, mimetype and payload are checked after the initial
    write and again after every stream has been copied into a second file
    with ``add_stream_from_template``.
    """

    def assert_single_attachment(path: str) -> None:
        # Reopen the file from disk so the check covers what was actually written.
        with av.open(path) as container:
            attachments = container.streams.attachments
            assert len(attachments) == 1
            att = attachments[0]
            assert att.name == "attachment.txt"
            assert att.mimetype == "text/plain"
            assert att.data == b"hello\n"

    input_path = av.datasets.curated(
        "pexels/time-lapse-video-of-night-sky-857195.mp4"
    )
    out1_path = "video_with_attachment.mkv"
    out2_path = "remuxed_attachment.mkv"

    # Both containers are context-managed so the input is closed even if
    # muxing raises part-way through.
    with av.open(input_path) as input_, av.open(out1_path, "w") as out1:
        out1.add_attachment(
            name="attachment.txt", mimetype="text/plain", data=b"hello\n"
        )

        in_v = input_.streams.video[0]
        out_v = out1.add_stream_from_template(in_v)

        for packet in input_.demux(in_v):
            # Skip packets that carry no dts.
            if packet.dts is None:
                continue
            packet.stream = out_v
            out1.mux(packet)

    assert_single_attachment(out1_path)

    with av.open(out1_path) as ic, av.open(out2_path, "w") as oc:
        # Template every input stream (video and attachment) into the output.
        stream_map = {s.index: oc.add_stream_from_template(s) for s in ic.streams}

        # Only video packets are muxed; the attachment has to survive through
        # the templated stream alone.
        for packet in ic.demux(ic.streams.video):
            if packet.dts is None:
                continue
            packet.stream = stream_map[packet.stream.index]
            oc.mux(packet)

    assert_single_attachment(out2_path)

0 commit comments

Comments
 (0)