Skip to content

Commit b45aca4

Browse files
committed
Extract GPS epoch timestamps from RMKN maker note in Ricoh Theta videos
Ricoh Theta cameras write CAMM Type 5 GPS data (lat/lon/alt only, no epoch timestamps). However, the RMKN (Ricoh Maker Note) box in the MP4 udta container includes a GPS IFD with GPSDateStamp and GPSTimeStamp tags: true GPS-derived UTC timestamps recorded at the start of video recording.

This change:
- Parses the RMKN TIFF/EXIF structure in camm_parser.py to extract the GPS datetime from the GPS IFD (tags 0x001D and 0x0007).
- Adds a gps_datetime field to CAMMInfo to carry the extracted timestamp.
- Enriches CAMM Type 5 points with computed epoch timestamps in CAMMVideoExtractor using the formula: epoch = rmkn_gps_epoch + (point.time - first_point.time).
- Converts enriched points to CAMMGPSPoint (Type 6) so downstream consumers receive proper GPS epoch times.
- Adds unit tests for RMKN parsing (valid, little-endian, missing GPS IFD, truncated, bad magic) and for the point enrichment logic.
1 parent abc0056 commit b45aca4

File tree

3 files changed

+374
-2
lines changed

3 files changed

+374
-2
lines changed

mapillary_tools/camm/camm_parser.py

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
import abc
1010
import dataclasses
11+
import datetime
1112
import io
1213
import logging
14+
import struct
1315
import typing as T
1416
from enum import Enum
1517

@@ -58,16 +60,22 @@ class CAMMInfo:
5860
magn: list[telemetry.MagnetometerData] | None = None
5961
make: str = ""
6062
model: str = ""
63+
# GPS datetime from RMKN (Ricoh Maker Note) EXIF data, if available.
64+
# This is a true GPS-derived UTC timestamp corresponding to the
65+
# first CAMM Type 5 GPS point in the video.
66+
gps_datetime: datetime.datetime | None = None
6167

6268

6369
def extract_camm_info(fp: T.BinaryIO, telemetry_only: bool = False) -> CAMMInfo | None:
6470
moov = MovieBoxParser.parse_stream(fp)
6571

6672
make, model = "", ""
73+
gps_datetime: datetime.datetime | None = None
6774
if not telemetry_only:
6875
udta_boxdata = moov.extract_udta_boxdata()
6976
if udta_boxdata is not None:
7077
make, model = _extract_camera_make_and_model_from_utda_boxdata(udta_boxdata)
78+
gps_datetime = _extract_gps_datetime_from_udta_boxdata(udta_boxdata)
7179

7280
gps_only_construct = _construct_with_selected_camm_types(
7381
[CAMMType.MIN_GPS, CAMMType.GPS]
@@ -121,7 +129,13 @@ def extract_camm_info(fp: T.BinaryIO, telemetry_only: bool = False) -> CAMMInfo
121129
elif isinstance(measurement, telemetry.CAMMGPSPoint):
122130
gps.append(measurement)
123131

124-
return CAMMInfo(mini_gps=mini_gps, gps=gps, make=make, model=model)
132+
return CAMMInfo(
133+
mini_gps=mini_gps,
134+
gps=gps,
135+
make=make,
136+
model=model,
137+
gps_datetime=gps_datetime,
138+
)
125139

126140
return None
127141

@@ -551,6 +565,164 @@ def _parse_quietly(data: bytes, type: bytes) -> bytes:
551565
return parsed["data"]
552566

553567

568+
def _extract_gps_datetime_from_udta_boxdata(
569+
utda_boxdata: dict,
570+
) -> datetime.datetime | None:
571+
"""Extract GPS datetime from the RMKN (Ricoh Maker Note) box in udta."""
572+
for box in utda_boxdata:
573+
if box.type == b"RMKN":
574+
gps_dt = _extract_gps_datetime_from_rmkn(box.data)
575+
if gps_dt is not None:
576+
return gps_dt
577+
return None
578+
579+
580+
def _extract_gps_datetime_from_rmkn(rmkn_data: bytes) -> datetime.datetime | None:
581+
"""Extract GPS datetime from RMKN (Ricoh Maker Note) EXIF data.
582+
583+
The RMKN box contains TIFF/EXIF data with a GPS IFD that includes
584+
GPSDateStamp and GPSTimeStamp tags. These are true GPS-derived UTC
585+
timestamps recorded by the camera at the start of video recording.
586+
587+
Returns a timezone-aware UTC datetime, or None if not available.
588+
"""
589+
if len(rmkn_data) < 8:
590+
return None
591+
592+
# Parse TIFF header
593+
byte_order = rmkn_data[:2]
594+
if byte_order == b"MM":
595+
endian = ">"
596+
elif byte_order == b"II":
597+
endian = "<"
598+
else:
599+
return None
600+
601+
magic = struct.unpack(f"{endian}H", rmkn_data[2:4])[0]
602+
if magic != 42:
603+
return None
604+
605+
ifd0_offset = struct.unpack(f"{endian}I", rmkn_data[4:8])[0]
606+
607+
# Parse IFD0 to find GPS IFD pointer (tag 0x8825)
608+
gps_ifd_offset = _find_ifd_tag_long(rmkn_data, endian, ifd0_offset, 0x8825)
609+
if gps_ifd_offset is None:
610+
return None
611+
612+
# Parse GPS IFD to find GPSDateStamp (0x001D) and GPSTimeStamp (0x0007)
613+
gps_date_str = _read_ifd_ascii_tag(rmkn_data, endian, gps_ifd_offset, 0x001D)
614+
gps_time_rationals = _read_ifd_rational_tag(
615+
rmkn_data, endian, gps_ifd_offset, 0x0007, count=3
616+
)
617+
618+
if gps_date_str is None or gps_time_rationals is None:
619+
return None
620+
621+
try:
622+
# GPSDateStamp is "YYYY:MM:DD"
623+
date_parts = gps_date_str.strip().split(":")
624+
year, month, day = int(date_parts[0]), int(date_parts[1]), int(date_parts[2])
625+
626+
# GPSTimeStamp is 3 RATIONAL values: hours, minutes, seconds
627+
hour = gps_time_rationals[0][0] // gps_time_rationals[0][1]
628+
minute = gps_time_rationals[1][0] // gps_time_rationals[1][1]
629+
sec_num, sec_den = gps_time_rationals[2]
630+
second = sec_num // sec_den
631+
microsecond = ((sec_num % sec_den) * 1_000_000) // sec_den if sec_den > 0 else 0
632+
633+
return datetime.datetime(
634+
year,
635+
month,
636+
day,
637+
hour,
638+
minute,
639+
second,
640+
microsecond,
641+
tzinfo=datetime.timezone.utc,
642+
)
643+
except (ValueError, IndexError, ZeroDivisionError):
644+
return None
645+
646+
647+
def _find_ifd_tag_long(
648+
data: bytes, endian: str, ifd_offset: int, target_tag: int
649+
) -> int | None:
650+
"""Find a LONG (4-byte) value for a specific tag in a TIFF IFD."""
651+
if ifd_offset + 2 > len(data):
652+
return None
653+
num_entries = struct.unpack(f"{endian}H", data[ifd_offset : ifd_offset + 2])[0]
654+
for i in range(num_entries):
655+
entry_offset = ifd_offset + 2 + i * 12
656+
if entry_offset + 12 > len(data):
657+
break
658+
tag = struct.unpack(f"{endian}H", data[entry_offset : entry_offset + 2])[0]
659+
if tag == target_tag:
660+
value = struct.unpack(
661+
f"{endian}I", data[entry_offset + 8 : entry_offset + 12]
662+
)[0]
663+
return value
664+
return None
665+
666+
667+
def _read_ifd_ascii_tag(
668+
data: bytes, endian: str, ifd_offset: int, target_tag: int
669+
) -> str | None:
670+
"""Read an ASCII string tag from a TIFF IFD."""
671+
if ifd_offset + 2 > len(data):
672+
return None
673+
num_entries = struct.unpack(f"{endian}H", data[ifd_offset : ifd_offset + 2])[0]
674+
for i in range(num_entries):
675+
entry_offset = ifd_offset + 2 + i * 12
676+
if entry_offset + 12 > len(data):
677+
break
678+
tag = struct.unpack(f"{endian}H", data[entry_offset : entry_offset + 2])[0]
679+
if tag == target_tag:
680+
count = struct.unpack(
681+
f"{endian}I", data[entry_offset + 4 : entry_offset + 8]
682+
)[0]
683+
if count <= 4:
684+
raw = data[entry_offset + 8 : entry_offset + 8 + count]
685+
else:
686+
offset = struct.unpack(
687+
f"{endian}I", data[entry_offset + 8 : entry_offset + 12]
688+
)[0]
689+
if offset + count > len(data):
690+
return None
691+
raw = data[offset : offset + count]
692+
return raw.rstrip(b"\x00").decode("ascii", errors="replace")
693+
return None
694+
695+
696+
def _read_ifd_rational_tag(
697+
data: bytes, endian: str, ifd_offset: int, target_tag: int, count: int = 1
698+
) -> list[tuple[int, int]] | None:
699+
"""Read RATIONAL values (numerator/denominator pairs) from a TIFF IFD tag."""
700+
if ifd_offset + 2 > len(data):
701+
return None
702+
num_entries = struct.unpack(f"{endian}H", data[ifd_offset : ifd_offset + 2])[0]
703+
for i in range(num_entries):
704+
entry_offset = ifd_offset + 2 + i * 12
705+
if entry_offset + 12 > len(data):
706+
break
707+
tag = struct.unpack(f"{endian}H", data[entry_offset : entry_offset + 2])[0]
708+
if tag == target_tag:
709+
offset = struct.unpack(
710+
f"{endian}I", data[entry_offset + 8 : entry_offset + 12]
711+
)[0]
712+
rationals = []
713+
for j in range(count):
714+
rat_offset = offset + j * 8
715+
if rat_offset + 8 > len(data):
716+
return None
717+
num = struct.unpack(f"{endian}I", data[rat_offset : rat_offset + 4])[0]
718+
den = struct.unpack(
719+
f"{endian}I", data[rat_offset + 4 : rat_offset + 8]
720+
)[0]
721+
rationals.append((num, den))
722+
return rationals
723+
return None
724+
725+
554726
def _extract_camera_make_and_model_from_utda_boxdata(
555727
utda_boxdata: dict,
556728
) -> tuple[str, str]:

mapillary_tools/geotag/video_extractors/native.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
from __future__ import annotations
77

8+
import datetime
9+
import logging
810
import sys
911
import typing as T
1012
from pathlib import Path
@@ -20,6 +22,8 @@
2022
from ...mp4 import construct_mp4_parser, simple_mp4_parser
2123
from .base import BaseVideoExtractor
2224

25+
LOG = logging.getLogger(__name__)
26+
2327

2428
class GoProVideoExtractor(BaseVideoExtractor):
2529
@override
@@ -69,15 +73,75 @@ def extract(self) -> types.VideoMetadata:
6973
if not camm_info.gps and not camm_info.mini_gps:
7074
raise exceptions.MapillaryGPXEmptyError("Empty GPS data found")
7175

76+
points: T.List[geo.Point]
77+
if camm_info.gps:
78+
points = T.cast(T.List[geo.Point], camm_info.gps)
79+
elif camm_info.mini_gps and camm_info.gps_datetime:
80+
# Type 5 points have no epoch timestamps, but the RMKN
81+
# maker note contains a GPS-derived UTC timestamp for the
82+
# first point. Use it to assign epoch times to all points.
83+
points = self._enrich_with_gps_datetime(
84+
camm_info.mini_gps, camm_info.gps_datetime
85+
)
86+
else:
87+
points = camm_info.mini_gps or []
88+
7289
return types.VideoMetadata(
7390
filename=self.video_path,
7491
filesize=utils.get_file_size(self.video_path),
7592
filetype=types.FileType.CAMM,
76-
points=T.cast(T.List[geo.Point], camm_info.gps or camm_info.mini_gps),
93+
points=points,
7794
make=camm_info.make,
7895
model=camm_info.model,
7996
)
8097

98+
@staticmethod
def _enrich_with_gps_datetime(
    points: T.List[geo.Point],
    gps_datetime: "datetime.datetime",
) -> T.List[geo.Point]:
    """Rebuild points as CAMMGPSPoint objects carrying a GPS epoch time.

    ``gps_datetime`` is used as the epoch of the first point. Every point
    gets::

        time_gps_epoch = gps_datetime.timestamp() + (point.time - points[0].time)

    Time, lat, lon, alt and angle are copied from the source point.
    ``gps_fix_type`` is 3 when the point has an altitude and 2 otherwise,
    and all accuracy and velocity fields are filled with 0.0. An empty
    input is returned unchanged.
    """
    if not points:
        return points

    reference_epoch = gps_datetime.timestamp()
    reference_time = points[0].time

    LOG.info(
        "Enriching %d CAMM Type 5 points with GPS epoch from RMKN timestamp %s",
        len(points),
        gps_datetime.isoformat(),
    )

    def _as_gps_point(src: geo.Point) -> geo.Point:
        # NOTE(review): fix type 3 vs 2 presumably means 3D vs 2D fix; confirm.
        fix_type = 2 if src.alt is None else 3
        return telemetry.CAMMGPSPoint(
            time=src.time,
            lat=src.lat,
            lon=src.lon,
            alt=src.alt,
            angle=src.angle,
            time_gps_epoch=reference_epoch + (src.time - reference_time),
            gps_fix_type=fix_type,
            horizontal_accuracy=0.0,
            vertical_accuracy=0.0,
            velocity_east=0.0,
            velocity_north=0.0,
            velocity_up=0.0,
            speed_accuracy=0.0,
        )

    return [_as_gps_point(src) for src in points]
144+
81145

82146
class BlackVueVideoExtractor(BaseVideoExtractor):
83147
@override

0 commit comments

Comments
 (0)