Skip to content

Commit 38f59c8

Browse files
committed
[timecode] Finalize VFR support #168
1 parent 11381c5 commit 38f59c8

10 files changed

Lines changed: 332 additions & 140 deletions

File tree

scenedetect/_cli/commands.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,12 +405,13 @@ def _save_xml_fcp(
405405
ElementTree.SubElement(sequence, "duration").text = f"{duration.frame_num}"
406406

407407
rate = ElementTree.SubElement(sequence, "rate")
408-
ElementTree.SubElement(rate, "timebase").text = str(context.video_stream.frame_rate)
408+
fps = float(context.video_stream.frame_rate)
409+
ElementTree.SubElement(rate, "timebase").text = str(round(fps))
409410
ElementTree.SubElement(rate, "ntsc").text = "False"
410411

411412
timecode = ElementTree.SubElement(sequence, "timecode")
412413
tc_rate = ElementTree.SubElement(timecode, "rate")
413-
ElementTree.SubElement(tc_rate, "timebase").text = str(context.video_stream.frame_rate)
414+
ElementTree.SubElement(tc_rate, "timebase").text = str(round(fps))
414415
ElementTree.SubElement(tc_rate, "ntsc").text = "False"
415416
ElementTree.SubElement(timecode, "frame").text = "0"
416417
ElementTree.SubElement(timecode, "displayformat").text = "NDF"
@@ -427,7 +428,7 @@ def _save_xml_fcp(
427428
ElementTree.SubElement(clip, "name").text = f"Shot {i + 1}"
428429
ElementTree.SubElement(clip, "enabled").text = "TRUE"
429430
ElementTree.SubElement(clip, "rate").append(
430-
ElementTree.fromstring(f"<timebase>{context.video_stream.frame_rate}</timebase>")
431+
ElementTree.fromstring(f"<timebase>{round(fps)}</timebase>")
431432
)
432433
# TODO: Are these supposed to be frame numbers or another format?
433434
ElementTree.SubElement(clip, "start").text = str(start.frame_num)
@@ -501,7 +502,7 @@ def save_otio(
501502
video_name = context.video_stream.name
502503
video_path = os.path.abspath(context.video_stream.path)
503504
video_base_name = os.path.basename(context.video_stream.path)
504-
frame_rate = context.video_stream.frame_rate
505+
frame_rate = float(context.video_stream.frame_rate)
505506

506507
# List of track mapping to resource type.
507508
# TODO(https://scenedetect.com/issues/497): Allow OTIO export without an audio track.

scenedetect/backends/moviepy.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
"""
1818

1919
import typing as ty
20+
from fractions import Fraction
2021
from logging import getLogger
2122

2223
import cv2
2324
import numpy as np
2425
from moviepy.video.io.ffmpeg_reader import FFMPEG_VideoReader
2526

2627
from scenedetect.backends.opencv import VideoStreamCv2
27-
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, FrameTimecode
28+
from scenedetect.common import FrameTimecode, Timecode, framerate_to_fraction
2829
from scenedetect.platform import get_file_name
2930
from scenedetect.video_stream import SeekError, VideoOpenFailure, VideoStream
3031

@@ -83,9 +84,9 @@ def __init__(
8384
"""Unique name used to identify this backend."""
8485

8586
@property
86-
def frame_rate(self) -> float:
87-
"""Framerate in frames/sec."""
88-
return self._reader.fps
87+
def frame_rate(self) -> Fraction:
88+
"""Framerate in frames/sec as a rational Fraction."""
89+
return framerate_to_fraction(self._reader.fps)
8990

9091
@property
9192
def path(self) -> ty.Union[bytes, str]:
@@ -135,7 +136,14 @@ def position(self) -> FrameTimecode:
135136
calling `read`. This will always return 0 (e.g. be equal to `base_timecode`) if no frames
136137
have been `read` yet."""
137138
frame_number = max(self._frame_number - 1, 0)
138-
return FrameTimecode(frame_number, self.frame_rate)
139+
# Synthesize a Timecode from the frame count and rational framerate.
140+
# MoviePy assumes CFR, so this is equivalent to frame-based timing.
141+
# Use the framerate denominator as the time_base denominator for exact timing.
142+
fps = self.frame_rate
143+
time_base = Fraction(1, fps.numerator)
144+
pts = frame_number * fps.denominator
145+
timecode = Timecode(pts=pts, time_base=time_base)
146+
return FrameTimecode(timecode=timecode, fps=fps)
139147

140148
@property
141149
def position_ms(self) -> float:
@@ -173,10 +181,6 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
173181
ValueError: `target` is not a valid value (i.e. it is negative).
174182
"""
175183
success = False
176-
if _USE_PTS_IN_DEVELOPMENT:
177-
# TODO(https://scenedetect.com/issue/168): Need to handle PTS here.
178-
raise NotImplementedError()
179-
180184
if not isinstance(target, FrameTimecode):
181185
target = FrameTimecode(target, self.frame_rate)
182186
try:

scenedetect/backends/opencv.py

Lines changed: 30 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import cv2
2828
import numpy as np
2929

30-
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, MAX_FPS_DELTA, FrameTimecode, Timecode
30+
from scenedetect.common import MAX_FPS_DELTA, FrameTimecode, Timecode, framerate_to_fraction
3131
from scenedetect.platform import get_file_name
3232
from scenedetect.video_stream import (
3333
FrameRateUnavailable,
@@ -111,7 +111,7 @@ def __init__(
111111
self._cap: ty.Optional[cv2.VideoCapture] = (
112112
None # Reference to underlying cv2.VideoCapture object.
113113
)
114-
self._frame_rate: ty.Optional[float] = None
114+
self._frame_rate: ty.Optional[Fraction] = None
115115

116116
# VideoCapture state
117117
self._has_grabbed = False
@@ -144,7 +144,7 @@ def capture(self) -> cv2.VideoCapture:
144144
"""Unique name used to identify this backend."""
145145

146146
@property
147-
def frame_rate(self) -> float:
147+
def frame_rate(self) -> Fraction:
148148
assert self._frame_rate
149149
return self._frame_rate
150150

@@ -196,30 +196,25 @@ def aspect_ratio(self) -> float:
196196

197197
@property
198198
def timecode(self) -> Timecode:
199-
"""Current position within stream as a Timecode. This is not frame accurate."""
199+
"""Current position within stream as a Timecode."""
200200
# *NOTE*: Although OpenCV has `CAP_PROP_PTS`, it doesn't seem to be reliable. For now, we
201-
# use `CAP_PROP_POS_MSEC` instead, with a time base of 1/1000. Unfortunately this means that
202-
# rounding errors will affect frame accuracy with this backend.
203-
pts = self._cap.get(cv2.CAP_PROP_POS_MSEC)
204-
time_base = Fraction(1, 1000)
205-
return Timecode(pts=round(pts), time_base=time_base)
201+
# use `CAP_PROP_POS_MSEC` instead, converting to microseconds for sufficient precision to
202+
# avoid frame-boundary rounding errors at common framerates like 24000/1001.
203+
ms = self._cap.get(cv2.CAP_PROP_POS_MSEC)
204+
time_base = Fraction(1, 1000000)
205+
return Timecode(pts=round(ms * 1000), time_base=time_base)
206206

207207
@property
208208
def position(self) -> FrameTimecode:
209-
# TODO(https://scenedetect.com/issue/168): See if there is a better way to do this, or
210-
# add a config option before landing this.
211-
if _USE_PTS_IN_DEVELOPMENT:
212-
timecode = self.timecode
213-
# If PTS is 0 but we've read frames, derive from frame number.
214-
# This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
215-
if timecode.pts == 0 and self.frame_number > 0:
216-
time_sec = (self.frame_number - 1) / self.frame_rate
217-
pts = round(time_sec * 1000)
218-
timecode = Timecode(pts=pts, time_base=Fraction(1, 1000))
219-
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
220-
if self.frame_number < 1:
221-
return self.base_timecode
222-
return self.base_timecode + (self.frame_number - 1)
209+
timecode = self.timecode
210+
# If PTS is 0 but we've read frames, derive from frame number.
211+
# This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
212+
if timecode.pts == 0 and self.frame_number > 0:
213+
fps = self.frame_rate
214+
time_base = Fraction(1, fps.numerator)
215+
pts = (self.frame_number - 1) * fps.denominator
216+
timecode = Timecode(pts=pts, time_base=time_base)
217+
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
223218

224219
@property
225220
def position_ms(self) -> float:
@@ -235,8 +230,9 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
235230
if target < 0:
236231
raise ValueError("Target seek position cannot be negative!")
237232

238-
# TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
239-
# Have to seek one behind and call grab() after to that the VideoCapture
233+
# Seeking is done via frame number since OpenCV doesn't support PTS-based seeking.
234+
# After seeking, position returns actual PTS from CAP_PROP_POS_MSEC.
235+
# Have to seek one behind and call grab() after so that the VideoCapture
240236
# returns a valid timestamp when using CAP_PROP_POS_MSEC.
241237
target_frame_cv2 = (self.base_timecode + target).frame_num
242238
if target_frame_cv2 > 0:
@@ -329,14 +325,11 @@ def _open_capture(self, framerate: ty.Optional[float] = None):
329325
raise FrameRateUnavailable()
330326

331327
self._cap = cap
332-
self._frame_rate = framerate
328+
self._frame_rate = framerate_to_fraction(framerate)
333329
self._has_grabbed = False
334330
cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 1.0) # https://github.com/opencv/opencv/issues/26795
335331

336332

337-
# TODO(https://scenedetect.com/issues/168): Support non-monotonic timing for `position`. VFR timecode
338-
# support is a prerequisite for this. Timecodes are currently calculated by multiplying the
339-
# framerate by number of frames. Actual elapsed time can be obtained via `position_ms` for now.
340333
class VideoCaptureAdapter(VideoStream):
341334
"""Adapter for existing VideoCapture objects. Unlike VideoStreamCv2, this class supports
342335
VideoCaptures which may not support seeking.
@@ -378,7 +371,7 @@ def __init__(
378371
raise FrameRateUnavailable()
379372

380373
self._cap = cap
381-
self._frame_rate: float = framerate
374+
self._frame_rate: Fraction = framerate_to_fraction(framerate)
382375
self._num_frames = 0
383376
self._max_read_attempts = max_read_attempts
384377
self._decode_failures = 0
@@ -408,7 +401,7 @@ def capture(self) -> cv2.VideoCapture:
408401
"""Unique name used to identify this backend."""
409402

410403
@property
411-
def frame_rate(self) -> float:
404+
def frame_rate(self) -> Fraction:
412405
"""Framerate in frames/sec."""
413406
assert self._frame_rate
414407
return self._frame_rate
@@ -439,8 +432,6 @@ def frame_size(self) -> ty.Tuple[int, int]:
439432
@property
440433
def duration(self) -> ty.Optional[FrameTimecode]:
441434
"""Duration of the stream as a FrameTimecode, or None if non terminating."""
442-
# TODO(https://scenedetect.com/issue/168): This will be incorrect for VFR. See if there is
443-
# another property we can use to estimate the video length correctly.
444435
frame_count = math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))
445436
if frame_count > 0:
446437
return self.base_timecode + frame_count
@@ -455,7 +446,12 @@ def aspect_ratio(self) -> float:
455446
def position(self) -> FrameTimecode:
456447
if self.frame_number < 1:
457448
return self.base_timecode
458-
return self.base_timecode + (self.frame_number - 1)
449+
# Synthesize a Timecode from frame count and rational framerate.
450+
fps = self.frame_rate
451+
time_base = Fraction(1, fps.numerator)
452+
pts = (self.frame_number - 1) * fps.denominator
453+
timecode = Timecode(pts=pts, time_base=time_base)
454+
return FrameTimecode(timecode=timecode, fps=fps)
459455

460456
@property
461457
def position_ms(self) -> float:

scenedetect/backends/pyav.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import av
1919
import numpy as np
2020

21-
from scenedetect.common import _USE_PTS_IN_DEVELOPMENT, MAX_FPS_DELTA, FrameTimecode, Timecode
21+
from scenedetect.common import MAX_FPS_DELTA, FrameTimecode, Timecode
2222
from scenedetect.platform import get_file_name
2323
from scenedetect.video_stream import FrameRateUnavailable, VideoOpenFailure, VideoStream
2424

@@ -172,8 +172,8 @@ def duration(self) -> FrameTimecode:
172172
return self.base_timecode + self._duration_frames
173173

174174
@property
175-
def frame_rate(self) -> float:
176-
"""Frame rate in frames/sec."""
175+
def frame_rate(self) -> Fraction:
176+
"""Frame rate in frames/sec as a rational Fraction."""
177177
return self._frame_rate
178178

179179
@property
@@ -184,10 +184,8 @@ def position(self) -> FrameTimecode:
184184
to the presentation time 0. Returns 0 even if `frame_number` is 1."""
185185
if self._frame is None:
186186
return self.base_timecode
187-
if _USE_PTS_IN_DEVELOPMENT:
188-
timecode = Timecode(pts=self._frame.pts, time_base=self._frame.time_base)
189-
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
190-
return FrameTimecode(round(self._frame.time * self.frame_rate), self.frame_rate)
187+
timecode = Timecode(pts=self._frame.pts, time_base=self._frame.time_base)
188+
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
191189

192190
@property
193191
def position_ms(self) -> float:
@@ -204,10 +202,8 @@ def frame_number(self) -> int:
204202
Will return 0 until the first frame is `read`."""
205203

206204
if self._frame:
207-
if _USE_PTS_IN_DEVELOPMENT:
208-
# frame_number is 1-indexed, so add 1 to the 0-based frame position.
209-
return round(self._frame.time * self.frame_rate) + 1
210-
return self.position.frame_num + 1
205+
# frame_number is 1-indexed, so add 1 to the 0-based frame position.
206+
return round(self._frame.time * self.frame_rate) + 1
211207
return 0
212208

213209
@property
@@ -258,7 +254,6 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]) -> None:
258254
raise ValueError("Target cannot be negative!")
259255
beginning = target == 0
260256

261-
# TODO(https://scenedetect.com/issues/168): This breaks with PTS mode enabled.
262257
target = self.base_timecode + target
263258
if target >= 1:
264259
target = target - 1

0 commit comments

Comments
 (0)