2727import cv2
2828import numpy as np
2929
30- from scenedetect .common import _USE_PTS_IN_DEVELOPMENT , MAX_FPS_DELTA , FrameTimecode , Timecode
30+ from scenedetect .common import MAX_FPS_DELTA , FrameTimecode , Timecode , framerate_to_fraction
3131from scenedetect .platform import get_file_name
3232from scenedetect .video_stream import (
3333 FrameRateUnavailable ,
@@ -111,7 +111,7 @@ def __init__(
111111 self ._cap : ty .Optional [cv2 .VideoCapture ] = (
112112 None # Reference to underlying cv2.VideoCapture object.
113113 )
114- self ._frame_rate : ty .Optional [float ] = None
114+ self ._frame_rate : ty .Optional [Fraction ] = None
115115
116116 # VideoCapture state
117117 self ._has_grabbed = False
@@ -144,7 +144,7 @@ def capture(self) -> cv2.VideoCapture:
144144 """Unique name used to identify this backend."""
145145
146146 @property
147- def frame_rate (self ) -> float :
147+ def frame_rate (self ) -> Fraction :
148148 assert self ._frame_rate
149149 return self ._frame_rate
150150
@@ -196,30 +196,25 @@ def aspect_ratio(self) -> float:
196196
197197 @property
198198 def timecode (self ) -> Timecode :
199- """Current position within stream as a Timecode. This is not frame accurate. """
199+ """Current position within stream as a Timecode."""
200200 # *NOTE*: Although OpenCV has `CAP_PROP_PTS`, it doesn't seem to be reliable. For now, we
201- # use `CAP_PROP_POS_MSEC` instead, with a time base of 1/1000. Unfortunately this means that
202- # rounding errors will affect frame accuracy with this backend .
203- pts = self ._cap .get (cv2 .CAP_PROP_POS_MSEC )
204- time_base = Fraction (1 , 1000 )
205- return Timecode (pts = round (pts ), time_base = time_base )
201+ # use `CAP_PROP_POS_MSEC` instead, converting to microseconds for sufficient precision to
202+ # avoid frame-boundary rounding errors at common framerates like 24000/1001 .
203+ ms = self ._cap .get (cv2 .CAP_PROP_POS_MSEC )
204+ time_base = Fraction (1 , 1000000 )
205+ return Timecode (pts = round (ms * 1000 ), time_base = time_base )
206206
207207 @property
208208 def position (self ) -> FrameTimecode :
209- # TODO(https://scenedetect.com/issue/168): See if there is a better way to do this, or
210- # add a config option before landing this.
211- if _USE_PTS_IN_DEVELOPMENT :
212- timecode = self .timecode
213- # If PTS is 0 but we've read frames, derive from frame number.
214- # This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
215- if timecode .pts == 0 and self .frame_number > 0 :
216- time_sec = (self .frame_number - 1 ) / self .frame_rate
217- pts = round (time_sec * 1000 )
218- timecode = Timecode (pts = pts , time_base = Fraction (1 , 1000 ))
219- return FrameTimecode (timecode = timecode , fps = self .frame_rate )
220- if self .frame_number < 1 :
221- return self .base_timecode
222- return self .base_timecode + (self .frame_number - 1 )
209+ timecode = self .timecode
210+ # If PTS is 0 but we've read frames, derive from frame number.
211+ # This handles image sequences and cases where CAP_PROP_POS_MSEC is unreliable.
212+ if timecode .pts == 0 and self .frame_number > 0 :
213+ fps = self .frame_rate
214+ time_base = Fraction (1 , fps .numerator )
215+ pts = (self .frame_number - 1 ) * fps .denominator
216+ timecode = Timecode (pts = pts , time_base = time_base )
217+ return FrameTimecode (timecode = timecode , fps = self .frame_rate )
223218
224219 @property
225220 def position_ms (self ) -> float :
@@ -235,8 +230,9 @@ def seek(self, target: ty.Union[FrameTimecode, float, int]):
235230 if target < 0 :
236231 raise ValueError ("Target seek position cannot be negative!" )
237232
238- # TODO(https://scenedetect.com/issue/168): Shouldn't use frames for VFR video here.
239- # Have to seek one behind and call grab() after to that the VideoCapture
233+ # Seeking is done via frame number since OpenCV doesn't support PTS-based seeking.
234+ # After seeking, position returns actual PTS from CAP_PROP_POS_MSEC.
235+ # Have to seek one behind and call grab() after so that the VideoCapture
240236 # returns a valid timestamp when using CAP_PROP_POS_MSEC.
241237 target_frame_cv2 = (self .base_timecode + target ).frame_num
242238 if target_frame_cv2 > 0 :
@@ -329,14 +325,11 @@ def _open_capture(self, framerate: ty.Optional[float] = None):
329325 raise FrameRateUnavailable ()
330326
331327 self ._cap = cap
332- self ._frame_rate = framerate
328+ self ._frame_rate = framerate_to_fraction ( framerate )
333329 self ._has_grabbed = False
334330 cap .set (cv2 .CAP_PROP_ORIENTATION_AUTO , 1.0 ) # https://github.com/opencv/opencv/issues/26795
335331
336332
337- # TODO(https://scenedetect.com/issues/168): Support non-monotonic timing for `position`. VFR timecode
338- # support is a prerequisite for this. Timecodes are currently calculated by multiplying the
339- # framerate by number of frames. Actual elapsed time can be obtained via `position_ms` for now.
340333class VideoCaptureAdapter (VideoStream ):
341334 """Adapter for existing VideoCapture objects. Unlike VideoStreamCv2, this class supports
342335 VideoCaptures which may not support seeking.
@@ -378,7 +371,7 @@ def __init__(
378371 raise FrameRateUnavailable ()
379372
380373 self ._cap = cap
381- self ._frame_rate : float = framerate
374+ self ._frame_rate : Fraction = framerate_to_fraction ( framerate )
382375 self ._num_frames = 0
383376 self ._max_read_attempts = max_read_attempts
384377 self ._decode_failures = 0
@@ -408,7 +401,7 @@ def capture(self) -> cv2.VideoCapture:
408401 """Unique name used to identify this backend."""
409402
410403 @property
411- def frame_rate (self ) -> float :
404+ def frame_rate (self ) -> Fraction :
412405 """Framerate in frames/sec."""
413406 assert self ._frame_rate
414407 return self ._frame_rate
@@ -439,8 +432,6 @@ def frame_size(self) -> ty.Tuple[int, int]:
439432 @property
440433 def duration (self ) -> ty .Optional [FrameTimecode ]:
441434 """Duration of the stream as a FrameTimecode, or None if non terminating."""
442- # TODO(https://scenedetect.com/issue/168): This will be incorrect for VFR. See if there is
443- # another property we can use to estimate the video length correctly.
444435 frame_count = math .trunc (self ._cap .get (cv2 .CAP_PROP_FRAME_COUNT ))
445436 if frame_count > 0 :
446437 return self .base_timecode + frame_count
@@ -455,7 +446,12 @@ def aspect_ratio(self) -> float:
455446 def position (self ) -> FrameTimecode :
456447 if self .frame_number < 1 :
457448 return self .base_timecode
458- return self .base_timecode + (self .frame_number - 1 )
449+ # Synthesize a Timecode from frame count and rational framerate.
450+ fps = self .frame_rate
451+ time_base = Fraction (1 , fps .numerator )
452+ pts = (self .frame_number - 1 ) * fps .denominator
453+ timecode = Timecode (pts = pts , time_base = time_base )
454+ return FrameTimecode (timecode = timecode , fps = fps )
459455
460456 @property
461457 def position_ms (self ) -> float :
0 commit comments