This notebook processes videos in a directory where files come in pairs with names like:
`running_1.mp4` and `running_2.mp4`, or `swimming_1.MOV` and `swimming_2.MOV`.
For each pair, it automatically:
- Finds all valid `_1`/`_2` pairs in the directory.
- From the `_1` video, creates:
  - `*_clip1` → first 60 seconds
  - `*_clip2` → a 60-second clip from the middle
  - `*_clip3` → last 60 seconds
- From the `_2` video, creates:
  - `*_clip4` → a 60-second clip from the middle
  - `*_clip5` → last 60 seconds
All new clips are saved to the same directory as the original videos, keeping the same file extension.
If a video is shorter than 60 seconds, the clip will simply be as long as the video allows.
We will use:
- `pathlib`: to handle file paths in a clean, OS-independent way.
- `re` (regular expressions): to detect file name patterns like `prefix_1.ext` and `prefix_2.ext`.
- `moviepy.editor`: to load videos and write subclips to new files.
# If moviepy is not installed in your environment, run this cell once.
# If it is already installed, you can skip this cell.
!python3 -m pip install moviepy==1.0.3
from pathlib import Path
import re
from moviepy.editor import VideoFileClip, vfx
from moviepy.video.fx.all import crop
from tqdm.auto import tqdm
# Allowed video file extensions. They are compared against
# `Path.suffix.lower()`, so every entry must be lowercase and include the
# leading dot.
VIDEO_EXTENSIONS = {".mp4", ".mov", ".mkv", ".avi", ".m4v"}

# Desired duration of each extracted segment, in seconds.
SEGMENT_LENGTH_SECONDS = 60

# Side length, in pixels, of the final square output clips.
TARGET_SIZE = 1080

# Helper functions: pair detection and segment computation
The functions below:
- `find_video_pairs(directory)`
  - Scans a directory for video files whose names match `<prefix>_<index>.<ext>`.
  - Groups them by `(prefix, ext)` and returns a list of pairs where both `_1` and `_2` exist.
- Segment computation functions
  - `compute_first_segment(duration)` → first 60 seconds.
  - `compute_middle_segment(duration)` → 60 seconds around the middle.
  - `compute_last_segment(duration)` → last 60 seconds.
def find_video_pairs(directory: Path, extensions=None):
    """
    Find pairs of videos in `directory` where filenames follow:
        <prefix>_<index>.<ext>
    and both index '1' and '2' exist for the same (prefix, ext).

    Extensions are compared in lowercase, so `run_1.MP4` and `run_2.mp4`
    belong to the same group. Only the directory itself is scanned
    (no recursion), and only regular files are considered.

    Args:
        directory: Folder to scan.
        extensions: Optional collection of accepted suffixes, lowercase and
            including the leading dot. Defaults to VIDEO_EXTENSIONS.

    Returns:
        List of tuples: [(path_to_prefix_1, path_to_prefix_2), ...],
        sorted by (prefix, ext) so the processing order is reproducible.
    """
    allowed = VIDEO_EXTENSIONS if extensions is None else extensions
    pattern = re.compile(r"^(?P<prefix>.+)_(?P<index>\d+)$")  # match <prefix>_<index>

    # (prefix, ext) -> {index string -> path}
    groups = {}
    for path in directory.iterdir():
        ext = path.suffix.lower()
        if ext not in allowed or not path.is_file():
            continue
        match = pattern.match(path.stem)
        if not match:
            continue
        groups.setdefault((match.group("prefix"), ext), {})[match.group("index")] = path

    # iterdir() yields entries in arbitrary order; sort the group keys so
    # repeated runs process (and report) the pairs in the same order.
    return [
        (groups[key]["1"], groups[key]["2"])
        for key in sorted(groups)
        if "1" in groups[key] and "2" in groups[key]
    ]
def compute_first_segment(duration: float, seg_len: float = SEGMENT_LENGTH_SECONDS):
    """
    Return (start, end) for the first segment, up to seg_len seconds.

    If the video is shorter than seg_len, the segment ends at `duration`.
    A non-positive duration yields the empty segment (0.0, 0.0), matching
    compute_middle_segment and compute_last_segment, instead of an
    inverted range whose end lies before its start.
    """
    if duration <= 0:
        return 0.0, 0.0
    return 0.0, min(seg_len, duration)
def compute_middle_segment(duration: float, seg_len: float = SEGMENT_LENGTH_SECONDS):
    """
    Compute the (start, end) window of seg_len seconds centred on the midpoint.

    A non-positive duration gives (0.0, 0.0). A video no longer than
    seg_len gives the whole video, (0.0, duration).
    """
    if duration <= 0:
        return 0.0, 0.0
    if duration <= seg_len:
        return 0.0, duration

    # Place the window symmetrically around the midpoint, never before 0.
    window_start = max(0.0, duration / 2.0 - seg_len / 2.0)
    window_end = window_start + seg_len
    if window_end <= duration:
        return window_start, window_end

    # The window would run past the end: pin it to the end of the video
    # and slide the start back by seg_len (clamped at 0).
    return max(0.0, duration - seg_len), duration
def compute_last_segment(duration: float, seg_len: float = SEGMENT_LENGTH_SECONDS):
    """
    Compute the (start, end) window covering the final seg_len seconds.

    A non-positive duration gives (0.0, 0.0). A video no longer than
    seg_len gives the whole video, (0.0, duration).
    """
    if duration <= 0:
        return 0.0, 0.0

    # Short videos start at 0; otherwise back off seg_len from the end.
    tail_start = 0.0 if duration <= seg_len else max(0.0, duration - seg_len)
    return tail_start, duration


# Helper functions: resizing and writing clips
The functions below handle resizing and saving clips:
- `make_square_clip(clip, target_size)`
  - Resizes the clip so that the smaller dimension becomes at least `target_size`.
  - Center-crops to a square of size `target_size × target_size`.
- `write_segment_clip(clip, input_path, start, end, suffix)`
  - Extracts a subclip between `start` and `end`.
  - Resizes and crops it to 1080 × 1080.
  - Writes it to disk with the appropriate suffix.
- `process_video_for_index(input_path, index)`
  - For `_1` videos: creates `_clip1`, `_clip2`, `_clip3`.
  - For `_2` videos: creates `_clip4`, `_clip5`.
def make_square_clip(clip, target_size: int = TARGET_SIZE):
    """
    Resize `clip` so that the smaller side equals target_size, then
    center-crop to (target_size x target_size).

    The aspect ratio is preserved up to integer rounding of the longer
    side. A clip reporting a zero width or height is returned unchanged.
    """
    w, h = clip.size
    if w == 0 or h == 0:
        return clip

    # Compute the resized dimensions explicitly with integer ceiling
    # division rather than handing moviepy a float scale factor: the
    # product `min(w, h) * (target_size / min(w, h))` can land just below
    # target_size in floating point, and a truncated result would leave
    # the short side one pixel too small for the crop window below.
    short_side = min(w, h)
    scaled_w = -(-w * target_size // short_side)
    scaled_h = -(-h * target_size // short_side)
    clip_resized = clip.fx(vfx.resize, (scaled_w, scaled_h))

    # Centre a target_size x target_size window on the resized frame.
    w2, h2 = clip_resized.size
    x_center = w2 / 2.0
    y_center = h2 / 2.0

    x1 = int(x_center - target_size / 2.0)
    x2 = x1 + target_size
    y1 = int(y_center - target_size / 2.0)
    y2 = y1 + target_size

    return crop(clip_resized, x1=x1, y1=y1, x2=x2, y2=y2)
def write_segment_clip(clip: VideoFileClip, input_path: Path, start: float, end: float, suffix: str):
    """
    Cut [start, end] seconds out of `clip`, square it to
    TARGET_SIZE x TARGET_SIZE, and save it next to `input_path`.

    The output file is named <stem><suffix><ext>, reusing the stem and
    extension of `input_path`. Segments with a non-positive length are
    reported and skipped.
    """
    if end - start <= 0:
        print(f" [!] Skipping {input_path.name}{suffix}: non-positive segment length.")
        return

    # Same folder and extension as the source, with the clip suffix appended.
    destination = input_path.with_name(input_path.stem + suffix + input_path.suffix)

    # Cut the time window, then resize + center-crop it to a square.
    squared = make_square_clip(clip.subclip(start, end), TARGET_SIZE)

    print(f" -> Writing {destination.name} [{start:.2f}s - {end:.2f}s] at {TARGET_SIZE}x{TARGET_SIZE}")

    # Codec/bitrate settings can be adjusted here if needed.
    encode_options = {
        "codec": "libx264",
        "audio_codec": "aac",
        "verbose": False,
        "logger": None,
    }
    squared.write_videofile(str(destination), **encode_options)
def process_video_for_index(input_path: Path, index: int):
    """
    Cut the clips that belong to one video of a pair.

    - index 1 produces _clip1 (first), _clip2 (middle), _clip3 (last)
    - index 2 produces _clip4 (middle), _clip5 (last)

    The index embedded in the filename wins over the `index` argument;
    a mismatch is reported with a warning. Files that do not match
    <prefix>_<index>, or whose duration is zero/unknown, are skipped.
    """
    input_path = Path(input_path)

    parsed = re.match(r"^(?P<prefix>.+)_(?P<index>\d+)$", input_path.stem)
    if parsed is None:
        print(f"[!] Skipping {input_path.name}: does not match <prefix>_<index> pattern.")
        return

    true_index = int(parsed.group("index"))
    if true_index != index:
        print(f"[!] Warning: Expected index {index}, but filename has index {true_index} in {input_path.name}. Using {true_index}.")
        index = true_index

    # For each supported index: (output suffix, segment function), listed
    # in the order the clips are written.
    clip_plans = {
        1: (
            ("_clip1", compute_first_segment),
            ("_clip2", compute_middle_segment),
            ("_clip3", compute_last_segment),
        ),
        2: (
            ("_clip4", compute_middle_segment),
            ("_clip5", compute_last_segment),
        ),
    }

    with VideoFileClip(str(input_path)) as clip:
        duration = clip.duration or 0.0
        print(f"Processing {input_path.name} (duration = {duration:.2f} s, index = {index})")

        if duration <= 0:
            print(" [!] Duration is zero or invalid. Skipping.")
            return

        plan = clip_plans.get(index)
        if plan is None:
            print(f" [!] Index {index} not handled (only 1 and 2 are supported).")
            return

        for clip_suffix, compute_segment in plan:
            seg_start, seg_end = compute_segment(duration)
            write_segment_clip(clip, input_path, seg_start, seg_end, clip_suffix)


# Set the input directory
Update the input_dir path below to point to the folder that contains your videos.
All output clips will be saved in the same directory as their corresponding input videos.
# Folder that contains the input videos; output clips are written here too.
input_dir = "/Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/Students_data/Zach_Buller/videos"  # <-- change this line
video_dir = Path(input_dir).expanduser().resolve()
# is_dir() rather than exists(): a path that points at a regular file would
# pass exists() and only fail later, when the directory is scanned.
if not video_dir.is_dir():
    raise ValueError(f"Directory not found: {video_dir}")
print("Using directory:", video_dir)

# Set Your Input Directory Here
Replace the path below with your video folder path.
# Folder that contains the input videos; output clips are written here too.
input_dir = "/Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/Students_data/Zach_Buller/videos"  # <-- change this line
video_dir = Path(input_dir).expanduser().resolve()
# is_dir() rather than exists(): a path that points at a regular file would
# pass exists() and only fail later, when the directory is scanned.
if not video_dir.is_dir():
    raise ValueError(f"Directory not found: {video_dir}")
print("Using directory:", video_dir)
Using directory: /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/Students_data/Zach_Buller/videos
Run the pipeline¶
This cell will:
- Find all valid `(prefix_1, prefix_2)` video pairs.
- For each pair:
  - Process the `_1` video and create clips 1–3.
  - Process the `_2` video and create clips 4–5.
Show an overall progress bar across all pairs.
When it finishes, your output clips (*_clip1–*_clip5) will be available in the same directory as the originals.
# Discover the (_1, _2) pairs, list them, then cut the clips for each pair.
pairs = find_video_pairs(video_dir)

if pairs:
    # Report what was found before starting the slow part.
    print(f"Found {len(pairs)} pair(s):\n")
    print("\n".join(f" - {first.name} & {second.name}" for first, second in pairs))

    print("\nStarting processing...\n")

    for v1, v2 in tqdm(pairs, desc="Processing video pairs"):
        print("\n" + "=" * 70)
        print(f"Pair: {v1.name} | {v2.name}")

        # The _1 video yields clips 1, 2, 3; the _2 video yields clips 4, 5.
        process_video_for_index(v1, index=1)
        process_video_for_index(v2, index=2)

    print("\nAll pairs processed.")
else:
    print("No valid (prefix_1, prefix_2) video pairs found in this directory.")

# Output:
# Found 1 pair(s):
- Danny_1.MP4 & Danny_2.MP4
Starting processing...
======================================================================
Pair: Danny_1.MP4 | Danny_2.MP4
Processing Danny_1.MP4 (duration = 896.90 s, index = 1)
-> Writing Danny_1_clip1.MP4 [0.00s - 60.00s] at 1080x1080
-> Writing Danny_1_clip2.MP4 [418.45s - 478.45s] at 1080x1080
-> Writing Danny_1_clip3.MP4 [836.90s - 896.90s] at 1080x1080
Processing Danny_2.MP4 (duration = 846.36 s, index = 2)
-> Writing Danny_2_clip4.MP4 [393.18s - 453.18s] at 1080x1080
-> Writing Danny_2_clip5.MP4 [786.36s - 846.36s] at 1080x1080
All pairs processed.