The short version is that I am getting an error telling me my output should have should_snapshot=True — even though, as far as I can tell, it already does. Here is the code in question:
import io
import shutil
import tempfile

import cv2
import pandas as pd
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

from transforms.api import transform, Input, Output, lightweight
from transforms.external.systems import use_external_systems, external_systems, Source, EgressPolicy, ExportControl
from transforms.mediasets import MediaSetInput, MediaSetOutput
@lightweight(gpu_type='NVIDIA_T4')
# NOTE(review): the ValueError in your traceback fires while the decorators are
# composing (the stack goes through ContainerTransform.__init__ at import time,
# not through your compute body). That suggests the should_snapshot=True kwarg
# is being dropped when @lightweight re-wraps the transform built below it.
# Try reordering so @external_systems sits ABOVE @lightweight (or temporarily
# remove it) and see whether the incremental-output path stops being taken --
# confirm against the current transforms docs for decorator ordering.
@external_systems(
    jrp_external_connection=Source("ri.magritte..source.7f4aad1c-f213-43b5-a266-d5f8747ed7af")
)
@transform(
    # MediaSetInput not Input
    video_input=MediaSetInput("ri.mio.main.media-set.52287ebd-f5d2-4c90-91ad-6d45146b1fdf"),
    output=MediaSetOutput("ri.mio.main.media-set.8d7d566a-14a3-43af-aaad-fc812bb5e5c2", should_snapshot=True),
)
def compute(ctx, jrp_external_connection, video_input, output):
    """Sample every 30th frame of one video media item, caption it with BLIP,
    and write each sampled frame to the output media set as a JPEG.

    The caption is computed but not yet persisted; it belongs in a separate
    dataset output (media sets hold media items, not tabular rows).
    """
    # Load BLIP processor and model once, before the frame loop.
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    # cv2.VideoCapture expects a filesystem path (or URL) string; the object
    # returned by get_media_item is not one, so passing it directly cannot
    # work. Materialize the video into a local temp file first.
    # TODO(review): confirm what get_media_item returns in your transforms
    # version -- if it is a response object rather than a raw stream, copy
    # from its .raw / .content accordingly.
    video_item = video_input.get_media_item("ri.mio.main.media-item.0198aa08-e56f-7157-a066-c92235051fcd")
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        shutil.copyfileobj(video_item, tmp)
        video_path = tmp.name

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently iterating zero frames.
        raise RuntimeError(f"cv2 could not open video at {video_path}")
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Sample every 30th frame (adjust as needed)
        for i in range(0, frame_count, 30):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue
            # OpenCV decodes as BGR; PIL expects RGB.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            inputs = processor(image, return_tensors="pt").to(device)
            out = model.generate(**inputs)
            # TODO: captions are discarded for now; collect them and write to
            # a dataset output once frame extraction works.
            caption = processor.decode(out[0], skip_special_tokens=True)
            # Serialize the frame to JPEG bytes in memory.
            img_bytes = io.BytesIO()
            image.save(img_bytes, format='JPEG')
            img_bytes.seek(0)
            asset_name = f"astronaut_vid_frame_{i}.jpg"
            # NOTE(review): put_media_item is documented to take a file-like
            # object -- pass the rewound BytesIO rather than raw bytes.
            output.put_media_item(img_bytes, asset_name)
    finally:
        # Always release the capture, even if captioning raises mid-loop.
        cap.release()
I could not find an “extract frames from mp4” transform in Pipeline Builder, so here I am trying to do it in Code Repositories. I’ve ironed out all the complaints about libraries/dependencies. Also, the transformers library (recommended by AI Assist) was trying to reach out to Hugging Face, so I made an egress policy, and the network connection error seems to have gone away. Here’s the actual complaint in the build, about 6 minutes into the job:
Job failed with status 1: Traceback (most recent call last): File "/foundry/python_environment/lib/python3.9/runpy.py", line 197, in _run_module_as_main return _run_code(code, main_globals, None, File "/foundry/python_environment/lib/python3.9/runpy.py", line 87, in _run_code exec(code, run_globals) File "/foundry/user_code/myproject/datasets/examples.py", line 25, in <module> def compute(ctx, jrp_external_connection, video_input, output): File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_decorators.py", line 133, in _lightweight return ContainerTransform( File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_transform.py", line 112, in __init__ self.__name__ = transform.__name__ File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_transform.py", line 158, in __name__ self.compute() File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_transform.py", line 205, in compute self._compute() File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_transform.py", line 275, in _compute inputs, outputs = self._params.instantiate_params(self._incremental) File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_configuration.py", line 200, in instantiate_params return ParamInstantiator(self, incremental).instantiate() File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_param_instantiator.py", line 49, in instantiate outputs = { File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_param_instantiator.py", line 52, in <dictcomp> for param_name, output_obj in [self._create_output_obj(output_param)] File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_param_instantiator.py", line 79, in _create_output_obj return param_name, 
self._create_v2_incremental_output_obj(param_name, output_param) File "/foundry/python_environment/lib/python3.9/site-packages/transforms/api/_lightweight/_param_instantiator.py", line 120, in _create_v2_incremental_output_obj return instance.get_non_incremental() File "/foundry/python_environment/lib/python3.9/site-packages/transforms/mediasets/outputs/_output_operations.py", line 92, in get_non_incremental raise ValueError( ValueError: Media set output should be snapshotted, but is not configured to do so. Resolved by setting "should_snapshot=True"
What I don’t understand is this: as you can see, I’ve added should_snapshot=True to my MediaSetOutput, so why does it think it’s not configured for that?
Also, if anyone has experience doing this, does my approach make sense? Any help would be appreciated — this is very time-sensitive and I don’t want to abandon it.
PS: I realize I’m not doing anything with the caption yet. I realized it can’t go into the media set; the caption will have to go into a dataset output instead, but I’m trying to get the frames working first.