How to write to multimodal mediasets?

I have a Code Repository in which I'm writing to a multimodal mediaset. How can I write to such a mediaset? How do I specify the MIME types?

Here is an example of a transform that writes multiple file types to a multimodal mediaset.

from transforms.api import transform
from transforms.mediasets import MediaSetOutput
from io import BytesIO
import json
import zipfile


# ============================================================================
# File Generation Functions
# ============================================================================


def generate_pdf():
    """Generate a simple one-page PDF document using reportlab.

    reportlab is imported lazily so the module can still be used when the
    package is not installed.

    Returns:
        A ``(buffer, filename)`` tuple. ``buffer`` is a ``BytesIO`` rewound to
        position 0 and ``filename`` is ``"generated_document.pdf"``. When
        reportlab cannot be imported, ``(None, None)`` is returned instead.
    """
    # Only the optional import is guarded: an ImportError raised later while
    # drawing should surface instead of being reported as "not available".
    try:
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas
    except ImportError:
        print("⚠ reportlab not available, skipping PDF generation")
        return None, None

    pdf_buffer = BytesIO()
    c = canvas.Canvas(pdf_buffer, pagesize=letter)
    c.drawString(100, 750, "Hello from Foundry!")
    c.drawString(100, 730, "This is a generated PDF document.")
    c.drawString(100, 710, "Generated by multimodal mediaset transform")
    c.showPage()
    c.save()
    # Rewind so the caller reads the document from the beginning.
    pdf_buffer.seek(0)
    return pdf_buffer, "generated_document.pdf"


def generate_png_image():
    """Generate a PNG image using PIL/Pillow.

    The image is 400x300 with a light-blue background, a dark-blue rectangle
    outline, and the caption "Generated Image". Pillow is imported lazily so
    the module can still be used when the package is not installed.

    Returns:
        A ``(buffer, filename)`` tuple. ``buffer`` is a ``BytesIO`` rewound to
        position 0 and ``filename`` is ``"generated_image.png"``. When Pillow
        cannot be imported, ``(None, None)`` is returned instead.
    """
    # Only the optional import is guarded: an ImportError raised later while
    # drawing should surface instead of being reported as "not available".
    try:
        from PIL import Image, ImageDraw
    except ImportError:
        print("⚠ Pillow not available, skipping PNG generation")
        return None, None

    img = Image.new("RGB", (400, 300), color="lightblue")
    draw = ImageDraw.Draw(img)
    draw.rectangle([50, 50, 350, 250], outline="darkblue", width=3)
    draw.text((100, 130), "Generated Image", fill="darkblue")

    img_buffer = BytesIO()
    img.save(img_buffer, format="PNG")
    # Rewind so the caller reads the image from the beginning.
    img_buffer.seek(0)
    return img_buffer, "generated_image.png"


def generate_jpeg_image():
    """Generate a JPEG image using PIL/Pillow.

    The image is 300x300 with a coral background and a yellow ellipse with a
    red outline, saved at quality 95. Pillow is imported lazily so the module
    can still be used when the package is not installed.

    Returns:
        A ``(buffer, filename)`` tuple. ``buffer`` is a ``BytesIO`` rewound to
        position 0 and ``filename`` is ``"generated_image.jpg"``. When Pillow
        cannot be imported, ``(None, None)`` is returned instead.
    """
    # Only the optional import is guarded: an ImportError raised later while
    # drawing should surface instead of being reported as "not available".
    try:
        from PIL import Image, ImageDraw
    except ImportError:
        print("⚠ Pillow not available, skipping JPEG generation")
        return None, None

    img = Image.new("RGB", (300, 300), color="coral")
    draw = ImageDraw.Draw(img)
    draw.ellipse([50, 50, 250, 250], fill="yellow", outline="red", width=5)

    jpg_buffer = BytesIO()
    img.save(jpg_buffer, format="JPEG", quality=95)
    # Rewind so the caller reads the image from the beginning.
    jpg_buffer.seek(0)
    return jpg_buffer, "generated_image.jpg"


def generate_json_file():
    """Generate a JSON data file describing this multimodal test.

    Returns:
        A ``(buffer, filename)`` tuple. ``buffer`` is a ``BytesIO`` holding
        the UTF-8 encoded, 2-space-indented JSON document and ``filename`` is
        ``"data_file.json"``.
    """
    json_data = {
        "test_type": "multimodal_mediaset",
        # Lists every format produced by the generators in this module; the
        # earlier list omitted xml, md and zip.
        "files_generated": [
            "pdf",
            "png",
            "jpg",
            "json",
            "txt",
            "csv",
            "xml",
            "md",
            "zip",
        ],
        "metadata": {
            "purpose": "Testing put_media_item functionality",
            "format": "multimodal",
        },
        "items": [
            {"id": 1, "name": "Item A", "value": 100},
            {"id": 2, "name": "Item B", "value": 200},
            {"id": 3, "name": "Item C", "value": 300},
        ],
    }

    json_buffer = BytesIO(json.dumps(json_data, indent=2).encode("utf-8"))
    return json_buffer, "data_file.json"


def generate_text_file():
    """Build a small plain-text document in memory.

    Returns:
        A ``(buffer, filename)`` tuple: a ``BytesIO`` holding the UTF-8
        encoded text, and the name ``"test_document.txt"``.
    """
    lines = (
        "This is a test text file.",
        "Generated by a Foundry Python Transform.",
        "",
        "Testing multimodal mediaset functionality with put_media_item.",
        "",
        "Line 1: First line",
        "Line 2: Second line",
        "Line 3: Third line",
        "",
        "End of file.",
    )
    # Every line, including the last one, is newline-terminated.
    body = "".join(f"{line}\n" for line in lines)
    return BytesIO(body.encode("utf-8")), "test_document.txt"


def generate_csv_file():
    """Build a five-row product CSV (plus header) in memory.

    Returns:
        A ``(buffer, filename)`` tuple: a ``BytesIO`` holding the UTF-8
        encoded CSV text, and the name ``"data_export.csv"``.
    """
    header = ("id", "name", "category", "value", "status")
    records = (
        ("1", "Product A", "Electronics", "299.99", "active"),
        ("2", "Product B", "Books", "19.99", "active"),
        ("3", "Product C", "Clothing", "49.99", "inactive"),
        ("4", "Product D", "Electronics", "599.99", "active"),
        ("5", "Product E", "Books", "29.99", "active"),
    )
    # Comma-join each row and terminate every line (header included) with "\n".
    table = "".join(",".join(row) + "\n" for row in (header, *records))
    return BytesIO(table.encode("utf-8")), "data_export.csv"


def generate_xml_file():
    """Build a small XML document (metadata plus two items) in memory.

    Returns:
        A ``(buffer, filename)`` tuple: a ``BytesIO`` holding the UTF-8
        encoded XML, and the name ``"data_structure.xml"``.
    """
    # Adjacent literals are concatenated by the parser; one literal per line
    # of the document, each ending in a newline.
    document = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        "<root>\n"
        "    <metadata>\n"
        "        <title>Multimodal Mediaset Test</title>\n"
        "        <created>2024-01-01</created>\n"
        "    </metadata>\n"
        "    <items>\n"
        '        <item id="1">\n'
        "            <name>Item One</name>\n"
        "            <value>100</value>\n"
        "        </item>\n"
        '        <item id="2">\n'
        "            <name>Item Two</name>\n"
        "            <value>200</value>\n"
        "        </item>\n"
        "    </items>\n"
        "</root>\n"
    )
    return BytesIO(document.encode("utf-8")), "data_structure.xml"


def generate_markdown_file():
    """Produce a one-line Markdown snippet as an in-memory file.

    Returns:
        A ``(buffer, filename)`` tuple: a ``BytesIO`` holding the UTF-8
        encoded Markdown, and the name ``"README.md"``.
    """
    encoded = " Example markdown content [here](this is a link)".encode("utf-8")
    return BytesIO(encoded), "README.md"


def generate_zip_file():
    """Assemble a DEFLATE-compressed ZIP archive in memory.

    The archive holds five text entries, written in this order:
    ``readme.txt``, ``data.json``, ``products.csv``, ``subfolder/notes.txt``
    and ``subfolder/config.ini``.

    Returns:
        A ``(buffer, filename)`` tuple: a ``BytesIO`` rewound to position 0
        containing the archive, and the name ``"archive_bundle.zip"``.
    """
    # --- Payloads (prepared up front, written to the archive below) ---------

    readme_text = """ZIP Archive - Multimodal Mediaset Test
========================================

This ZIP archive was generated by a Foundry Python Transform.

Contents:
- readme.txt (this file)
- data.json (sample JSON data)
- products.csv (sample CSV data)
- subfolder/notes.txt (nested file example)
- subfolder/config.ini (configuration file)

Purpose: Testing ZIP file generation and upload to multimodal mediaset.
"""

    manifest = {
        "archive_info": {
            "format": "zip",
            "compression": "ZIP_DEFLATED",
            "created_by": "multimodal_mediaset_transform",
        },
        "contents": [
            {"filename": "readme.txt", "type": "text"},
            {"filename": "data.json", "type": "json"},
            {"filename": "products.csv", "type": "csv"},
            {"filename": "subfolder/notes.txt", "type": "text"},
            {"filename": "subfolder/config.ini", "type": "config"},
        ],
        "test_data": {
            "items": [
                {"id": 1, "name": "Alpha", "status": "active"},
                {"id": 2, "name": "Beta", "status": "inactive"},
                {"id": 3, "name": "Gamma", "status": "active"},
            ]
        },
    }

    products_csv = """id,product,category,price,stock
1,Widget,Hardware,9.99,150
2,Gadget,Electronics,19.99,75
3,Tool,Hardware,29.99,200
4,Device,Electronics,49.99,50
5,Component,Hardware,4.99,500
"""

    notes_text = """Project Notes
=============

Date: 2024-01-01
Author: Foundry Transform

Notes:
- Successfully created nested folder structure in ZIP
- Tested multiple file formats within archive
- Compression working correctly

Status: ✓ Complete
"""

    config_ini = """[settings]
app_name = Multimodal Mediaset Test
version = 1.0.0
debug = false

[database]
host = localhost
port = 5432
name = testdb

[features]
enable_compression = true
max_file_size = 10485760
allowed_formats = pdf,png,jpg,json,txt,csv,xml,md,zip
"""

    # (archive path, text payload) pairs in the order they are written.
    # A "/" in the path places the entry in a nested folder.
    entries = [
        ("readme.txt", readme_text),
        ("data.json", json.dumps(manifest, indent=2)),
        ("products.csv", products_csv),
        ("subfolder/notes.txt", notes_text),
        ("subfolder/config.ini", config_ini),
    ]

    # --- Archive assembly ----------------------------------------------------

    archive = BytesIO()
    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
        for entry_path, text in entries:
            bundle.writestr(entry_path, text)

    # Rewind so the caller reads the archive from the beginning.
    archive.seek(0)
    return archive, "archive_bundle.zip"


# ============================================================================
# Transform Function
# ============================================================================


@transform.spark.using(
    output_mediaset=MediaSetOutput(
        "/path/to/multimodal_test_output"
    )
)
def generate_multimodal_files(output_mediaset):
    """
    Build one sample file per format and upload each to the output mediaset.

    Every generator returns a ``(buffer, filename)`` pair. A pair containing
    ``None`` (a generator whose optional dependency is missing) is skipped.
    Each remaining pair is uploaded with
    ``output_mediaset.put_media_item(buffer, filename)``, and a summary of the
    uploaded file names is printed at the end.

    NOTE(review): no MIME type is passed explicitly. Presumably the mediaset
    derives it from the file name / extension. Confirm against the
    put_media_item documentation.
    """

    # (generator callable, human-readable label used in the progress message)
    file_builders = [
        (generate_pdf, "PDF"),
        (generate_png_image, "PNG image"),
        (generate_jpeg_image, "JPEG image"),
        (generate_json_file, "JSON file"),
        (generate_text_file, "TXT file"),
        (generate_csv_file, "CSV file"),
        (generate_xml_file, "XML file"),
        (generate_markdown_file, "Markdown file"),
        (generate_zip_file, "ZIP archive"),
    ]

    uploaded_names = []

    for build, label in file_builders:
        payload, media_name = build()
        # Generators signal "nothing produced" with (None, None).
        if payload is None or media_name is None:
            continue
        output_mediaset.put_media_item(payload, media_name)
        print(f"✓ Uploaded {label}")
        uploaded_names.append(media_name)

    # Final report
    print("\n=== Summary ===")
    print(f"Successfully uploaded {len(uploaded_names)} files to multimodal mediaset:")
    for media_name in uploaded_names:
        print(f"  - {media_name}")

1 Like