I have a Code Repository in which I want to write to a multimodal media set. How can I write to such a media set, and how do I specify the MIME types?
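Here is an example of a transform that writes multiple file types to a multimodal media set. Each file is written with `put_media_item(buffer, filename)`; the example never passes an explicit MIME type, only a file name carrying the appropriate extension.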
from transforms.api import transform
from transforms.mediasets import MediaSetOutput
from io import BytesIO
import json
import zipfile
# ============================================================================
# File Generation Functions
# ============================================================================
def generate_pdf():
    """Generate a simple one-page PDF document using reportlab.

    reportlab is imported inside the function so that the rest of the
    transform keeps working in environments where it is not installed.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        rewound to position 0 containing the PDF bytes and ``filename`` is
        ``"generated_document.pdf"``. Returns ``(None, None)`` when an
        ``ImportError`` is raised (reportlab unavailable).
    """
    try:
        from reportlab.lib.pagesizes import letter
        from reportlab.pdfgen import canvas

        pdf_buffer = BytesIO()
        c = canvas.Canvas(pdf_buffer, pagesize=letter)
        c.drawString(100, 750, "Hello from Foundry!")
        c.drawString(100, 730, "This is a generated PDF document.")
        c.drawString(100, 710, "Generated by multimodal mediaset transform")
        c.showPage()
        c.save()
        # Rewind so the consumer reads the document from its first byte.
        pdf_buffer.seek(0)
        return pdf_buffer, "generated_document.pdf"
    except ImportError:
        print("⚠ reportlab not available, skipping PDF generation")
        return None, None
def generate_png_image():
    """Generate a 400x300 PNG image using PIL/Pillow.

    The image has a light-blue background, a dark-blue rectangle outline and
    the caption "Generated Image" drawn with Pillow's default font.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        rewound to position 0 containing the PNG bytes and ``filename`` is
        ``"generated_image.png"``. Returns ``(None, None)`` when an
        ``ImportError`` is raised (Pillow unavailable).
    """
    try:
        from PIL import Image, ImageDraw

        img = Image.new("RGB", (400, 300), color="lightblue")
        draw = ImageDraw.Draw(img)
        draw.rectangle([50, 50, 350, 250], outline="darkblue", width=3)
        draw.text((100, 130), "Generated Image", fill="darkblue")
        img_buffer = BytesIO()
        img.save(img_buffer, format="PNG")
        # Rewind so the consumer reads the image from its first byte.
        img_buffer.seek(0)
        return img_buffer, "generated_image.png"
    except ImportError:
        print("⚠ Pillow not available, skipping PNG generation")
        return None, None
def generate_jpeg_image():
    """Generate a 300x300 JPEG image using PIL/Pillow.

    The image has a coral background with a yellow ellipse outlined in red,
    and is encoded with ``quality=95``.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        rewound to position 0 containing the JPEG bytes and ``filename`` is
        ``"generated_image.jpg"``. Returns ``(None, None)`` when an
        ``ImportError`` is raised (Pillow unavailable).
    """
    try:
        from PIL import Image, ImageDraw

        img = Image.new("RGB", (300, 300), color="coral")
        draw = ImageDraw.Draw(img)
        draw.ellipse([50, 50, 250, 250], fill="yellow", outline="red", width=5)
        jpg_buffer = BytesIO()
        img.save(jpg_buffer, format="JPEG", quality=95)
        # Rewind so the consumer reads the image from its first byte.
        jpg_buffer.seek(0)
        return jpg_buffer, "generated_image.jpg"
    except ImportError:
        print("⚠ Pillow not available, skipping JPEG generation")
        return None, None
def generate_json_file():
    """Generate a JSON data file.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        positioned at 0 holding the UTF-8 encoded, 2-space-indented JSON
        document and ``filename`` is ``"data_file.json"``.
    """
    json_data = {
        "test_type": "multimodal_mediaset",
        # Keep in sync with the generators registered in
        # generate_multimodal_files: nine formats are produced, not six.
        "files_generated": [
            "pdf",
            "png",
            "jpg",
            "json",
            "txt",
            "csv",
            "xml",
            "md",
            "zip",
        ],
        "metadata": {
            "purpose": "Testing put_media_item functionality",
            "format": "multimodal",
        },
        "items": [
            {"id": 1, "name": "Item A", "value": 100},
            {"id": 2, "name": "Item B", "value": 200},
            {"id": 3, "name": "Item C", "value": 300},
        ],
    }
    json_buffer = BytesIO(json.dumps(json_data, indent=2).encode("utf-8"))
    return json_buffer, "data_file.json"
def generate_text_file():
    """Generate a plain text file.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        positioned at 0 holding the UTF-8 encoded text and ``filename`` is
        ``"test_document.txt"``.
    """
    # The literal's lines start at column 0 on purpose: any leading
    # whitespace here would become part of the generated file.
    text_content = """This is a test text file.
Generated by a Foundry Python Transform.
Testing multimodal mediaset functionality with put_media_item.
Line 1: First line
Line 2: Second line
Line 3: Third line
End of file.
"""
    text_buffer = BytesIO(text_content.encode("utf-8"))
    return text_buffer, "test_document.txt"
def generate_csv_file():
    """Generate a CSV data file with a header row and five product rows.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        positioned at 0 holding the UTF-8 encoded CSV text and ``filename``
        is ``"data_export.csv"``.
    """
    # The literal's lines start at column 0 on purpose: leading whitespace
    # would end up inside the first CSV field of every row.
    csv_content = """id,name,category,value,status
1,Product A,Electronics,299.99,active
2,Product B,Books,19.99,active
3,Product C,Clothing,49.99,inactive
4,Product D,Electronics,599.99,active
5,Product E,Books,29.99,active
"""
    csv_buffer = BytesIO(csv_content.encode("utf-8"))
    return csv_buffer, "data_export.csv"
def generate_xml_file():
    """Generate an XML data file.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        positioned at 0 holding the UTF-8 encoded XML document and
        ``filename`` is ``"data_structure.xml"``.
    """
    # The XML declaration must be the very first characters of the document,
    # so the literal starts immediately after the opening quotes.
    xml_content = """<?xml version="1.0" encoding="UTF-8"?>
<root>
<metadata>
<title>Multimodal Mediaset Test</title>
<created>2024-01-01</created>
</metadata>
<items>
<item id="1">
<name>Item One</name>
<value>100</value>
</item>
<item id="2">
<name>Item Two</name>
<value>200</value>
</item>
</items>
</root>
"""
    xml_buffer = BytesIO(xml_content.encode("utf-8"))
    return xml_buffer, "data_structure.xml"
def generate_markdown_file():
    """Generate a Markdown documentation file.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        positioned at 0 holding the UTF-8 encoded Markdown text and
        ``filename`` is ``"README.md"``.
    """
    markdown_content = " Example markdown content [here](this is a link)"
    md_buffer = BytesIO(markdown_content.encode("utf-8"))
    return md_buffer, "README.md"
def generate_zip_file():
    """Generate a ZIP file containing multiple files.

    The archive is built in memory with ``ZIP_DEFLATED`` compression and
    holds five members, two of them under ``subfolder/``:
    ``readme.txt``, ``data.json``, ``products.csv``,
    ``subfolder/notes.txt`` and ``subfolder/config.ini``.

    Returns:
        A ``(buffer, filename)`` tuple where ``buffer`` is a ``BytesIO``
        rewound to position 0 containing the archive bytes and ``filename``
        is ``"archive_bundle.zip"``.
    """
    # The multi-line literals below start at column 0 on purpose: leading
    # whitespace would become part of the archived files.
    readme_content = """ZIP Archive - Multimodal Mediaset Test
========================================
This ZIP archive was generated by a Foundry Python Transform.
Contents:
- readme.txt (this file)
- data.json (sample JSON data)
- products.csv (sample CSV data)
- subfolder/notes.txt (nested file example)
- subfolder/config.ini (configuration file)
Purpose: Testing ZIP file generation and upload to multimodal mediaset.
"""

    sample_data = {
        "archive_info": {
            "format": "zip",
            "compression": "ZIP_DEFLATED",
            "created_by": "multimodal_mediaset_transform",
        },
        "contents": [
            {"filename": "readme.txt", "type": "text"},
            {"filename": "data.json", "type": "json"},
            {"filename": "products.csv", "type": "csv"},
            {"filename": "subfolder/notes.txt", "type": "text"},
            {"filename": "subfolder/config.ini", "type": "config"},
        ],
        "test_data": {
            "items": [
                {"id": 1, "name": "Alpha", "status": "active"},
                {"id": 2, "name": "Beta", "status": "inactive"},
                {"id": 3, "name": "Gamma", "status": "active"},
            ]
        },
    }

    csv_content = """id,product,category,price,stock
1,Widget,Hardware,9.99,150
2,Gadget,Electronics,19.99,75
3,Tool,Hardware,29.99,200
4,Device,Electronics,49.99,50
5,Component,Hardware,4.99,500
"""

    notes_content = """Project Notes
=============
Date: 2024-01-01
Author: Foundry Transform
Notes:
- Successfully created nested folder structure in ZIP
- Tested multiple file formats within archive
- Compression working correctly
Status: ✓ Complete
"""

    config_content = """[settings]
app_name = Multimodal Mediaset Test
version = 1.0.0
debug = false
[database]
host = localhost
port = 5432
name = testdb
[features]
enable_compression = true
max_file_size = 10485760
allowed_formats = pdf,png,jpg,json,txt,csv,xml,md,zip
"""

    # Archive members in the order they are written; a "/" in the member
    # name is what places the last two entries in a subfolder.
    members = [
        ("readme.txt", readme_content),
        ("data.json", json.dumps(sample_data, indent=2)),
        ("products.csv", csv_content),
        ("subfolder/notes.txt", notes_content),
        ("subfolder/config.ini", config_content),
    ]

    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
        for member_name, member_content in members:
            zipf.writestr(member_name, member_content)

    # The archive is only complete once the ZipFile context has closed;
    # rewind afterwards so the consumer reads it from the first byte.
    zip_buffer.seek(0)
    return zip_buffer, "archive_bundle.zip"
# ============================================================================
# Transform Function
# ============================================================================
@transform.spark.using(
    output_mediaset=MediaSetOutput(
        "/path/to/multimodal_test_output"
    )
)
def generate_multimodal_files(output_mediaset):
    """
    Generate various file formats and write them to a multimodal mediaset.

    Each generator returns a ``(buffer, filename)`` pair, which is written
    with ``output_mediaset.put_media_item(buffer, filename)``. Generators
    that return ``(None, None)`` (optional dependency missing) are skipped.
    A summary of the uploaded file names is printed at the end.

    No MIME type is passed explicitly; only the file name is supplied.
    NOTE(review): presumably the media set derives each item's type from
    the file name/extension - confirm against the media set documentation.

    Args:
        output_mediaset: The media set output bound by the decorator.
    """
    # Each entry pairs a generator with the label used in the log output.
    generators = [
        (generate_pdf, "PDF"),
        (generate_png_image, "PNG image"),
        (generate_jpeg_image, "JPEG image"),
        (generate_json_file, "JSON file"),
        (generate_text_file, "TXT file"),
        (generate_csv_file, "CSV file"),
        (generate_xml_file, "XML file"),
        (generate_markdown_file, "Markdown file"),
        (generate_zip_file, "ZIP archive"),
    ]
    uploaded_files = []

    for generator_func, file_type in generators:
        buffer, filename = generator_func()
        if buffer is None or filename is None:
            # The generator opted out; there is nothing to upload.
            continue
        output_mediaset.put_media_item(buffer, filename)
        print(f"✓ Uploaded {file_type}")
        uploaded_files.append(filename)

    print("\n=== Summary ===")
    print(f"Successfully uploaded {len(uploaded_files)} files to multimodal mediaset:")
    for filename in uploaded_files:
        print(f" - {filename}")
1 Like