import sys
import json
import os
from pptx import Presentation
from PIL import Image
import io

def extract_slides(ppt_path: str, output_dir: str) -> list:
    """Collect per-slide metadata from a presentation and export its pictures.

    Every picture shape found on a slide is written into ``output_dir`` as
    ``image_<slide index>_<shape id>.jpg``. The directory is created when it
    does not exist yet.

    Args:
        ppt_path: Path of the presentation file handed to ``Presentation``.
        output_dir: Directory that receives the exported pictures.

    Returns:
        One dict per slide, in slide order, with the keys ``id`` (1-based
        slide index), ``title``, ``type``, ``file`` and ``notes`` (speaker
        notes text, or ``''`` when the slide has none).

    Note:
        The ``file`` entry names ``slide_<index>.jpg``, which this function
        does not write itself, and the exported picture paths are not
        recorded in the returned data.
    """
    picture_shape_type = 13  # MSO_SHAPE_TYPE.PICTURE
    # Modes the JPEG writer accepts unchanged. Anything else (RGBA, P, LA,
    # ...) would make ``save`` raise OSError, so it is converted to RGB first.
    jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

    os.makedirs(output_dir, exist_ok=True)

    prs = Presentation(ppt_path)
    slides_data = []

    for idx, slide in enumerate(prs.slides, start=1):
        slide_data = {
            'id': idx,
            'title': f'Slide {idx}',
            'type': 'image',
            'file': f'slide_{idx}.jpg',
            'notes': ''
        }

        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame:
                slide_data['notes'] = notes_frame.text

        for shape in slide.shapes:
            if shape.shape_type != picture_shape_type:
                continue

            img_path = os.path.join(
                output_dir,
                f'image_{idx}_{shape.shape_id}.jpg'
            )
            with Image.open(io.BytesIO(shape.image.blob)) as img:
                # The .jpg suffix selects the JPEG writer, so transparency
                # and palettes have to be flattened before saving.
                if img.mode in jpeg_modes:
                    exportable = img
                else:
                    exportable = img.convert('RGB')
                exportable.save(img_path)

        slides_data.append(slide_data)

    return slides_data

if __name__ == '__main__':
    # Command-line entry point: extract the slides of the presentation named
    # by the first argument into the directory named by the second argument,
    # then print the collected slide metadata as JSON on stdout.
    if len(sys.argv) < 3:
        # A string passed to sys.exit goes to stderr with exit status 1, so
        # stdout stays reserved for the JSON payload.
        sys.exit(f'Usage: {sys.argv[0]} <ppt_path> <output_dir>')

    ppt_path = sys.argv[1]
    output_dir = sys.argv[2]

    slides = extract_slides(ppt_path, output_dir)
    print(json.dumps(slides, indent=2))
