import pandas as pd
import os
import av
import numpy as np
import time
import glob
import cv2
from rich.progress import Progress
from fractions import Fraction
from pathlib import Path
from PIL import Image
import ipywidgets as widgets

from pupil_labs.dynamic_content_on_rim.parser import audioSources, init_parser
from pupil_labs.dynamic_content_on_rim.uitools.get_corners import pick_point_in_image
from pupil_labs.dynamic_content_on_rim.uitools.ui_tools import (
    get_file,
    get_path,
    get_savedir,
    rich_df,
)
from pupil_labs.dynamic_content_on_rim.video.read import get_frame, read_video_ts

from pupil_labs.dynamic_content_on_rim.dynamic_rim import check_ids, get_perspective_transform, merge_tables

def map_(xy, frame):
    """Draw a magenta gaze marker at pixel position ``xy`` on ``frame``.

    :param xy: pandas Series/row with two values, the gaze (x, y) in pixels.
    :param frame: PIL Image (RGB) or array-like image.
    :return: uint8 ndarray in **RGB** order with the circle drawn in.
    """
    # np.array (not asarray) so we get a writable copy: arrays created from
    # PIL images can be read-only, and cv2.circle draws in place.
    frame = np.array(frame)

    # BUG FIX: the original converted RGB->BGR here, but the caller encodes
    # the result as "rgb24", which swapped red/blue in the output video.
    # Stay in RGB throughout. (255, 0, 255) is magenta in both channel
    # orders, so the marker color is unchanged.
    xy = xy.to_numpy(dtype=np.int32)

    resizefactor = 1
    frame = cv2.circle(
        frame, xy, int(25 * resizefactor), (255, 0, 255), int(5 * resizefactor)
    )
    return frame

def get_final_video_with_gazedata(td_sv_root,
                                  case_name,
                                  gaze_dynamic_rim_path,
                                  mp4_path_screen_video,
                                  output_video_path = 'output_video.mp4'):
    """Re-encode the screen recording with the remapped gaze point overlaid.

    :param td_sv_root: root folder of the "Timeseries Data + Scene Video"
        export; must contain ``<case_name>/events.csv`` and
        ``<case_name>/world_timestamps.csv``.
    :param case_name: recording subfolder name under ``td_sv_root``.
    :param gaze_dynamic_rim_path: CSV produced by the dynamic-rim pipeline,
        with columns ``gaze position transf x [px]`` / ``... y [px]`` and
        ``timestamp [ns]``.
    :param mp4_path_screen_video: path to the screen-capture MP4.
    :param output_video_path: where to write the annotated H.264 MP4.
    """
    events_df = pd.read_csv(os.path.join(td_sv_root, case_name, 'events.csv'))
    world_timestamps_df = pd.read_csv(os.path.join(td_sv_root, case_name, 'world_timestamps.csv'))
    gaze_video_df = pd.read_csv(gaze_dynamic_rim_path)

    _, sc_frames, sc_pts, sc_ts = read_video_ts(mp4_path_screen_video)
    print(f"Total frames (from timestamps): {sc_frames}, PTS: {sc_pts}, Time: {sc_ts}")

    # Anchor the screen video's relative timestamps to the wall-clock
    # "start.video" event logged by the eye tracker, so both time lines
    # share the same nanosecond epoch.
    start_video_ns = events_df.loc[events_df["name"] == "start.video"][
        "timestamp [ns]"
    ].values[0]
    sc_timestamps_ns = sc_ts + start_video_ns
    # (The original also computed an unused `end_video_ns`; removed.)

    # One row per screen-video frame: frame number, absolute timestamp, pts.
    sc_video_df = pd.DataFrame()
    sc_video_df["frame"] = np.arange(sc_frames)
    sc_video_df["timestamp [ns]"] = sc_timestamps_ns.astype(int)
    sc_video_df["pts"] = [int(pt) for pt in sc_pts]

    # Nearest-timestamp join of gaze samples onto screen-video frames.
    # NOTE(review): the gaze table is the *left* side, so merged_sc has one
    # row per gaze sample, yet it is later indexed by video-frame number —
    # this only lines up if gaze and video rates match; confirm upstream.
    merged_sc = pd.merge_asof(
            gaze_video_df,
            sc_video_df ,
            on="timestamp [ns]",
            direction="nearest",
            suffixes=["_video", "_audio"],
        )

    # Probe the first decoded frame for the output dimensions.
    with av.open(mp4_path_screen_video,) as sc_video:
        _scframe = next(sc_video.decode(video=0))
        mheight = _scframe.height
        mwidth = _scframe.width

    with av.open(mp4_path_screen_video,) as sc_video,         av.open(output_video_path, "w") as out_container:
        stream = sc_video.streams.video[0]

        out_video = out_container.add_stream("libx264", rate=30, options={"crf": "18"})
        out_video.height = mheight
        out_video.width = mwidth

        out_video.pix_fmt = "yuv420p"
        out_video.codec_context.time_base = Fraction(1, 30)

        n_rows = len(merged_sc)
        # enumerate() instead of the deprecated VideoFrame.index attribute;
        # decode yields frames sequentially so the counter is equivalent.
        for frame_idx, frame in enumerate(sc_video.decode(stream)):
            img = frame.to_image().convert("RGB")

            # Guard against the video outrunning the gaze table (the old
            # code raised IndexError) and against NaN gaze samples (the old
            # `xy is not None` check was always true, so NaNs crashed the
            # int32 conversion inside map_).
            if frame_idx < n_rows:
                xy = merged_sc[
                    ['gaze position transf x [px]', 'gaze position transf y [px]']
                ].iloc[frame_idx]
                if xy.notna().all():
                    img = map_(xy, img)
            img = np.array(img, dtype=np.uint8)

            out_frame = av.VideoFrame.from_ndarray(img, format="rgb24")
            for packet in out_video.encode(out_frame):
                out_container.mux(packet)

        # Flush frames still buffered inside the encoder.
        for packet in out_video.encode():
            out_container.mux(packet)
        # The `with` statement closes the container and finalizes the file;
        # the original's explicit close() here was redundant.

if __name__ == "__main__":
    # Example invocation — paths point at one local recording session.
    # Guarded so importing this module no longer triggers a full render.
    td_sv_root = './Timeseries Data + Scene Video'
    case_name = '2025-04-10_16-13-50-f2af74e9'
    gaze_dynamic_rim_path = './E1/gaze.csv'
    mp4_path_screen_video = './E1/MyMovie.mp4'

    get_final_video_with_gazedata(td_sv_root,
                                  case_name,
                                  gaze_dynamic_rim_path,
                                  mp4_path_screen_video,
                                  output_video_path='output_video_e4.mp4')