import './App.css';
import React, { useRef, useState, useEffect, useCallback } from "react";
import * as SpeechSDK from "microsoft-cognitiveservices-speech-sdk";
import * as utils from "./Utils";
import avatarBackground from "./assets/img/avatarBackground.png";
import Subtitle from "./components/Subtitle";
import Thinking from "./components/status/Thinking";
import Listening from "./components/status/Listening";
import AvatarControls from "./components/Controls";
import ConfigModal from './components/Config'; // Import the ConfigModal component


function App() {
  // Refs for Speech SDK objects and connections. They start as null and are assigned later in this
  // component (SDK objects in the configuration effect, sockets/peer connection when sessions start).
  const audioConfig = useRef(null);
  const avatarConfig = useRef(null);
  const avatarSynthesizer = useRef(null);
  const avatarVideoFormat = useRef(null);
  const speechSynthesisConfig = useRef(null);
  const langchainSocket = useRef(null); // WebSocket to the backend that talks to Langchain
  const avatarConnection = useRef(null); // RTCPeerConnection carrying the avatar video and TTS audio
  const speechRecognitionConfig = useRef(null);
  const speechRecognizer = useRef(null);
  const speakingThreads = useRef(0); // Count of speak() requests that have not finished yet
  // Refs to DOM elements rendered in the JSX below.
  const removeVideoDiv = useRef(null); // Wrapper <div> around the raw <video>/<audio> elements
  const canvasRef = useRef(null); // Canvas that receives the processed (green removed) frames
  const tmpCanvasRef = useRef(null); // Scratch canvas the video frame is drawn onto before processing
  const audioRef = useRef(null);
  const videoRef = useRef(null);
  // Avatar voice and appearance used when the synthesizer is configured.
  const avatarVoice = useRef("en-US-JennyMultilingualV2Neural");
  const avatarSelection = useRef({ "character": "lisa", "style": "casual-sitting" });
  const previousAnimationFrameTimestamp = useRef(0); // Timestamp of the last processed animation frame
  const autoDetectSourceLanguageConfig = useRef(null);
  const [isListening, setIsListening] = useState(false); // True while speech recognition is capturing the user
  const [isThinking, setIsThinking] = useState(false); // True while waiting on a response from the Langchain WebSocket
  const [iceCredentials, setIceCredentials] = useState(""); // ICE server data received in the "setToken" message
  const [avatarCaption, setAvatarCaption] = useState(null); // Caption ({ role, content }) displayed on the screen
  const [avatarSpeaking, setAvatarSpeaking] = useState(false); // True between the avatar's TurnStart and TurnEnd events
  const [avatarStarted, setAvatarStarted] = useState(false); // True once the avatar video has started playing
  const [langchainSocketReady, setLangchainSocketReady] = useState(false); // True after the backend reports "agentCreated"
  const [lastSpeakingEvent, setLastSpeakingEvent] = useState(null); // Date.now() of the avatar's last TurnEnd event
  const [currentEventId, setCurrentEventId] = useState(""); // Event ID selected in the config modal
  const [events, setEvents] = useState([]); // Events array for the coffee ordering tools.
  const [isModalOpen, setIsModalOpen] = useState(false); // Starts closed; an effect opens the modal once the events list is non-empty.
  const [continuousListening, setContinuousListening] = useState(false); // When true, recognition is not stopped after each query
  const [avatarLoading, setAvatarLoading] = useState(false); // True from the start request until the avatar video plays

  // Store the event chosen in the config modal, then dismiss the modal.
  const handleSaveEventId = (eventId) => {
    setCurrentEventId(eventId);
    setIsModalOpen(false);
  };

  // Asks the Langchain agent to introduce itself. Does nothing until the agent has been reported as ready.
  const avatarIntroduction = useCallback(() => {
    if (!langchainSocketReady) {
      return;
    }

    const introRequest = {
      type: "agentCall",
      input: "Hi, tell me about yourself.",
    };
    langchainSocket.current.send(JSON.stringify(introRequest));
  }, [langchainSocketReady]);

  /**
   * Animation-frame callback that copies the current avatar video frame onto the visible canvas
   * with the green background removed, then schedules itself for the next frame.
   *
   * The loop ends on its own once the video or canvas elements are no longer mounted.
   *
   * @param {number} timestamp - Timestamp passed in by window.requestAnimationFrame.
   */
  const makeBackgroundTransparent = useCallback((timestamp) => {
    const video = videoRef.current;
    const tmpCanvas = tmpCanvasRef.current;
    const canvas = canvasRef.current;

    // The refs become null when the elements unmount; stop rescheduling instead of throwing.
    if (!video || !tmpCanvas || !canvas) {
      return;
    }

    // Only process a frame when more than 30 ms have passed since the last processed one.
    // On a 60 Hz display this handles every other frame (roughly 30 FPS) to reduce CPU usage.
    if (timestamp - previousAnimationFrameTimestamp.current > 30) {
      const { videoWidth, videoHeight } = video;

      // Until the stream reports its dimensions there is nothing to draw or read back.
      if (videoWidth > 0 && videoHeight > 0) {
        const tmpCanvasContext = tmpCanvas.getContext("2d", {
          willReadFrequently: true,
        });
        tmpCanvasContext.drawImage(video, 0, 0, videoWidth, videoHeight);

        const frame = tmpCanvasContext.getImageData(0, 0, videoWidth, videoHeight);
        const pixels = frame.data;

        // Pixels are stored as consecutive R, G, B, A bytes.
        for (let offset = 0; offset < pixels.length; offset += 4) {
          const r = pixels[offset];
          const g = pixels[offset + 1];
          const b = pixels[offset + 2];

          if (g - 150 > r + b) {
            // Set alpha to 0 for pixels that are close to green
            pixels[offset + 3] = 0;
          } else if (g + g > r + b) {
            // Reduce green part of the green pixels to avoid green edge issue
            const adjustment = (g - (r + b) / 2) / 3;
            pixels[offset] = r + adjustment;
            pixels[offset + 1] = g - adjustment * 2;
            pixels[offset + 2] = b + adjustment;
            // Reduce alpha part for green pixels to make the edge smoother
            pixels[offset + 3] = Math.max(0, 255 - adjustment * 4);
          }
        }

        canvas.getContext("2d").putImageData(frame, 0, 0);
      }
      previousAnimationFrameTimestamp.current = timestamp;
    }

    window.requestAnimationFrame(makeBackgroundTransparent);
  }, []);

  /**
   * Speak text using the TTS Avatar API and show it as the agent caption.
   *
   * @param {string} text - Text for the avatar to speak.
   * @param {number} [endingSilenceMs=0] - Passed through to utils.generateSSML.
   */
  const speak = useCallback((text, endingSilenceMs = 0) => {
    setAvatarCaption({ role: "agent", content: text });

    // The setup effect swallows its own errors, so the synthesizer may never have been created.
    const synthesizer = avatarSynthesizer.current;
    if (!synthesizer) {
      console.error("Error occurred while speaking the SSML: [ avatar synthesizer is not configured ]");
      return;
    }

    const ssml = utils.generateSSML(text, avatarVoice.current, endingSilenceMs);
    speakingThreads.current++;

    synthesizer.speakSsmlAsync(ssml).then((result) => {
      if (result.reason !== SpeechSDK.ResultReason.Canceled) {
        return;
      }
      const cancellationDetails = SpeechSDK.CancellationDetails.fromResult(result);
      if (cancellationDetails.reason === SpeechSDK.CancellationReason.Error) {
        console.error(`Error occurred while speaking the SSML: [ ${cancellationDetails.errorDetails} ]`);
      }
    }).catch((error) => {
      console.error(`Error occurred while speaking the SSML: [ ${error} ]`);
    }).finally(() => {
      // Release the slot exactly once whether the request completed, was cancelled or rejected.
      // Clamp at zero because stopSpeaking() may already have reset the counter.
      speakingThreads.current = Math.max(0, speakingThreads.current - 1);
    });
  }, []);

  /**
   * Opens the WebSocket to the Langchain backend and wires up its handlers.
   *
   * On open it requests a token; incoming messages either make the avatar speak ("agentResponse"),
   * store ICE servers and events ("setToken"), or mark the agent as ready ("agentCreated").
   */
  const startLangchainSession = useCallback(() => {
    console.log(`[${new Date().toISOString()}]: Starting Langchain Session...`);

    // Replacing the leading "http" maps http -> ws and https -> wss.
    const wsOrigin = window.location.origin.replace(/^http/, "ws");
    const url = process.env.NODE_ENV === "production"
      ? `${wsOrigin}/api`
      : `${wsOrigin.replace("3000", "8080")}/ws`;

    // Handlers use this local socket so they never act on a newer socket stored in the ref.
    const socket = new WebSocket(url);
    langchainSocket.current = socket;

    socket.onopen = () => {
      console.log(`[${new Date().toISOString()}]: Langchain WebSocket Connected...`);
      if (socket.readyState === WebSocket.OPEN) {
        socket.send(JSON.stringify({ type: "token" }));
      }
    };

    socket.onmessage = (message) => {
      let data;
      try {
        data = JSON.parse(message.data);
      } catch (err) {
        console.error(`[${new Date().toISOString()}]: Ignoring malformed Langchain message: ${err.message}`);
        return;
      }

      switch (data?.type) {
        case "agentResponse": {
          const output = data.result?.output;
          if (output == null) {
            console.error(`[${new Date().toISOString()}]: Agent response did not contain any output.`);
            setIsThinking(false);
          } else {
            speak(output);
          }
          break;
        }
        case "setToken":
          setIceCredentials(data.iceServers);
          // Keep events an array so consumers reading events.length do not throw.
          setEvents(data.events ?? []);
          break;
        case "agentCreated":
          setLangchainSocketReady(true);
          break;
        default:
          console.log(`[${new Date().toISOString()}]: Unknown message type: ${data?.type}`);
          break;
      }
    };

    socket.onerror = () => {
      console.error(`[${new Date().toISOString()}]: Langchain WebSocket error.`);
    };

    socket.onclose = (event) => {
      console.log(`[${new Date().toISOString()}]: Langchain WebSocket closed (code ${event.code}).`);
      // A socket that has already been replaced (for example by a reset) must not touch state.
      if (langchainSocket.current !== socket) {
        return;
      }
      // Same state changes as a manual reset, so a dropped connection is not shown as ready.
      setIsThinking(false);
      setCurrentEventId("");
      setLangchainSocketReady(false);
    };
  }, [speak]);

  // Toggles continuous listening. The functional updater always flips the latest value,
  // so the callback keeps a stable identity and never acts on a stale captured value.
  const handleContinuousListening = useCallback(() => {
    setContinuousListening((enabled) => !enabled);
  }, []);

  // Clears the listening flag and asks the recognizer to stop continuous recognition.
  const stopRecognition = useCallback(() => {
    setIsListening(false);

    // The recognizer is missing if the SDK setup failed; there is nothing to stop in that case.
    const recognizer = speechRecognizer.current;
    if (!recognizer) {
      return;
    }

    recognizer.stopContinuousRecognitionAsync(
      () => {
        console.log(`[${new Date().toISOString()}]: Speech recognition stopped.`);
      },
      (err) => {
        console.error(`[${new Date().toISOString()}]: Failed to stop speech recognition: ${err}`);
      }
    );
  }, []);

  // Sends the user's query to the Langchain agent, but only while the avatar's ICE connection is "connected".
  // Unless continuous listening is enabled, recognition is stopped first. Any error is logged.
  const queryLangchainAgent = useCallback((userQuery) => {
    try {
      const avatarConnected = avatarConnection.current.iceConnectionState === "connected";
      if (!avatarConnected) {
        return;
      }

      if (!continuousListening) {
        stopRecognition();
      }

      setIsThinking(true);
      const request = { type: "agentCall", input: userQuery };
      langchainSocket.current.send(JSON.stringify(request));
    } catch (err) {
      console.error(err);
    }
  }, [stopRecognition, continuousListening]);

  // Stop all TTS streams: clear the listening/speaking flags and reset the pending speak counter.
  const stopSpeaking = () => {
    setIsListening(false);
    setAvatarSpeaking(false);

    const resetSpeakingThreads = () => {
      speakingThreads.current = 0;
    };

    // Without a synthesizer nothing can be speaking.
    const synthesizer = avatarSynthesizer.current;
    if (!synthesizer) {
      resetSpeakingThreads();
      return;
    }

    // stopSpeakingAsync() reports completion through the Promise it returns (it takes no callbacks),
    // so the counter reset and the error logging are attached to that Promise.
    synthesizer.stopSpeakingAsync().then(
      resetSpeakingThreads,
      (error) => {
        resetSpeakingThreads();
        console.error(`Error occurred while stopping the Avatar: [ ${error} ]`);
      }
    );
  };

  // Error callback for the TTS Avatar API: logs the error details extracted from the result.
  const error_cb = useCallback((result) => {
    const { errorDetails } = SpeechSDK.CancellationDetails.fromResult(result);
    console.log(`Error occurred in the Avatar service: ${errorDetails}`);
  }, []);

  /**
   * Callback function to handle the response from TTS Avatar API.
   *
   * Reads the remote SDP from the result and, after a 2 second delay, applies it to the avatar
   * peer connection. Does nothing further when the result carries no SDP.
   *
   * @param {object} result - Result object exposing `properties.getProperty` and `resultId`.
   */
  const complete_cb = useCallback((result) => {
    const sdp = result.properties.getProperty(SpeechSDK.PropertyId.TalkingAvatarService_WebRTC_SDP);

    // Without an SDP there is nothing to decode; decoding a missing value would throw inside the timer.
    if (!sdp) {
      console.log(`[${new Date().toISOString()}] Failed to get remote SDP. The avatar instance is temporarily unavailable. Result ID: ${result.resultId} `);
      return;
    }

    setTimeout(() => {
      // The connection may have been replaced or cleared during the delay.
      const connection = avatarConnection.current;
      if (!connection) {
        return;
      }

      try {
        const remoteDescription = new RTCSessionDescription(JSON.parse(atob(sdp)));
        connection.setRemoteDescription(remoteDescription).catch((err) => {
          console.error(`[${new Date().toISOString()}]: Failed to apply the remote SDP: ${err}`);
        });
      } catch (err) {
        console.error(`[${new Date().toISOString()}]: Failed to decode the remote SDP: ${err}`);
      }
    }, 2000);
  }, []);

  /**
   * Starts the avatar: creates the WebRTC peer connection, routes incoming audio/video tracks to the
   * media elements, starts the avatar session on the synthesizer and tracks the avatar's speaking turns.
   *
   * Does nothing when the avatar is already started, the Langchain agent is not ready,
   * or no ICE credentials have been received yet.
   */
  const startAvatar = useCallback(() => {
    if (avatarStarted || !langchainSocketReady || !iceCredentials) {
      return;
    }

    // The synthesizer is missing if the SDK setup failed.
    const synthesizer = avatarSynthesizer.current;
    if (!synthesizer) {
      console.error(`[${new Date().toISOString()}]: Cannot start the avatar because the synthesizer is not configured.`);
      return;
    }

    console.log(`[${new Date().toISOString()}]: Starting Avatar...`);
    setAvatarLoading(true);

    const connection = new RTCPeerConnection(iceCredentials);
    avatarConnection.current = connection;

    connection.addEventListener("track", (event) => {
      if (event.track.kind === "audio") {
        audioRef.current.srcObject = event.streams[0];
      } else if (event.track.kind === "video") {
        videoRef.current.srcObject = event.streams[0];

        // Hide the raw video and show the canvas that receives the processed frames.
        removeVideoDiv.current.hidden = true;
        canvasRef.current.hidden = false;

        // `once` keeps a later "play" event from starting a second animation loop.
        videoRef.current.addEventListener("play", () => {
          removeVideoDiv.current.style.width = `${videoRef.current.videoWidth}px`;
          window.requestAnimationFrame(makeBackgroundTransparent);
          setAvatarStarted(true);
          setAvatarLoading(false);
        }, { once: true });
        videoRef.current.onplaying = () => {
          console.log(`WebRTC ${event.track.kind} channel connected.`);
        };
      }
    });

    // Track the ICE state so the UI can reflect a lost avatar connection.
    connection.addEventListener("iceconnectionstatechange", () => {
      const state = connection.iceConnectionState;
      switch (state) {
        case "connected":
        case "checking":
          console.log(`ICE connection state is ${state}.`);
          break;
        case "disconnected":
        case "failed":
        case "closed":
          console.log(`ICE connection state is ${state}.`);
          // The avatar is no longer streaming; avatarStarted itself is a boolean, so use its setter.
          setAvatarStarted(false);
          setAvatarLoading(false);
          break;
        default:
          console.log('Unknown ICE connection state:', state);
          break;
      }
    });

    connection.addTransceiver("video", { direction: "sendrecv" });
    connection.addTransceiver("audio", { direction: "sendrecv" });

    synthesizer.startAvatarAsync(connection, complete_cb, error_cb).then((r) => {
      if (r.reason === SpeechSDK.ResultReason.SynthesizingAudioCompleted) {
        console.log(`[${new Date().toISOString()}]: Avatar Connected...`);
        return;
      }

      console.error(`[${new Date().toISOString()}]: Failed to Avatar Start - ${r.reason}`);
      console.error(`[${new Date().toISOString()}]: ${JSON.stringify(r)}`);
      if (r.reason === SpeechSDK.ResultReason.Canceled) {
        const cancellationDetails = SpeechSDK.CancellationDetails.fromResult(r);
        if (cancellationDetails.reason === SpeechSDK.CancellationReason.Error) {
          console.error(`[${new Date().toISOString()}]: ${cancellationDetails.errorDetails}`);
        }
      }
      // The start attempt failed, so the loading indicator must not stay on.
      setAvatarLoading(false);
    }).catch((error) => {
      console.error(`[${new Date().toISOString()}]: ${error.message}`);
      setAvatarLoading(false);
    });

    // Determine when the avatar is speaking or not
    synthesizer.avatarEventReceived = (s, e) => {
      if (e.privDescription === "TurnStart") {
        setAvatarSpeaking(true);
        setIsThinking(false);
      } else if (e.privDescription === "TurnEnd") {
        setAvatarSpeaking(false);
        setLastSpeakingEvent(Date.now());
      }
    };
  }, [iceCredentials, makeBackgroundTransparent, complete_cb, error_cb, avatarStarted, langchainSocketReady]);


  // Click handler for the start button: starts the avatar when the backend socket is open
  // and the avatar is not already running.
  const handleStartAvatarButton = (event) => {
    event.preventDefault();

    const socketOpen = langchainSocket.current.readyState === WebSocket.OPEN;
    if (!socketOpen || avatarStarted) {
      return;
    }
    startAvatar();
  };

  // Click handler that starts continuous speech recognition.
  // Ignored while the avatar is speaking (unless a response is still pending).
  const handleStartRecognitionButton = (e) => {
    e.preventDefault();
    if (avatarSpeaking && !isThinking) return;

    // The recognizer is missing if the SDK setup failed; do not show a listening state in that case.
    const recognizer = speechRecognizer.current;
    if (!recognizer) {
      console.error(`[${new Date().toISOString()}]: Speech recognizer is not configured.`);
      return;
    }

    setIsListening(true);

    recognizer.startContinuousRecognitionAsync(() => {
      console.log(`[${new Date().toISOString()}]: Speech Recognition Started`);
    }, (err) => {
      console.error(err);
      // Recognition never started, so the listening indicator must be cleared again.
      setIsListening(false);
    });
  };

  // Click handler that interrupts the avatar's speech.
  const handleStopSpeakingButton = (event) => {
    event.preventDefault();
    stopSpeaking();
  };

  // Click handler that stops speech recognition.
  const handleStopRecognitionButton = (event) => {
    event.preventDefault();
    stopRecognition();
  };

  // Click handler that resets the Langchain session: closes the socket (if any), clears the
  // selected event and marks the agent as not ready.
  // Kept `async` so callers that await or chain on the returned Promise keep working.
  const handleLangchainReset = async (e) => {
    e.preventDefault();
    // WebSocket.close() returns nothing, so there is nothing to await here.
    langchainSocket.current?.close();
    setCurrentEventId("");
    setLangchainSocketReady(false);
  };


  // Once the avatar has started and the backend socket is open, trigger the introduction.
  useEffect(() => {
    if (!avatarStarted) {
      return;
    }
    if (langchainSocket.current.readyState !== WebSocket.OPEN) {
      return;
    }
    avatarIntroduction();
  }, [avatarStarted, avatarIntroduction]);


  // Open the config modal whenever a non-empty events list is available.
  useEffect(() => {
    const hasEvents = events.length > 0;
    if (hasEvents) {
      setIsModalOpen(true);
    }
  }, [events]);

  // Whenever the selected event ID changes, ask the backend (over an open socket) to build the agent for it.
  useEffect(() => {
    const socket = langchainSocket.current;
    if (!socket || socket.readyState !== WebSocket.OPEN) {
      return;
    }
    const request = { type: "buildAgent", eventId: currentEventId };
    socket.send(JSON.stringify(request));
  }, [currentEventId]);

  // Open a Langchain session whenever the agent is not marked as ready.
  useEffect(() => {
    if (langchainSocketReady) {
      return;
    }
    startLangchainSession();
  }, [startLangchainSession, langchainSocketReady]);

  // The recognizer lives for the whole lifetime of the component, so its callback reaches the
  // current queryLangchainAgent through this ref instead of capturing one render's closure.
  const queryLangchainAgentRef = useRef(queryLangchainAgent);
  useEffect(() => {
    queryLangchainAgentRef.current = queryLangchainAgent;
  }, [queryLangchainAgent]);

  // Configure the avatar synthesizer and the speech recognizer once on mount, and release them on unmount.
  // Re-creating them on every handler change would leave the previous synthesizer/recognizer open.
  useEffect(() => {
    let synthesizer = null;
    let recognizer = null;

    try {
      // Languages for Azure Recognizer
      const supportedLanguages = ["en-US", "es-US", "de-DE", "zh-CN", "ar-AE", "ja-JP", "pt-BR", "fr-CA"];

      // Text-to-Speech
      speechSynthesisConfig.current = SpeechSDK.SpeechConfig.fromSubscription(process.env.REACT_APP_SPEECH_KEY, process.env.REACT_APP_SPEECH_REGION);
      speechSynthesisConfig.current.speechSynthesisVoiceName = avatarVoice.current;
      speechSynthesisConfig.current.speechSynthesisLanguage = "en-US";

      // Set up the avatar to crop the video feed to fit into a portrait mode
      avatarVideoFormat.current = new SpeechSDK.AvatarVideoFormat();
      avatarVideoFormat.current.setCropRange(new SpeechSDK.Coordinate(600, 0), new SpeechSDK.Coordinate(1320, 1080));

      // You can change the avatar here as well as the position of the avatar
      avatarConfig.current = new SpeechSDK.AvatarConfig(avatarSelection.current.character, avatarSelection.current.style, avatarVideoFormat.current);

      // Set the background color of the avatar to green screen
      avatarConfig.current.subtitleType = "soft_embedded";
      avatarConfig.current.backgroundColor = "#00FF00FF";

      // Set up the Avatar Synthesizer and Azure Speech Recognizer
      synthesizer = new SpeechSDK.AvatarSynthesizer(speechSynthesisConfig.current, avatarConfig.current);
      avatarSynthesizer.current = synthesizer;

      speechRecognitionConfig.current = SpeechSDK.SpeechConfig.fromSubscription(process.env.REACT_APP_SPEECH_KEY, process.env.REACT_APP_SPEECH_REGION);

      autoDetectSourceLanguageConfig.current = SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(supportedLanguages);

      audioConfig.current = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();

      speechRecognitionConfig.current.setProperty(SpeechSDK.PropertyId.SpeechServiceConnection_LanguageIdMode, "Continuous");

      recognizer = SpeechSDK.SpeechRecognizer.FromConfig(speechRecognitionConfig.current, autoDetectSourceLanguageConfig.current, audioConfig.current);
      speechRecognizer.current = recognizer;

      //TODO: Need to figure out why the recognizer passes the word "Play" when continuous listening is enabled.
      recognizer.recognized = (s, e) => {
        if (e.result.reason !== SpeechSDK.ResultReason.RecognizedSpeech) {
          console.log(e.result.reason);
          return;
        }

        setIsListening(false);
        // Trim whitespace
        const userQuery = e.result.text.trim();

        // Return if user query is blank or picks up something it cannot recognize.
        if (userQuery === "" || e.result.text.includes("undefined")) {
          return;
        }

        // Send recognized text to API / Langchain
        setAvatarCaption({ role: "user", content: userQuery });
        queryLangchainAgentRef.current(userQuery);
      };

      recognizer.canceled = () => {
        setIsListening(false);
      };
    } catch (err) {
      console.error(err);
    }

    return () => {
      // Only clear the refs if they still point at the objects created by this effect run.
      if (speechRecognizer.current === recognizer) {
        speechRecognizer.current = null;
      }
      if (avatarSynthesizer.current === synthesizer) {
        avatarSynthesizer.current = null;
      }

      try {
        recognizer?.close();
      } catch (err) {
        console.error(err);
      }
      try {
        // close() may return a Promise; wrap it so a rejection is logged rather than unhandled.
        Promise.resolve(synthesizer?.close()).catch((err) => console.error(err));
      } catch (err) {
        console.error(err);
      }
    };
  }, []);

  /*
    Invoked every 30 seconds to send an "idleResponse" message to the Langchain WebSocket, which keeps
    the Langchain socket and the Azure Avatar from disconnecting.

    Nothing is sent while a conversation is active (avatar speaking, waiting on a response, or listening)
    or when the avatar finished speaking within the last 30 seconds, so the idle response never
    interrupts the conversation.
  */
  const runEveryThirtySeconds = useCallback(() => {
    const now = Date.now();

    const conversationActive = avatarSpeaking || isThinking || isListening;
    if (!avatarStarted || conversationActive) {
      return;
    }
    if (langchainSocket.current.readyState !== WebSocket.OPEN) {
      return;
    }

    const idleLongEnough = !lastSpeakingEvent || (now - lastSpeakingEvent) > 30000;
    if (idleLongEnough) {
      langchainSocket.current.send(JSON.stringify({ type: "idleResponse" }));
    }
  }, [avatarSpeaking, isThinking, isListening, lastSpeakingEvent, avatarStarted]);

  // Run the idle check every 30 seconds; the timer is re-armed whenever the callback changes.
  useEffect(() => {
    const idleTimer = setInterval(runEveryThirtySeconds, 30000);

    return () => {
      clearInterval(idleTimer);
    };
  }, [runEveryThirtySeconds]);


  // Render tree: the config modal, then a container with the raw <video>/<audio> elements, the visible
  // canvas and the scratch canvas (both initially hidden), the Listening/Thinking indicators,
  // the subtitle for the current caption, and the avatar control buttons.
  return (
    <div className="flex flex-col dark text-foreground z-0 items-center justify-center h-screen min-h-screen bg-stone-900">
      <div className="overflow-hidden h-[1080px] w-[720px] portrait:h-full portrait:w-full">
        <div className="flex items-center justify-center overflow-hidden h-full w-full">
          <ConfigModal
            isOpen={isModalOpen}
            onClose={() => setIsModalOpen(false)}
            onSave={handleSaveEventId}
            events={events}
            currentEventId={currentEventId}
            continuousListening={continuousListening}
            handleContinuousListening={handleContinuousListening} />
          <div
            id="canvasContainer"
            style={{ backgroundImage: `url(${avatarBackground})` }}
            className="h-full w-full bg-no-repeat bg-cover relative overflow-hidden flex items-center justify-center">
            <div id="remoteVideo" ref={removeVideoDiv} className="h-full w-full">
              <video id="video" ref={videoRef} autoPlay playsInline className="h-full w-full"></video>
              <audio id="remoteAudio" ref={audioRef} autoPlay></audio>
            </div>
            <canvas
              id="canvas"
              hidden
              ref={canvasRef}
              width={720}
              height={1080}
            ></canvas>
            <canvas
              id="tmpCanvas"
              hidden
              width={720}
              height={1080}
              ref={tmpCanvasRef}
            ></canvas>
            {isListening && !isThinking && <Listening />}
            {isThinking && !isListening && <Thinking />}
            {avatarCaption && <Subtitle message={avatarCaption} />}
            <AvatarControls
              handleStartAvatarButton={handleStartAvatarButton}
              handleStartRecognitionButton={handleStartRecognitionButton}
              handleStopSpeakingButton={handleStopSpeakingButton}
              handleStopRecognitionButton={handleStopRecognitionButton}
              handleResetLangchain={handleLangchainReset}
              avatarSpeaking={avatarSpeaking}
              avatarStarted={avatarStarted}
              isListening={isListening}
              langchainSocketReady={langchainSocketReady}
              avatarLoading={avatarLoading}
            />
          </div>
        </div>
      </div>
    </div>
  );
}
export default App;
