Switch audio/video devices while in call

Enable the menus to switch audio/video devices.
Add a connectionstatechange listener to the webRTCConnection to set the
connected status.

GitLab: #146
Change-Id: Ic3afbdee2b1a6bf312d3d7d902adb3c103a7d26f
diff --git a/client/src/contexts/CallProvider.tsx b/client/src/contexts/CallProvider.tsx
index c32704b..fdb6935 100644
--- a/client/src/contexts/CallProvider.tsx
+++ b/client/src/contexts/CallProvider.tsx
@@ -16,7 +16,7 @@
  * <https://www.gnu.org/licenses/>.
  */
 import { CallAction, CallBegin, WebSocketMessageType } from 'jami-web-common';
-import { createContext, useCallback, useContext, useEffect, useMemo, useState } from 'react';
+import { createContext, MutableRefObject, useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react';
 import { Navigate, useNavigate } from 'react-router-dom';
 
 import LoadingPage from '../components/Loading';
@@ -26,7 +26,7 @@
 import { callTimeoutMs } from '../utils/constants';
 import { SetState, WithChildren } from '../utils/utils';
 import { ConversationContext } from './ConversationProvider';
-import { WebRtcContext } from './WebRtcProvider';
+import { MediaDevicesInfo, MediaInputKind, WebRtcContext } from './WebRtcProvider';
 import { IWebSocketContext, WebSocketContext } from './WebSocketProvider';
 
 export type CallRole = 'caller' | 'receiver';
@@ -40,7 +40,30 @@
   PermissionsDenied,
 }
 
+type MediaDeviceIdState = {
+  id: string | undefined;
+  setId: (id: string | undefined) => void | Promise<void>;
+};
+type CurrentMediaDeviceIds = Record<MediaDeviceKind, MediaDeviceIdState>;
+
+/**
+ * HTMLVideoElement with the `sinkId` and `setSinkId` optional properties.
+ *
+ * These properties are defined only on supported browsers
+ * https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/setSinkId#browser_compatibility
+ */
+interface VideoElementWithSinkId extends HTMLVideoElement {
+  sinkId?: string;
+  setSinkId?: (deviceId: string) => void;
+}
+
 export interface ICallContext {
+  mediaDevices: MediaDevicesInfo;
+  currentMediaDeviceIds: CurrentMediaDeviceIds;
+
+  localVideoRef: MutableRefObject<VideoElementWithSinkId | null>;
+  remoteVideoRef: MutableRefObject<VideoElementWithSinkId | null>;
+
   isAudioOn: boolean;
   setIsAudioOn: SetState<boolean>;
   isVideoOn: boolean;
@@ -58,6 +81,29 @@
 }
 
 const defaultCallContext: ICallContext = {
+  mediaDevices: {
+    audioinput: [],
+    audiooutput: [],
+    videoinput: [],
+  },
+  currentMediaDeviceIds: {
+    audioinput: {
+      id: undefined,
+      setId: async () => {},
+    },
+    audiooutput: {
+      id: undefined,
+      setId: async () => {},
+    },
+    videoinput: {
+      id: undefined,
+      setId: async () => {},
+    },
+  },
+
+  localVideoRef: { current: null },
+  remoteVideoRef: { current: null },
+
   isAudioOn: false,
   setIsAudioOn: () => {},
   isVideoOn: false,
@@ -93,10 +139,19 @@
   webSocket: IWebSocketContext;
 }) => {
   const { state: routeState } = useUrlParams<CallRouteParams>();
-  const { localStream, sendWebRtcOffer, iceConnectionState, closeConnection, getUserMedia } = useContext(WebRtcContext);
+  const { localStream, sendWebRtcOffer, iceConnectionState, closeConnection, getMediaDevices, updateLocalStream } =
+    useContext(WebRtcContext);
   const { conversationId, conversation } = useContext(ConversationContext);
   const navigate = useNavigate();
 
+  const localVideoRef = useRef<HTMLVideoElement | null>(null);
+  const remoteVideoRef = useRef<HTMLVideoElement | null>(null);
+
+  const [mediaDevices, setMediaDevices] = useState(defaultCallContext.mediaDevices);
+  const [audioInputDeviceId, setAudioInputDeviceId] = useState<string>();
+  const [audioOutputDeviceId, setAudioOutputDeviceId] = useState<string>();
+  const [videoDeviceId, setVideoDeviceId] = useState<string>();
+
   const [isAudioOn, setIsAudioOn] = useState(false);
   const [isVideoOn, setIsVideoOn] = useState(false);
   const [isChatShown, setIsChatShown] = useState(false);
@@ -111,9 +166,40 @@
   const contactUri = useMemo(() => conversation.getFirstMember().contact.getUri(), [conversation]);
 
   useEffect(() => {
+    if (callStatus !== CallStatus.InCall) {
+      return;
+    }
+
+    const updateMediaDevices = async () => {
+      try {
+        const newMediaDevices = await getMediaDevices();
+
+        if (newMediaDevices.audiooutput.length !== 0 && !audioOutputDeviceId) {
+          setAudioOutputDeviceId(newMediaDevices.audiooutput[0].deviceId);
+        }
+
+        setMediaDevices(newMediaDevices);
+      } catch (e) {
+        console.error('Could not update media devices:', e);
+      }
+    };
+
+    navigator.mediaDevices.addEventListener('devicechange', updateMediaDevices);
+    updateMediaDevices();
+
+    return () => {
+      navigator.mediaDevices.removeEventListener('devicechange', updateMediaDevices);
+    };
+  }, [callStatus, getMediaDevices, audioOutputDeviceId]);
+
+  useEffect(() => {
     if (localStream) {
       for (const track of localStream.getAudioTracks()) {
         track.enabled = isAudioOn;
+        const deviceId = track.getSettings().deviceId;
+        if (deviceId) {
+          setAudioInputDeviceId(deviceId);
+        }
       }
     }
   }, [isAudioOn, localStream]);
@@ -122,6 +208,10 @@
     if (localStream) {
       for (const track of localStream.getVideoTracks()) {
         track.enabled = isVideoOn;
+        const deviceId = track.getSettings().deviceId;
+        if (deviceId) {
+          setVideoDeviceId(deviceId);
+        }
       }
     }
   }, [isVideoOn, localStream]);
@@ -139,17 +229,18 @@
 
   useEffect(() => {
     if (callRole === 'caller' && callStatus === CallStatus.Default) {
+      const withVideoOn = routeState?.isVideoOn ?? false;
       setCallStatus(CallStatus.Loading);
-      getUserMedia()
+      updateLocalStream()
         .then(() => {
           const callBegin: CallBegin = {
             contactId: contactUri,
             conversationId,
-            withVideoOn: routeState?.isVideoOn ?? false,
+            withVideoOn,
           };
 
           setCallStatus(CallStatus.Ringing);
-          setIsVideoOn(routeState?.isVideoOn ?? false);
+          setIsVideoOn(withVideoOn);
           console.info('Sending CallBegin', callBegin);
           webSocket.send(WebSocketMessageType.CallBegin, callBegin);
         })
@@ -158,12 +249,12 @@
           setCallStatus(CallStatus.PermissionsDenied);
         });
     }
-  }, [webSocket, getUserMedia, callRole, callStatus, contactUri, conversationId, routeState]);
+  }, [webSocket, updateLocalStream, callRole, callStatus, contactUri, conversationId, routeState]);
 
   const acceptCall = useCallback(
     (withVideoOn: boolean) => {
       setCallStatus(CallStatus.Loading);
-      getUserMedia()
+      updateLocalStream()
         .then(() => {
           const callAccept: CallAction = {
             contactId: contactUri,
@@ -180,7 +271,7 @@
           setCallStatus(CallStatus.PermissionsDenied);
         });
     },
-    [webSocket, getUserMedia, contactUri, conversationId]
+    [webSocket, updateLocalStream, contactUri, conversationId]
   );
 
   useEffect(() => {
@@ -268,6 +359,34 @@
     };
   }, [callStatus, endCall]);
 
+  const currentMediaDeviceIds: CurrentMediaDeviceIds = useMemo(() => {
+    const createSetIdForDeviceKind = (mediaInputKind: MediaInputKind) => async (id: string | undefined) => {
+      const mediaDeviceIds = {
+        audio: audioInputDeviceId,
+        video: videoDeviceId,
+      };
+
+      mediaDeviceIds[mediaInputKind] = id;
+
+      await updateLocalStream(mediaDeviceIds);
+    };
+
+    return {
+      audioinput: {
+        id: audioInputDeviceId,
+        setId: createSetIdForDeviceKind('audio'),
+      },
+      audiooutput: {
+        id: audioOutputDeviceId,
+        setId: setAudioOutputDeviceId,
+      },
+      videoinput: {
+        id: videoDeviceId,
+        setId: createSetIdForDeviceKind('video'),
+      },
+    };
+  }, [updateLocalStream, audioInputDeviceId, audioOutputDeviceId, videoDeviceId]);
+
   useEffect(() => {
     navigate('.', {
       replace: true,
@@ -283,6 +402,10 @@
   return (
     <CallContext.Provider
       value={{
+        mediaDevices,
+        currentMediaDeviceIds,
+        localVideoRef,
+        remoteVideoRef,
         isAudioOn,
         setIsAudioOn,
         isVideoOn,
diff --git a/client/src/contexts/WebRtcProvider.tsx b/client/src/contexts/WebRtcProvider.tsx
index 9bf7a9a..4cd6263 100644
--- a/client/src/contexts/WebRtcProvider.tsx
+++ b/client/src/contexts/WebRtcProvider.tsx
@@ -25,13 +25,17 @@
 import { ConversationContext } from './ConversationProvider';
 import { IWebSocketContext, WebSocketContext } from './WebSocketProvider';
 
+export type MediaDevicesInfo = Record<MediaDeviceKind, MediaDeviceInfo[]>;
+export type MediaInputKind = 'audio' | 'video';
+export type MediaInputIds = Record<MediaInputKind, string | false | undefined>;
+
 interface IWebRtcContext {
   iceConnectionState: RTCIceConnectionState | undefined;
 
-  mediaDevices: Record<MediaDeviceKind, MediaDeviceInfo[]>;
   localStream: MediaStream | undefined;
   remoteStreams: readonly MediaStream[] | undefined;
-  getUserMedia: () => Promise<void>;
+  getMediaDevices: () => Promise<MediaDevicesInfo>;
+  updateLocalStream: (mediaDeviceIds?: MediaInputIds) => Promise<void>;
 
   sendWebRtcOffer: () => Promise<void>;
   closeConnection: () => void;
@@ -39,15 +43,11 @@
 
 const defaultWebRtcContext: IWebRtcContext = {
   iceConnectionState: undefined,
-  mediaDevices: {
-    audioinput: [],
-    audiooutput: [],
-    videoinput: [],
-  },
   localStream: undefined,
   remoteStreams: undefined,
-  getUserMedia: async () => {},
-  sendWebRtcOffer: async () => {},
+  getMediaDevices: async () => Promise.reject(),
+  updateLocalStream: async () => Promise.reject(),
+  sendWebRtcOffer: async () => Promise.reject(),
   closeConnection: () => {},
 };
 
@@ -103,9 +103,9 @@
   const [localStream, setLocalStream] = useState<MediaStream>();
   const [remoteStreams, setRemoteStreams] = useState<readonly MediaStream[]>();
   const [iceConnectionState, setIceConnectionState] = useState<RTCIceConnectionState | undefined>();
-  const [mediaDevices, setMediaDevices] = useState<Record<MediaDeviceKind, MediaDeviceInfo[]>>(
-    defaultWebRtcContext.mediaDevices
-  );
+
+  const [audioRtcRtpSenders, setAudioRtcRtpSenders] = useState<RTCRtpSender[]>();
+  const [videoRtcRtpSenders, setVideoRtcRtpSenders] = useState<RTCRtpSender[]>();
 
   // TODO: The ICE candidate queue is used to cache candidates that were received before `setRemoteDescription` was
   //       called. This is currently necessary, because the jami-daemon is unreliable as a WebRTC signaling channel,
@@ -120,73 +120,107 @@
   // TODO: This logic will have to change to support multiple people in a call
   const contactUri = useMemo(() => conversation.getFirstMember().contact.getUri(), [conversation]);
 
-  const getMediaDevices = useCallback(async () => {
+  const getMediaDevices = useCallback(async (): Promise<MediaDevicesInfo> => {
     try {
       const devices = await navigator.mediaDevices.enumerateDevices();
-      const newMediaDevices: Record<MediaDeviceKind, MediaDeviceInfo[]> = {
-        audioinput: [],
-        audiooutput: [],
-        videoinput: [],
+
+      // TODO: On Firefox, some devices can sometime be duplicated (2 devices can share the same deviceId). Using a map
+      //       and then converting it to an array makes it so that there is no duplicate. If we find a way to prevent
+      //       Firefox from listing 2 devices with the same deviceId, we can remove this logic.
+      const newMediaDevices: Record<MediaDeviceKind, Record<string, MediaDeviceInfo>> = {
+        audioinput: {},
+        audiooutput: {},
+        videoinput: {},
       };
 
       for (const device of devices) {
-        newMediaDevices[device.kind].push(device);
+        newMediaDevices[device.kind][device.deviceId] = device;
       }
 
-      return newMediaDevices;
+      return {
+        audioinput: Object.values(newMediaDevices.audioinput),
+        audiooutput: Object.values(newMediaDevices.audiooutput),
+        videoinput: Object.values(newMediaDevices.videoinput),
+      };
     } catch (e) {
       throw new Error('Could not get media devices', { cause: e });
     }
   }, []);
 
-  useEffect(() => {
-    if (iceConnectionState !== 'connected' && iceConnectionState !== 'completed') {
-      return;
-    }
+  const updateLocalStream = useCallback(
+    async (mediaDeviceIds?: MediaInputIds) => {
+      const devices = await getMediaDevices();
 
-    const updateMediaDevices = async () => {
+      let audioConstraint: MediaTrackConstraints | boolean = devices.audioinput.length !== 0;
+      let videoConstraint: MediaTrackConstraints | boolean = devices.videoinput.length !== 0;
+
+      if (!audioConstraint && !videoConstraint) {
+        return;
+      }
+
+      if (mediaDeviceIds?.audio !== undefined) {
+        audioConstraint = mediaDeviceIds.audio !== false ? { deviceId: mediaDeviceIds.audio } : false;
+      }
+      if (mediaDeviceIds?.video !== undefined) {
+        videoConstraint = mediaDeviceIds.video !== false ? { deviceId: mediaDeviceIds.video } : false;
+      }
+
       try {
-        const newMediaDevices = await getMediaDevices();
-        setMediaDevices(newMediaDevices);
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: audioConstraint,
+          video: videoConstraint,
+        });
+
+        for (const track of stream.getTracks()) {
+          track.enabled = false;
+        }
+
+        setLocalStream(stream);
       } catch (e) {
-        console.error('Could not update media devices:', e);
+        throw new Error('Could not get media devices', { cause: e });
       }
-    };
+    },
+    [getMediaDevices]
+  );
 
-    navigator.mediaDevices.addEventListener('devicechange', updateMediaDevices);
-    updateMediaDevices();
-
-    return () => {
-      navigator.mediaDevices.removeEventListener('devicechange', updateMediaDevices);
-    };
-  }, [getMediaDevices, iceConnectionState]);
-
-  const getUserMedia = useCallback(async () => {
-    const devices = await getMediaDevices();
-
-    const shouldGetAudio = devices.audioinput.length !== 0;
-    const shouldGetVideo = devices.videoinput.length !== 0;
-
-    if (!shouldGetAudio && !shouldGetVideo) {
+  useEffect(() => {
+    if (!localStream || !webRtcConnection) {
       return;
     }
 
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: shouldGetAudio,
-        video: shouldGetVideo,
-      });
-
-      for (const track of stream.getTracks()) {
-        track.enabled = false;
-        webRtcConnection.addTrack(track, stream);
+    const updateTracks = async (kind: 'audio' | 'video') => {
+      const senders = kind === 'audio' ? audioRtcRtpSenders : videoRtcRtpSenders;
+      const tracks = kind === 'audio' ? localStream.getAudioTracks() : localStream.getVideoTracks();
+      if (senders) {
+        const promises: Promise<void>[] = [];
+        for (let i = 0; i < senders.length; i++) {
+          // TODO: There is a bug where calling multiple times `addTrack` when changing an input device doesn't work.
+          //       Calling `addTrack` doesn't trigger the `track` event listener for the other user.
+          //       This workaround makes it possible to replace a track, but it could be improved by figuring out the
+          //       proper way of changing a track.
+          promises.push(
+            senders[i].replaceTrack(tracks[i]).catch((e) => {
+              console.error('Error replacing track:', e);
+            })
+          );
+        }
+        return Promise.all(promises);
       }
 
-      setLocalStream(stream);
-    } catch (e) {
-      throw new Error('Could not get media devices', { cause: e });
-    }
-  }, [webRtcConnection, getMediaDevices]);
+      // TODO: Currently, we do not support adding new devices. To enable this feature, we would need to implement
+      //       the "Perfect negotiation" pattern to renegotiate after `addTrack`.
+      //       https://blog.mozilla.org/webrtc/perfect-negotiation-in-webrtc/
+      const newSenders = tracks.map((track) => webRtcConnection.addTrack(track, localStream));
+      if (kind === 'audio') {
+        setAudioRtcRtpSenders(newSenders);
+      } else {
+        setVideoRtcRtpSenders(newSenders);
+      }
+    };
+
+    updateTracks('audio');
+    updateTracks('video');
+  }, [localStream, webRtcConnection, audioRtcRtpSenders, videoRtcRtpSenders]);
 
   const sendWebRtcOffer = useCallback(async () => {
     const sdp = await webRtcConnection.createOffer({
@@ -229,7 +263,11 @@
       console.info('WebRTC remote description has been set. Ready to receive ICE candidates');
       setIsReadyForIceCandidates(true);
       if (iceCandidateQueue.length !== 0) {
-        console.warn('Adding queued ICE candidates...', iceCandidateQueue);
+        console.warn(
+          'Found queued ICE candidates that were added before `setRemoteDescription` was called. ' +
+            'Adding queued ICE candidates...',
+          iceCandidateQueue
+        );
 
         await Promise.all(iceCandidateQueue.map((iceCandidate) => webRtcConnection.addIceCandidate(iceCandidate)));
       }
@@ -281,10 +319,6 @@
       if (isReadyForIceCandidates) {
         await webRtcConnection.addIceCandidate(data.candidate);
       } else {
-        console.warn(
-          "Received event on WebRtcIceCandidate before 'setRemoteDescription' was called. Pushing to ICE candidates queue...",
-          data
-        );
         setIceCandidateQueue((v) => {
           v.push(data.candidate);
           return v;
@@ -355,10 +389,10 @@
     <WebRtcContext.Provider
       value={{
         iceConnectionState,
-        mediaDevices,
         localStream,
         remoteStreams,
-        getUserMedia,
+        getMediaDevices,
+        updateLocalStream,
         sendWebRtcOffer,
         closeConnection,
       }}