Switch audio/video devices while in call

Enable the menus to switch audio/video devices.
Add a connectionstatechange listener on the webRtcConnection to set the
connected status.
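
Roughly, the listener looks like this (a minimal sketch for reference;
`setIsConnected` stands in for however the connected status is actually
stored):

    webRtcConnection.addEventListener('connectionstatechange', () => {
      // setIsConnected is a placeholder for the real state setter.
      setIsConnected(webRtcConnection.connectionState === 'connected');
    });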

GitLab: #146
Change-Id: Ic3afbdee2b1a6bf312d3d7d902adb3c103a7d26f
diff --git a/client/src/components/Button.tsx b/client/src/components/Button.tsx
index a27028a..a89726c 100644
--- a/client/src/components/Button.tsx
+++ b/client/src/components/Button.tsx
@@ -19,6 +19,7 @@
 import {
   Box,
   ClickAwayListener,
+  FormControlLabel,
   IconButton,
   IconButtonProps,
   ListItemIcon,
@@ -28,6 +29,7 @@
   Popper,
   Radio,
   RadioGroup,
+  RadioGroupProps,
   SvgIconProps,
 } from '@mui/material';
 import { styled } from '@mui/material/styles';
@@ -97,12 +99,11 @@
   icon?: ReactNode;
 };
 
-export type ExpandMenuRadioOption = {
+export type ExpandMenuRadioOption = RadioGroupProps & {
   options: {
     key: string;
     description: ReactNode;
   }[];
-  defaultSelectedOption?: string;
 };
 
 export type ExpandableButtonProps = IconButtonProps & {
@@ -142,19 +143,21 @@
         >
           {expandMenuOptions?.map((option, id) => {
             if ('options' in option) {
-              const { options, defaultSelectedOption } = option;
+              const { options, ...radioGroupProps } = option;
               return (
-                <RadioGroup key={id} defaultValue={defaultSelectedOption}>
-                  {options.map(({ description, key }) => {
-                    return (
-                      <MenuItem key={key}>
-                        <ListItemIcon>
-                          <Radio value={key} />
-                        </ListItemIcon>
-                        <ListItemText>{description}</ListItemText>
-                      </MenuItem>
-                    );
-                  })}
+                <RadioGroup key={id} {...radioGroupProps}>
+                  {options.map(({ description, key }, i) => (
+                    <MenuItem key={i}>
+                      <FormControlLabel
+                        value={key}
+                        control={<Radio value={key} />}
+                        label={<ListItemText>{description}</ListItemText>}
+                        sx={{
+                          width: '100%',
+                        }}
+                      />
+                    </MenuItem>
+                  ))}
                 </RadioGroup>
               );
             }
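
With this change, the selected option is controlled through the standard
RadioGroupProps (`value`, `onChange`) instead of `defaultSelectedOption`.
A minimal usage sketch (the device entries are made up for illustration;
the real wiring is in CallButtons.tsx below):

    // Hypothetical option list; CallButtons.tsx builds this from mediaDevices.
    const micOptions: ExpandMenuRadioOption = {
      options: [
        { key: 'default', description: 'Default microphone' },
        { key: 'usb-mic', description: 'USB microphone' },
      ],
      value: 'default',
      onChange: (e) => console.log('Selected device:', e.target.value),
    };
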
diff --git a/client/src/components/CallButtons.tsx b/client/src/components/CallButtons.tsx
index 6f53b73..2ecc4fc 100644
--- a/client/src/components/CallButtons.tsx
+++ b/client/src/components/CallButtons.tsx
@@ -18,11 +18,10 @@
 
 import { IconButton, IconButtonProps, PaletteColor } from '@mui/material';
 import { styled, Theme } from '@mui/material/styles';
-import { useContext, useMemo } from 'react';
+import { ChangeEvent, useContext, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 
 import { CallContext, CallStatus } from '../contexts/CallProvider';
-import { WebRtcContext } from '../contexts/WebRtcProvider';
 import {
   ExpandableButton,
   ExpandableButtonProps,
@@ -178,7 +177,7 @@
 };
 
 const useMediaDeviceExpandMenuOptions = (kind: MediaDeviceKind): ExpandMenuRadioOption[] | undefined => {
-  const { mediaDevices } = useContext(WebRtcContext);
+  const { currentMediaDeviceIds, mediaDevices } = useContext(CallContext);
 
   const options = useMemo(
     () =>
@@ -189,13 +188,38 @@
     [mediaDevices, kind]
   );
 
-  return options.length > 0 ? [{ options }] : undefined;
+  const currentDevice = currentMediaDeviceIds[kind];
+
+  if (options.length === 0) {
+    return undefined;
+  }
+  return [
+    {
+      options,
+      value: currentDevice.id ?? '',
+      onChange: (e: ChangeEvent<HTMLInputElement>) => {
+        currentDevice.setId(e.target.value);
+      },
+    },
+  ];
 };
 
 export const CallingVolumeButton = (props: ExpandableButtonProps) => {
   const options = useMediaDeviceExpandMenuOptions('audiooutput');
+  const { remoteVideoRef } = useContext(CallContext);
 
-  return <CallButton aria-label="volume options" Icon={VolumeIcon} expandMenuOptions={options} {...props} />;
+  // Audio out options are only available on Chrome and other browsers that support `setSinkId`
+  // https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/setSinkId#browser_compatibility
+  const hasSetSinkId = remoteVideoRef.current?.setSinkId != null;
+
+  return (
+    <CallButton
+      aria-label="volume options"
+      Icon={VolumeIcon}
+      expandMenuOptions={hasSetSinkId ? options : undefined}
+      {...props}
+    />
+  );
 };
 
 export const CallingMicButton = (props: ExpandableButtonProps) => {
diff --git a/client/src/contexts/CallProvider.tsx b/client/src/contexts/CallProvider.tsx
index c32704b..fdb6935 100644
--- a/client/src/contexts/CallProvider.tsx
+++ b/client/src/contexts/CallProvider.tsx
@@ -16,7 +16,7 @@
  * <https://www.gnu.org/licenses/>.
  */
 import { CallAction, CallBegin, WebSocketMessageType } from 'jami-web-common';
-import { createContext, useCallback, useContext, useEffect, useMemo, useState } from 'react';
+import { createContext, MutableRefObject, useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react';
 import { Navigate, useNavigate } from 'react-router-dom';
 
 import LoadingPage from '../components/Loading';
@@ -26,7 +26,7 @@
 import { callTimeoutMs } from '../utils/constants';
 import { SetState, WithChildren } from '../utils/utils';
 import { ConversationContext } from './ConversationProvider';
-import { WebRtcContext } from './WebRtcProvider';
+import { MediaDevicesInfo, MediaInputKind, WebRtcContext } from './WebRtcProvider';
 import { IWebSocketContext, WebSocketContext } from './WebSocketProvider';
 
 export type CallRole = 'caller' | 'receiver';
@@ -40,7 +40,30 @@
   PermissionsDenied,
 }
 
+type MediaDeviceIdState = {
+  id: string | undefined;
+  setId: (id: string | undefined) => void | Promise<void>;
+};
+type CurrentMediaDeviceIds = Record<MediaDeviceKind, MediaDeviceIdState>;
+
+/**
+ * HTMLVideoElement with the `sinkId` and `setSinkId` optional properties.
+ *
+ * These properties are defined only on supported browsers
+ * https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/setSinkId#browser_compatibility
+ */
+interface VideoElementWithSinkId extends HTMLVideoElement {
+  sinkId?: string;
+  setSinkId?: (deviceId: string) => Promise<void>;
+}
+
 export interface ICallContext {
+  mediaDevices: MediaDevicesInfo;
+  currentMediaDeviceIds: CurrentMediaDeviceIds;
+
+  localVideoRef: MutableRefObject<VideoElementWithSinkId | null>;
+  remoteVideoRef: MutableRefObject<VideoElementWithSinkId | null>;
+
   isAudioOn: boolean;
   setIsAudioOn: SetState<boolean>;
   isVideoOn: boolean;
@@ -58,6 +81,29 @@
 }
 
 const defaultCallContext: ICallContext = {
+  mediaDevices: {
+    audioinput: [],
+    audiooutput: [],
+    videoinput: [],
+  },
+  currentMediaDeviceIds: {
+    audioinput: {
+      id: undefined,
+      setId: async () => {},
+    },
+    audiooutput: {
+      id: undefined,
+      setId: async () => {},
+    },
+    videoinput: {
+      id: undefined,
+      setId: async () => {},
+    },
+  },
+
+  localVideoRef: { current: null },
+  remoteVideoRef: { current: null },
+
   isAudioOn: false,
   setIsAudioOn: () => {},
   isVideoOn: false,
@@ -93,10 +139,19 @@
   webSocket: IWebSocketContext;
 }) => {
   const { state: routeState } = useUrlParams<CallRouteParams>();
-  const { localStream, sendWebRtcOffer, iceConnectionState, closeConnection, getUserMedia } = useContext(WebRtcContext);
+  const { localStream, sendWebRtcOffer, iceConnectionState, closeConnection, getMediaDevices, updateLocalStream } =
+    useContext(WebRtcContext);
   const { conversationId, conversation } = useContext(ConversationContext);
   const navigate = useNavigate();
 
+  const localVideoRef = useRef<VideoElementWithSinkId | null>(null);
+  const remoteVideoRef = useRef<VideoElementWithSinkId | null>(null);
+
+  const [mediaDevices, setMediaDevices] = useState(defaultCallContext.mediaDevices);
+  const [audioInputDeviceId, setAudioInputDeviceId] = useState<string>();
+  const [audioOutputDeviceId, setAudioOutputDeviceId] = useState<string>();
+  const [videoDeviceId, setVideoDeviceId] = useState<string>();
+
   const [isAudioOn, setIsAudioOn] = useState(false);
   const [isVideoOn, setIsVideoOn] = useState(false);
   const [isChatShown, setIsChatShown] = useState(false);
@@ -111,9 +166,40 @@
   const contactUri = useMemo(() => conversation.getFirstMember().contact.getUri(), [conversation]);
 
   useEffect(() => {
+    if (callStatus !== CallStatus.InCall) {
+      return;
+    }
+
+    const updateMediaDevices = async () => {
+      try {
+        const newMediaDevices = await getMediaDevices();
+
+        if (newMediaDevices.audiooutput.length !== 0 && !audioOutputDeviceId) {
+          setAudioOutputDeviceId(newMediaDevices.audiooutput[0].deviceId);
+        }
+
+        setMediaDevices(newMediaDevices);
+      } catch (e) {
+        console.error('Could not update media devices:', e);
+      }
+    };
+
+    navigator.mediaDevices.addEventListener('devicechange', updateMediaDevices);
+    updateMediaDevices();
+
+    return () => {
+      navigator.mediaDevices.removeEventListener('devicechange', updateMediaDevices);
+    };
+  }, [callStatus, getMediaDevices, audioOutputDeviceId]);
+
+  useEffect(() => {
     if (localStream) {
       for (const track of localStream.getAudioTracks()) {
         track.enabled = isAudioOn;
+        const deviceId = track.getSettings().deviceId;
+        if (deviceId) {
+          setAudioInputDeviceId(deviceId);
+        }
       }
     }
   }, [isAudioOn, localStream]);
@@ -122,6 +208,10 @@
     if (localStream) {
       for (const track of localStream.getVideoTracks()) {
         track.enabled = isVideoOn;
+        const deviceId = track.getSettings().deviceId;
+        if (deviceId) {
+          setVideoDeviceId(deviceId);
+        }
       }
     }
   }, [isVideoOn, localStream]);
@@ -139,17 +229,18 @@
 
   useEffect(() => {
     if (callRole === 'caller' && callStatus === CallStatus.Default) {
+      const withVideoOn = routeState?.isVideoOn ?? false;
       setCallStatus(CallStatus.Loading);
-      getUserMedia()
+      updateLocalStream()
         .then(() => {
           const callBegin: CallBegin = {
             contactId: contactUri,
             conversationId,
-            withVideoOn: routeState?.isVideoOn ?? false,
+            withVideoOn,
           };
 
           setCallStatus(CallStatus.Ringing);
-          setIsVideoOn(routeState?.isVideoOn ?? false);
+          setIsVideoOn(withVideoOn);
           console.info('Sending CallBegin', callBegin);
           webSocket.send(WebSocketMessageType.CallBegin, callBegin);
         })
@@ -158,12 +249,12 @@
           setCallStatus(CallStatus.PermissionsDenied);
         });
     }
-  }, [webSocket, getUserMedia, callRole, callStatus, contactUri, conversationId, routeState]);
+  }, [webSocket, updateLocalStream, callRole, callStatus, contactUri, conversationId, routeState]);
 
   const acceptCall = useCallback(
     (withVideoOn: boolean) => {
       setCallStatus(CallStatus.Loading);
-      getUserMedia()
+      updateLocalStream()
         .then(() => {
           const callAccept: CallAction = {
             contactId: contactUri,
@@ -180,7 +271,7 @@
           setCallStatus(CallStatus.PermissionsDenied);
         });
     },
-    [webSocket, getUserMedia, contactUri, conversationId]
+    [webSocket, updateLocalStream, contactUri, conversationId]
   );
 
   useEffect(() => {
@@ -268,6 +359,34 @@
     };
   }, [callStatus, endCall]);
 
+  const currentMediaDeviceIds: CurrentMediaDeviceIds = useMemo(() => {
+    const createSetIdForDeviceKind = (mediaInputKind: MediaInputKind) => async (id: string | undefined) => {
+      const mediaDeviceIds = {
+        audio: audioInputDeviceId,
+        video: videoDeviceId,
+      };
+
+      mediaDeviceIds[mediaInputKind] = id;
+
+      await updateLocalStream(mediaDeviceIds);
+    };
+
+    return {
+      audioinput: {
+        id: audioInputDeviceId,
+        setId: createSetIdForDeviceKind('audio'),
+      },
+      audiooutput: {
+        id: audioOutputDeviceId,
+        setId: setAudioOutputDeviceId,
+      },
+      videoinput: {
+        id: videoDeviceId,
+        setId: createSetIdForDeviceKind('video'),
+      },
+    };
+  }, [updateLocalStream, audioInputDeviceId, audioOutputDeviceId, videoDeviceId]);
+
   useEffect(() => {
     navigate('.', {
       replace: true,
@@ -283,6 +402,10 @@
   return (
     <CallContext.Provider
       value={{
+        mediaDevices,
+        currentMediaDeviceIds,
+        localVideoRef,
+        remoteVideoRef,
         isAudioOn,
         setIsAudioOn,
         isVideoOn,
diff --git a/client/src/contexts/WebRtcProvider.tsx b/client/src/contexts/WebRtcProvider.tsx
index 9bf7a9a..4cd6263 100644
--- a/client/src/contexts/WebRtcProvider.tsx
+++ b/client/src/contexts/WebRtcProvider.tsx
@@ -25,13 +25,17 @@
 import { ConversationContext } from './ConversationProvider';
 import { IWebSocketContext, WebSocketContext } from './WebSocketProvider';
 
+export type MediaDevicesInfo = Record<MediaDeviceKind, MediaDeviceInfo[]>;
+export type MediaInputKind = 'audio' | 'video';
+export type MediaInputIds = Record<MediaInputKind, string | false | undefined>;
+
 interface IWebRtcContext {
   iceConnectionState: RTCIceConnectionState | undefined;
 
-  mediaDevices: Record<MediaDeviceKind, MediaDeviceInfo[]>;
   localStream: MediaStream | undefined;
   remoteStreams: readonly MediaStream[] | undefined;
-  getUserMedia: () => Promise<void>;
+  getMediaDevices: () => Promise<MediaDevicesInfo>;
+  updateLocalStream: (mediaDeviceIds?: MediaInputIds) => Promise<void>;
 
   sendWebRtcOffer: () => Promise<void>;
   closeConnection: () => void;
@@ -39,15 +43,11 @@
 
 const defaultWebRtcContext: IWebRtcContext = {
   iceConnectionState: undefined,
-  mediaDevices: {
-    audioinput: [],
-    audiooutput: [],
-    videoinput: [],
-  },
   localStream: undefined,
   remoteStreams: undefined,
-  getUserMedia: async () => {},
-  sendWebRtcOffer: async () => {},
+  getMediaDevices: async () => Promise.reject(),
+  updateLocalStream: async () => Promise.reject(),
+  sendWebRtcOffer: async () => Promise.reject(),
   closeConnection: () => {},
 };
 
@@ -103,9 +103,9 @@
   const [localStream, setLocalStream] = useState<MediaStream>();
   const [remoteStreams, setRemoteStreams] = useState<readonly MediaStream[]>();
   const [iceConnectionState, setIceConnectionState] = useState<RTCIceConnectionState | undefined>();
-  const [mediaDevices, setMediaDevices] = useState<Record<MediaDeviceKind, MediaDeviceInfo[]>>(
-    defaultWebRtcContext.mediaDevices
-  );
+
+  const [audioRtcRtpSenders, setAudioRtcRtpSenders] = useState<RTCRtpSender[]>();
+  const [videoRtcRtpSenders, setVideoRtcRtpSenders] = useState<RTCRtpSender[]>();
 
   // TODO: The ICE candidate queue is used to cache candidates that were received before `setRemoteDescription` was
   //       called. This is currently necessary, because the jami-daemon is unreliable as a WebRTC signaling channel,
@@ -120,73 +120,107 @@
   // TODO: This logic will have to change to support multiple people in a call
   const contactUri = useMemo(() => conversation.getFirstMember().contact.getUri(), [conversation]);
 
-  const getMediaDevices = useCallback(async () => {
+  const getMediaDevices = useCallback(async (): Promise<MediaDevicesInfo> => {
     try {
       const devices = await navigator.mediaDevices.enumerateDevices();
-      const newMediaDevices: Record<MediaDeviceKind, MediaDeviceInfo[]> = {
-        audioinput: [],
-        audiooutput: [],
-        videoinput: [],
+
+      // TODO: On Firefox, some devices can sometimes be duplicated (2 devices can share the same deviceId). Using a map
+      //       and then converting it to an array removes the duplicates. If we find a way to prevent
+      //       Firefox from listing 2 devices with the same deviceId, we can remove this logic.
+      const newMediaDevices: Record<MediaDeviceKind, Record<string, MediaDeviceInfo>> = {
+        audioinput: {},
+        audiooutput: {},
+        videoinput: {},
       };
 
       for (const device of devices) {
-        newMediaDevices[device.kind].push(device);
+        newMediaDevices[device.kind][device.deviceId] = device;
       }
 
-      return newMediaDevices;
+      return {
+        audioinput: Object.values(newMediaDevices.audioinput),
+        audiooutput: Object.values(newMediaDevices.audiooutput),
+        videoinput: Object.values(newMediaDevices.videoinput),
+      };
     } catch (e) {
       throw new Error('Could not get media devices', { cause: e });
     }
   }, []);
 
-  useEffect(() => {
-    if (iceConnectionState !== 'connected' && iceConnectionState !== 'completed') {
-      return;
-    }
+  const updateLocalStream = useCallback(
+    async (mediaDeviceIds?: MediaInputIds) => {
+      const devices = await getMediaDevices();
 
-    const updateMediaDevices = async () => {
+      let audioConstraint: MediaTrackConstraints | boolean = devices.audioinput.length !== 0;
+      let videoConstraint: MediaTrackConstraints | boolean = devices.videoinput.length !== 0;
+
+      if (!audioConstraint && !videoConstraint) {
+        return;
+      }
+
+      if (mediaDeviceIds?.audio !== undefined) {
+        audioConstraint = mediaDeviceIds.audio !== false ? { deviceId: mediaDeviceIds.audio } : false;
+      }
+      if (mediaDeviceIds?.video !== undefined) {
+        videoConstraint = mediaDeviceIds.video !== false ? { deviceId: mediaDeviceIds.video } : false;
+      }
+
       try {
-        const newMediaDevices = await getMediaDevices();
-        setMediaDevices(newMediaDevices);
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: audioConstraint,
+          video: videoConstraint,
+        });
+
+        for (const track of stream.getTracks()) {
+          track.enabled = false;
+        }
+
+        setLocalStream(stream);
       } catch (e) {
-        console.error('Could not update media devices:', e);
+        throw new Error('Could not get user media', { cause: e });
       }
-    };
+    },
+    [getMediaDevices]
+  );
 
-    navigator.mediaDevices.addEventListener('devicechange', updateMediaDevices);
-    updateMediaDevices();
-
-    return () => {
-      navigator.mediaDevices.removeEventListener('devicechange', updateMediaDevices);
-    };
-  }, [getMediaDevices, iceConnectionState]);
-
-  const getUserMedia = useCallback(async () => {
-    const devices = await getMediaDevices();
-
-    const shouldGetAudio = devices.audioinput.length !== 0;
-    const shouldGetVideo = devices.videoinput.length !== 0;
-
-    if (!shouldGetAudio && !shouldGetVideo) {
+  useEffect(() => {
+    if (!localStream || !webRtcConnection) {
       return;
     }
 
-    try {
-      const stream = await navigator.mediaDevices.getUserMedia({
-        audio: shouldGetAudio,
-        video: shouldGetVideo,
-      });
-
-      for (const track of stream.getTracks()) {
-        track.enabled = false;
-        webRtcConnection.addTrack(track, stream);
+    const updateTracks = async (kind: 'audio' | 'video') => {
+      const senders = kind === 'audio' ? audioRtcRtpSenders : videoRtcRtpSenders;
+      const tracks = kind === 'audio' ? localStream.getAudioTracks() : localStream.getVideoTracks();
+      if (senders) {
+        const promises: Promise<void>[] = [];
+        for (let i = 0; i < senders.length; i++) {
+          // TODO: There is a bug where calling `addTrack` multiple times when changing an input device doesn't work:
+          //       calling `addTrack` doesn't trigger the `track` event listener for the other user.
+          //       This workaround makes it possible to replace a track, but it could be improved by figuring out the
+          //       proper way of changing a track.
+          promises.push(
+            senders[i].replaceTrack(tracks[i]).catch((e) => {
+              console.error('Error replacing track:', e);
+            })
+          );
+        }
+        return Promise.all(promises);
       }
 
-      setLocalStream(stream);
-    } catch (e) {
-      throw new Error('Could not get media devices', { cause: e });
-    }
-  }, [webRtcConnection, getMediaDevices]);
+      // TODO: Currently, we do not support adding new devices. To enable this feature, we would need to implement
+      //       the "Perfect negotiation" pattern to renegotiate after `addTrack`.
+      //       https://blog.mozilla.org/webrtc/perfect-negotiation-in-webrtc/
+      const newSenders = tracks.map((track) => webRtcConnection.addTrack(track, localStream));
+      if (kind === 'audio') {
+        setAudioRtcRtpSenders(newSenders);
+      } else {
+        setVideoRtcRtpSenders(newSenders);
+      }
+    };
+
+    updateTracks('audio');
+    updateTracks('video');
+  }, [localStream, webRtcConnection, audioRtcRtpSenders, videoRtcRtpSenders]);
 
   const sendWebRtcOffer = useCallback(async () => {
     const sdp = await webRtcConnection.createOffer({
@@ -229,7 +263,11 @@
       console.info('WebRTC remote description has been set. Ready to receive ICE candidates');
       setIsReadyForIceCandidates(true);
       if (iceCandidateQueue.length !== 0) {
-        console.warn('Adding queued ICE candidates...', iceCandidateQueue);
+        console.warn(
+          'Found queued ICE candidates that were added before `setRemoteDescription` was called. ' +
+            'Adding queued ICE candidates...',
+          iceCandidateQueue
+        );
 
         await Promise.all(iceCandidateQueue.map((iceCandidate) => webRtcConnection.addIceCandidate(iceCandidate)));
       }
@@ -281,10 +319,6 @@
       if (isReadyForIceCandidates) {
         await webRtcConnection.addIceCandidate(data.candidate);
       } else {
-        console.warn(
-          "Received event on WebRtcIceCandidate before 'setRemoteDescription' was called. Pushing to ICE candidates queue...",
-          data
-        );
         setIceCandidateQueue((v) => {
           v.push(data.candidate);
           return v;
@@ -355,10 +389,10 @@
     <WebRtcContext.Provider
       value={{
         iceConnectionState,
-        mediaDevices,
         localStream,
         remoteStreams,
-        getUserMedia,
+        getMediaDevices,
+        updateLocalStream,
         sendWebRtcOffer,
         closeConnection,
       }}
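
Per input kind, `updateLocalStream(mediaDeviceIds?)` interprets a device id
string as a request for that specific device, `false` as disabling that
kind, and `undefined` as falling back to whether any such device is
available. A usage sketch (`currentAudioDeviceId` and `newCameraDeviceId`
are hypothetical):

    // Switch the camera while keeping the currently selected microphone.
    await updateLocalStream({ audio: currentAudioDeviceId, video: newCameraDeviceId });

    // Request defaults based on which devices are available.
    await updateLocalStream();
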
diff --git a/client/src/managers/NotificationManager.tsx b/client/src/managers/NotificationManager.tsx
index c515320..63eccc6 100644
--- a/client/src/managers/NotificationManager.tsx
+++ b/client/src/managers/NotificationManager.tsx
@@ -25,7 +25,7 @@
 import { WithChildren } from '../utils/utils';
 
 /**
- * Binds notification listeners to the WebSocket from a WebSocketContext
+ * Binds notification listeners to the WebSocket from a WebSocketContext.
  */
 export default ({ children }: WithChildren) => {
   const webSocket = useContext(WebSocketContext);
diff --git a/client/src/pages/CallInterface.tsx b/client/src/pages/CallInterface.tsx
index 3febdc0..b58b326 100644
--- a/client/src/pages/CallInterface.tsx
+++ b/client/src/pages/CallInterface.tsx
@@ -20,6 +20,7 @@
   ComponentType,
   Fragment,
   ReactNode,
+  RefObject,
   useCallback,
   useContext,
   useEffect,
@@ -84,16 +85,21 @@
 
 const CallInterface = () => {
   const { localStream, remoteStreams } = useContext(WebRtcContext);
-  const { isVideoOn } = useContext(CallContext);
-  const gridItemRef = useRef(null);
-  const remoteVideoRef = useRef<HTMLVideoElement | null>(null);
-  const localVideoRef = useRef<HTMLVideoElement | null>(null);
+  const {
+    isVideoOn,
+    currentMediaDeviceIds: {
+      audiooutput: { id: audioOutDeviceId },
+    },
+    localVideoRef,
+    remoteVideoRef,
+  } = useContext(CallContext);
+  const gridItemRef = useRef<HTMLDivElement | null>(null);
 
   useEffect(() => {
     if (localStream && localVideoRef.current) {
       localVideoRef.current.srcObject = localStream;
     }
-  }, [localStream]);
+  }, [localStream, localVideoRef]);
 
   useEffect(() => {
     // TODO: For now, `remoteStream` is the first remote stream in the array.
@@ -102,7 +108,19 @@
     if (remoteStream && remoteVideoRef.current) {
       remoteVideoRef.current.srcObject = remoteStream;
     }
-  }, [remoteStreams]);
+  }, [remoteStreams, remoteVideoRef]);
+
+  useEffect(() => {
+    if (!audioOutDeviceId) {
+      return;
+    }
+
+    if (remoteVideoRef.current?.setSinkId) {
+      // This only works on Chrome and other browsers that support `setSinkId`
+      // https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/setSinkId#browser_compatibility
+      remoteVideoRef.current.setSinkId(audioOutDeviceId);
+    }
+  }, [audioOutDeviceId, remoteVideoRef]);
 
   return (
     <Box display="flex" flexGrow={1}>
@@ -215,7 +233,7 @@
   CallingFullScreenButton,
 ];
 
-const CallInterfaceSecondaryButtons = (props: Props & { gridItemRef: React.RefObject<HTMLElement> }) => {
+const CallInterfaceSecondaryButtons = (props: Props & { gridItemRef: RefObject<HTMLElement> }) => {
   const stackRef = useRef<HTMLElement>(null);
 
   const [initialMeasurementDone, setInitialMeasurementDone] = useState(false);
diff --git a/client/src/pages/CallPending.tsx b/client/src/pages/CallPending.tsx
index cc24669..02f06e4 100644
--- a/client/src/pages/CallPending.tsx
+++ b/client/src/pages/CallPending.tsx
@@ -17,7 +17,7 @@
  */
 
 import { Box, CircularProgress, Grid, IconButtonProps, Stack, Typography } from '@mui/material';
-import { ComponentType, ReactNode, useContext, useEffect, useMemo, useRef } from 'react';
+import { ComponentType, ReactNode, useContext, useEffect, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { useLocation } from 'react-router-dom';
 
@@ -35,14 +35,13 @@
 export const CallPending = () => {
   const { localStream } = useContext(WebRtcContext);
   const { conversation } = useContext(ConversationContext);
-  const { callRole } = useContext(CallContext);
-  const localVideoRef = useRef<HTMLVideoElement | null>(null);
+  const { callRole, localVideoRef } = useContext(CallContext);
 
   useEffect(() => {
     if (localStream && localVideoRef.current) {
       localVideoRef.current.srcObject = localStream;
     }
-  }, [localStream]);
+  }, [localStream, localVideoRef]);
 
   return (
     <Stack