Whisper: add multilanguage model support

GitLab: #576
Change-Id: Ic9729c22341a7a14af26e0e7296701f65eb0f02f

commit: bd032f8297ea3a3698d11ed40dbc0f4c81802f7e [log] [tgz]
author: Aline Gondim Santos <aline.gondimsantos@savoirfairelinux.com> Fri Nov 25 15:39:12 2022 -0300
committer: Aline Gondim Santos <aline.gondimsantos@savoirfairelinux.com> Mon Mar 13 09:24:47 2023 -0300
tree: 3d866b3e077284c62afa0d98340c432d3a8c1444
parent: 329f86235adc78a3e2208f6eb363591a6c33f369 [diff] [blame]
diff --git a/WhisperTranscript/TranscriptAudioSubscriber.cpp b/WhisperTranscript/TranscriptAudioSubscriber.cpp
index d341671..6b29622 100644
--- a/WhisperTranscript/TranscriptAudioSubscriber.cpp
+++ b/WhisperTranscript/TranscriptAudioSubscriber.cpp

@@ -59,8 +59,7 @@
              **/
             l.unlock();
 #endif
-            modelProcessor_.feedInput(melSpectrogram_.data);
-            auto text = modelProcessor_.getText();
+            auto text = modelProcessor_.feedInput(melSpectrogram_.data, language_);
             mVS_->setText(text);
         }
     });
@@ -78,18 +77,30 @@
 void
 TranscriptAudioSubscriber::stop()
 {
+    std::lock_guard<std::mutex> l(inputLock);
     running = false;
     inputCv.notify_all();
 }
 
 void
+TranscriptAudioSubscriber::setParameter(std::string& parameter, Parameter type)
+{
+    switch (type) {
+    case (Parameter::LANGUAGE):
+        language_ = parameter;
+        break;
+    default:
+        return;
+    }
+}
+
+void
 TranscriptAudioSubscriber::update(jami::Observable<AVFrame*>*, AVFrame* const& pluginFrame)
 {
     if (!pluginFrame || modelFilters_.data.empty())
         return;
 
     if (firstRun) {
-        modelProcessor_.getText();
         count_ = 0;
         pastModelInput_.clear();
         currentModelInput_.clear();
@@ -110,7 +121,9 @@
         uniqueFramePtr filteredFrame = {formatFilter_.readOutput(), frameFree};
         if (filteredFrame) {
             for (size_t i = 0; i < filteredFrame->buf[0]->size; i += 2) {
+#ifdef __DEBUG__
                 std::lock_guard<std::mutex> l(inputLock);
+#endif
                 int16_t rawValue = (filteredFrame->buf[0]->data[i+1] << 8) | filteredFrame->buf[0]->data[i];
 
                 // If not a positive value, perform the 2's complement math on the value
@@ -130,8 +143,12 @@
                     std::swap(futureModelInput_, overlapInput_);
                     count_ = 0;
                     overlapInput_.clear();
+
+#ifndef __DEBUG__
+                    std::lock_guard<std::mutex> l(inputLock);
+#endif
                     newFrame = true;
-                    inputCv.notify_all();
+                    inputCv.notify_one();
                 }
             }
         }
@@ -149,7 +166,6 @@
 void
 TranscriptAudioSubscriber::detached(jami::Observable<AVFrame*>*)
 {
-    modelProcessor_.getText();
     firstRun = true;
     observable_ = nullptr;
     Plog::log(Plog::LogPriority::INFO, TAG, "::Detached()");
commit	bd032f8297ea3a3698d11ed40dbc0f4c81802f7e	[log] [tgz]
author	Aline Gondim Santos <aline.gondimsantos@savoirfairelinux.com>	Fri Nov 25 15:39:12 2022 -0300
committer	Aline Gondim Santos <aline.gondimsantos@savoirfairelinux.com>	Mon Mar 13 09:24:47 2023 -0300
tree	3d866b3e077284c62afa0d98340c432d3a8c1444
parent	329f86235adc78a3e2208f6eb363591a6c33f369 [diff] [blame]