====== Automatic Speech Recognition (ASR) with vosk ======

===== Sources =====

==== vosk ====

  * https://
  * https://

==== Dataquest ====

  * https://

{{youtube>
\\

===== Installation =====

<code>
conda create -n vosk python=3.9
conda activate vosk
conda install -c conda-forge jupyterlab numpy matplotlib pandas
#conda install -c conda-forge ipywidgets
#conda install -c conda-forge scipy scikit-learn
</code>

<code>
pip install vosk
pip install pyaudio
</code>

On Windows the vosk models are **cached here**: ''

==== ffmpeg ====

  * https://

===== pyaudio: Find the right audio device index of your favorite microphone =====

<code python>
import pyaudio
import wave

# Constants for audio recording
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5  # Adjust this to change the duration of the recording
OUTPUT_FILENAME = "recorded_audio.wav"  # WAV file the test recording is written to

def list_audio_devices():
    """Return a list of 'index: name' strings, one per audio device."""
    audio = pyaudio.PyAudio()
    devices = []

    for i in range(audio.get_device_count()):
        device_info = audio.get_device_info_by_index(i)
        devices.append(f"{i}: {device_info['name']}")

    audio.terminate()
    return devices

def get_input_device_index():
    """List all audio devices and ask the user for the index of the microphone."""
    devices = list_audio_devices()

    print("Available audio devices:")
    for device in devices:
        print(device)

    while True:
        try:
            print("")
            print("Look for your microphone in the list above.")
            print("Note the index shown in front of its name.")
            print("")
            device_index = int(input("Enter the index of your input device (microphone): "))
            if 0 <= device_index < len(devices):
                return device_index
            else:
                print("Invalid device index. Please try again.")
        except ValueError:
            print("Please enter a valid integer.")

def record_audio(device_index):
    """Record RECORD_SECONDS of audio from the selected device and save it as a WAV file."""
    audio = pyaudio.PyAudio()

    # Open a microphone stream with the selected input device
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True, input_device_index=device_index,
                        frames_per_buffer=CHUNK)

    print(f"Recording {RECORD_SECONDS} seconds from device {device_index} ...")

    frames = []

    # Record audio in chunks and store it in frames
    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("Recording finished.")

    # Stop and close the microphone stream
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

if __name__ == "__main__":
    device_index = get_input_device_index()
    record_audio(device_index)
</code>