user:rolf001:vosk:start
This is an old revision of the document!
Table of Contents
Automatic Speech Recognition (ASR) with vosk
Sources
vosk
Dataquest
Installation
conda create -n vosk python=3.9
conda activate vosk
conda install -c conda-forge jupyterlab numpy matplotlib pandas
#conda install -c conda-forge ipywidgets
#conda install -c conda-forge scipy scikit-learn
pip install vosk
pip install pyaudio
On Windows the vosk models are cached here: C:\Users\<username>\.cache\vosk
Missing:
ffmpeg …
<code> import pyaudio import wave
# Constants for audio recording
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio.open()
CHANNELS = 1  # number of channels opened on the stream and written to the WAV header
RATE = 44100  # sampling rate passed to PyAudio.open() and written to the WAV header
CHUNK = 1024  # frames read from the stream per read() call
RECORD_SECONDS = 5  # Adjust this to change the duration of the recording
# Plain ASCII quotes are required here; typographic quotes are a syntax error.
OUTPUT_FILENAME = "output.wav"
def list_audio_devices():
    """Return a description string for every audio device PyAudio reports.

    Each entry has the form ``"<index>: <name>"``, where ``<index>`` is the
    PyAudio device index. No filtering is applied, so the list contains
    every device PyAudio enumerates, not only devices that can record.

    Returns:
        list[str]: One ``"<index>: <name>"`` string per device, in index
        order.
    """
    audio = pyaudio.PyAudio()
    try:
        return [
            f"{i}: {audio.get_device_info_by_index(i)['name']}"
            for i in range(audio.get_device_count())
        ]
    finally:
        # Always release the PyAudio instance, even if a device query fails.
        audio.terminate()
def get_input_device_index():
    """Interactively ask the user which audio device to record from.

    Prints the entries returned by ``list_audio_devices()``, then prompts
    repeatedly until the user types an integer that is a valid index into
    that list.

    Returns:
        int: The device index chosen by the user.
    """
    available = list_audio_devices()
    print("Available audio input devices:")
    for entry in available:
        print(entry)

    invalid_message = "Invalid input. Please enter a valid device index."
    while True:
        # Hint shown before every prompt: a known-good choice on one machine.
        print("")
        print("On Becker's Dell Lat. 7330 the following works:")
        print("1: Microphone Array (Realtek(R) Au")
        print("")
        try:
            choice = int(input("Enter the index of the desired input device: "))
        except ValueError:
            # Non-numeric input: report and ask again.
            print(invalid_message)
            continue
        if choice in range(len(available)):
            return choice
        # Numeric but out of range: report and ask again.
        print(invalid_message)
def record_audio(device_index):
    """Record from the given input device and save the result as a WAV file.

    Opens an input stream with the module-level FORMAT, CHANNELS, RATE and
    CHUNK settings, reads ``int(RATE / CHUNK * RECORD_SECONDS)`` chunks of
    CHUNK frames each, and writes them to OUTPUT_FILENAME. Because only
    whole chunks are read, the recording length is rounded down to a
    multiple of CHUNK frames.

    Args:
        device_index: PyAudio index of the input device to record from.
    """
    audio = pyaudio.PyAudio()
    try:
        # Open a microphone stream with the selected input device
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            input_device_index=device_index,
                            frames_per_buffer=CHUNK)
        try:
            print(f"Recording from: {audio.get_device_info_by_index(device_index)['name']}")

            # Record audio in chunks and store it in frames
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]

            print("Finished recording.")
        finally:
            # Stop and close the microphone stream, even if a read failed
            stream.stop_stream()
            stream.close()

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(FORMAT)
    finally:
        # Release the PyAudio instance on every exit path.
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
# Script entry point: the dunder names need their double underscores and the
# string needs plain ASCII quotes, otherwise this guard is not valid Python.
if __name__ == "__main__":
    # Let the user pick an input device, then record from it.
    device_index = get_input_device_index()
    record_audio(device_index)
</code>
user/rolf001/vosk/start.1694354745.txt.gz · Last modified: 2023/09/10 16:05 by rolf.becker