user:rolf001:vosk:start
This is an old revision of the document!
Table of Contents
Automatic Speech Recognition (ASR) with vosk
Sources
vosk
Dataquest
Installation
conda create -n vosk python=3.9
conda activate vosk
conda install -c conda-forge jupyterlab numpy matplotlib pandas
#conda install -c conda-forge ipywidgets
#conda install -c conda-forge scipy scikit-learn
pip install vosk
pip install pyaudio
On Windows the vosk models are cached here: C:\Users\<username>\.cache\vosk
Missing:
ffmpeg …
<code>
import pyaudio
import wave
# Constants for audio recording
FORMAT = pyaudio.paInt16  # sample format handed to PyAudio when opening the stream
CHANNELS = 1  # channel count used for both the capture stream and the WAV header
RATE = 44100  # sampling rate in Hz used for both the capture stream and the WAV header
CHUNK = 1024  # number of frames fetched per stream.read() call
RECORD_SECONDS = 5  # Adjust this to change the duration of the recording
OUTPUT_FILENAME = "output.wav"  # WAV file the recording is written to
def list_audio_devices() -> list[str]:
    """Return one label per audio device known to PyAudio.

    Each label has the form ``"<index>: <device name>"``. Every device is
    listed (no filtering by input capability), so the position of a label
    in the returned list equals its PyAudio device index.

    Returns:
        The list of device labels, ordered by device index.
    """
    audio = pyaudio.PyAudio()
    try:
        return [
            f"{i}: {audio.get_device_info_by_index(i)['name']}"
            for i in range(audio.get_device_count())
        ]
    finally:
        # Always release the PyAudio instance, even if a device query raises.
        audio.terminate()
def get_input_device_index():
    """Interactively ask the user for an audio device index.

    Prints the labels returned by ``list_audio_devices()``, then keeps
    prompting until the user types an integer that is a valid position in
    that list.

    Returns:
        The chosen device index as an ``int``.
    """
    device_labels = list_audio_devices()
    print("Available audio input devices:")
    for label in device_labels:
        print(label)

    valid_indices = range(len(device_labels))
    hint_lines = (
        "",
        "On Becker's Dell Lat. 7330 the following works:",
        "1: Microphone Array (Realtek(R) Au",
        "",
    )

    while True:
        # The hint is repeated before every prompt, including retries.
        for line in hint_lines:
            print(line)

        try:
            choice = int(input("Enter the index of the desired input device: "))
        except ValueError:
            # Non-numeric input: report and ask again.
            print("Invalid input. Please enter a valid device index.")
            continue

        if choice in valid_indices:
            return choice

        # Numeric but out of range: report and ask again.
        print("Invalid input. Please enter a valid device index.")
def record_audio(device_index):
    """Record audio from one input device and save it as a WAV file.

    The capture uses the module-level ``FORMAT``, ``CHANNELS``, ``RATE`` and
    ``CHUNK`` settings, runs for about ``RECORD_SECONDS`` seconds and is
    written to ``OUTPUT_FILENAME``.

    Args:
        device_index: PyAudio index of the input device to record from.
    """
    audio = pyaudio.PyAudio()
    try:
        # Look up the sample width while the PyAudio instance is still
        # alive; it is needed for the WAV header after cleanup.
        sample_width = audio.get_sample_size(FORMAT)

        # Open a microphone stream with the selected input device
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            input_device_index=device_index,
                            frames_per_buffer=CHUNK)
        try:
            print(f"Recording from: {audio.get_device_info_by_index(device_index)['name']}")

            # Record audio in chunks and store it in frames. The chunk
            # count is truncated to an integer, so the recording can be
            # slightly shorter than RECORD_SECONDS.
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]

            print("Finished recording.")
        finally:
            # Stop and close the microphone stream, even if a read failed.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
# Script entry point: ask for an input device, then record from it.
if __name__ == "__main__":
    device_index = get_input_device_index()
    record_audio(device_index)
</code>
user/rolf001/vosk/start.1694354745.txt.gz · Last modified: 2023/09/10 16:05 by rolf.becker