====== Automatic Speech Recognition (ASR) with vosk ====== ===== Sources ===== ==== vosk ==== * https://alphacephei.com/vosk/ * https://github.com/alphacep/vosk-api ==== Dataquest ==== * https://github.com/dataquestio/project-walkthroughs/blob/master/microphone/microphone.ipynb {{youtube>2kSPbH4jWME?}} \\ ===== Installation =====


conda create -n vosk python=3.9
conda activate vosk
conda install -c conda-forge jupyterlab numpy matplotlib pandas 
#conda install -c conda-forge ipywidgets
#conda install -c conda-forge scipy scikit-learn


pip install vosk
pip install pyaudio

On Windows the vosk models are **cached here**: ''C:\Users\<username>\.cache\vosk'' ==== ffmpeg ==== * https://www.gyan.dev/ffmpeg/builds/ ===== pyaudio: Find the right audio device index of your favorite microphone =====



import pyaudio
import wave

# Recording parameters shared by the stream setup and the WAV writer below.
FORMAT = pyaudio.paInt16  # sample format passed to audio.open(); also determines the WAV sample width
CHANNELS = 1  # channel count used for both the input stream and the WAV header
RATE = 44100  # sampling rate in Hz used for both the input stream and the WAV header
CHUNK = 1024  # frames requested per stream.read() call (also frames_per_buffer)
RECORD_SECONDS = 5  # Adjust this to change the duration of the recording
OUTPUT_FILENAME = "output.wav"  # path of the WAV file written by record_audio()

def list_audio_devices():
    """Return one "<index>: <name>" label per audio device known to PyAudio.

    Every device reported by PyAudio is listed (no filtering is applied), so
    the position of a label in the returned list equals the device index
    that PyAudio expects.

    Returns:
        list[str]: Labels ordered by device index.
    """
    audio = pyaudio.PyAudio()
    try:
        return [
            f"{index}: {audio.get_device_info_by_index(index)['name']}"
            for index in range(audio.get_device_count())
        ]
    finally:
        # Release the PyAudio instance even if a device query fails.
        audio.terminate()

def get_input_device_index():
    """Show the device list and prompt until a listed index is entered.

    The list comes from list_audio_devices(). The prompt (including the
    hint about a known-good device) is repeated after every rejected answer;
    an answer is rejected when it is not an integer or lies outside the
    range of listed devices.

    Returns:
        int: The index chosen by the user.
    """
    available = list_audio_devices()
    device_count = len(available)

    print("Available audio input devices:")
    for entry in available:
        print(entry)

    # Hint block shown before every prompt.
    hint_lines = (
        "",
        "On Becker's Dell Lat. 7330 the following works:",
        "1: Microphone Array (Realtek(R) Au",
        "",
    )

    while True:
        for hint in hint_lines:
            print(hint)

        try:
            choice = int(input("Enter the index of the desired input device: "))
        except ValueError:
            # Not an integer: report it and ask again.
            print("Invalid input. Please enter a valid device index.")
            continue

        if choice < 0 or choice >= device_count:
            print("Invalid input. Please enter a valid device index.")
            continue

        return choice

def record_audio(device_index):
    """Record from the given input device and save the audio as a WAV file.

    Reads int(RATE / CHUNK * RECORD_SECONDS) chunks of CHUNK frames each from
    the device and writes them to OUTPUT_FILENAME using CHANNELS, RATE and the
    sample width that corresponds to FORMAT. Because the chunk count is
    truncated to an integer, the recording can be marginally shorter than
    RECORD_SECONDS.

    Args:
        device_index: PyAudio index of the input device to record from.

    Raises:
        Whatever PyAudio raises when the stream cannot be opened or read.
        The stream and the PyAudio instance are released in that case too,
        and no file is written.
    """
    audio = pyaudio.PyAudio()
    try:
        # Look up the sample width now so that no method is called on the
        # PyAudio instance after it has been terminated.
        sample_width = audio.get_sample_size(FORMAT)

        # Open a microphone stream with the selected input device
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            input_device_index=device_index,
                            frames_per_buffer=CHUNK)
        try:
            print(f"Recording from: {audio.get_device_info_by_index(device_index)['name']}")

            # Record audio in chunks and store it in frames
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]

            print("Finished recording.")
        finally:
            # Stop and close the microphone stream, even after a failed read
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

if __name__ == "__main__":
    # Ask the user for a microphone, then record from it.
    record_audio(get_input_device_index())