Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ Using `all` as wanted_asr parameter to main.py will attempt to start all ASRs fo
| ✅ sp | CMU Sphinx | Open Source | Offline - docker |
| ✅ vs | Alphacep Vosk | Open Source | Offline - docker |
| ✅ cq | Coqui | Open Source | Offline - docker |
| ✅ nm | Nvidia NeMo | Open Source | Offline - docker |
| ❌ sb | Speech Brain | Open Source | Offline - docker |
| ❌ nm | Nvidia NeMo | Open Source | Offline - docker |
| ✅ gg | Google | Proprietary | API set env:`GOOGLE_APPLICATION_CREDENTIALS` |
| ✅ az | Microsoft Azure | Proprietary | API set env:`AZURE_KEY` |
| ✅ aw | Amazon | Proprietary | API set env:`AWS_ACCESS_KEY_ID`<br>+`AWS_SECRET_ACCESS_KEY` or aws configure|
Expand Down
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
- 0.0.1 - initial release
- 0.0.2 - fixed common corrections pack error with requirements. Fixed issue with wizard where no ASR can be selected
- 0.0.3 - refactored ASRs so each is own file. Added arg switches for columns/enable_wer/text_normalization/hashing
- 0.0.4 - added NeMo as alpha
2 changes: 2 additions & 0 deletions models/dev_prune_delete_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

# DEV ONLY - destructive and not limited to this project:
# prunes unused Docker data system-wide (-a: all unused images, --volumes: unused volumes too).
docker system prune -a --volumes
31 changes: 31 additions & 0 deletions models/sl-nemo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
FROM python:3.9-slim

ENV TZ=Europe/London
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Model checkpoint that is downloaded into the image at build time.
# MODELNAME is read by app/main.py to locate the checkpoint file.
ENV MODELNAME='stt_en_contextnet_1024.nemo'
ENV MODELTYPE='EncDecRNNTBPEModel'
ENV MODELURL='https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_contextnet_1024/versions/1.9.0/files/stt_en_contextnet_1024.nemo'

EXPOSE 3500
# WORKDIR creates /app, so the model can be downloaded before the app code is copied in.
WORKDIR /app

# Heavy layer: system packages, Python dependencies and the model download.
# - curl is NOT removed afterwards because the HEALTHCHECK below depends on it.
# - --fail aborts the build on an HTTP error instead of saving an error page as the model.
RUN apt-get update && apt-get install -y gcc curl python3-dev python3-pip ffmpeg \
    && pip install numpy==1.22.4 fastapi uvicorn Cython torch==1.9.0+cpu torchvision==0.10.0+cpu torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html \
    && pip install 'nemo_toolkit[asr]' \
    && curl --fail -L -o /app/$MODELNAME $MODELURL \
    && apt-get remove -y gcc \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Copied after the install layer so that editing the app code does not invalidate
# the cached dependency/model layer above.
COPY app /app

HEALTHCHECK --interval=30s --timeout=5s --start-period=15s \
    CMD curl --fail http://localhost:3500/healthcheck || exit 1

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3500"]

# docker build -f Dockerfile . -t robmsmt/sl-nemo-en-16k
# docker run -d --restart unless-stopped -p 3500:3500 robmsmt/sl-nemo-en-16k:latest
# docker run -it -p 3500:3500 robmsmt/sl-nemo-en-16k:latest
# docker commit <container_id> my-broken-container && docker run -it my-broken-container /bin/bash
16 changes: 16 additions & 0 deletions models/sl-nemo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Nemo

## CONFIG
- Shortcode: ` nm `
- Docker: ` robmsmt/sl-nemo-en-16k:latest `
- InternalPort: ` 3500 `
- ExternalPort: ` 3500 `
- SampleRate: ` 16000 `
- InterfaceType: ` docker-fastapi `

## CHANGES
- tbc

## Notes
- Uses the ContextNet 1024 model (`stt_en_contextnet_1024`)
- ONNX is not used yet
Empty file added models/sl-nemo/app/__init__.py
Empty file.
83 changes: 83 additions & 0 deletions models/sl-nemo/app/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
from fastapi import FastAPI
from pydantic import BaseModel
import tempfile
from io import BytesIO
from base64 import b64decode
import argparse

# import soundfile
# import numpy as np
# import onnxruntime as rt
# import nemo
import nemo.collections.asr as nemo_asr

# Name of the checkpoint file to load; raises KeyError at import time if MODELNAME is unset.
model = os.environ["MODELNAME"]

app = FastAPI()
# nm = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")
# print(nemo_asr.models.EncDecRNNTBPEModel.list_available_models())
# nm = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=args.model)
# Loaded once at import time and shared by every request handled by this process.
nm = nemo_asr.models.EncDecRNNTBPEModel.restore_from(model)
#
#
# enc_dec_ctc_models = [(x.pretrained_model_name, nemo_asr.models.EncDecCTCModel.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecCTCModel.list_available_models() if "en" in x.pretrained_model_name]
# enc_dec_ctc_bpe_models = [(x.pretrained_model_name, nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecCTCModelBPE.list_available_models() if "en" in x.pretrained_model_name]
# enc_dec_rnn_t_bpe_models = [(x.pretrained_model_name, nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecRNNTBPEModel.list_available_models() if "en" in x.pretrained_model_name]
# enc_dec_rnn_t_models = [(x.pretrained_model_name, nemo_asr.models.EncDecRNNTModel.from_pretrained(model_name=x.pretrained_model_name)) for x in nemo_asr.models.EncDecRNNTModel.list_available_models() if "en" in x.pretrained_model_name]
#
# all_models = enc_dec_ctc_models + enc_dec_ctc_bpe_models + enc_dec_rnn_t_bpe_models + enc_dec_rnn_t_models
# print(all_models)


def disk_in_memory(wav_bytes: bytes) -> BytesIO:
    """Wrap raw wav bytes in an in-memory, file-like object.

    The buffer never touches disk and can still be read as a file by
    libraries that accept file objects. It is positioned at offset 0.

    Args:
        wav_bytes: Raw bytes of the audio file.

    Returns:
        A ``BytesIO`` holding a copy of ``wav_bytes``.
    """
    # BytesIO is already an in-memory file; routing the data through a
    # SpooledTemporaryFile first only added two extra copies of the payload.
    return BytesIO(wav_bytes)


class Audio(BaseModel):
    """Request body accepted by the /transcribe endpoint."""

    # Base64-encoded bytes of the audio file to transcribe.
    b64_wav: str
    # Sample rate reported by the client; defaults to 16000.
    sr: int = 16000


@app.get("/healthcheck")
async def healthcheck():
    """Report that the service is up by returning a fixed JSON payload."""
    status = {"ok": "true"}
    return status


# Next, we instantiate all the necessary models directly from NVIDIA NGC
# Speech Recognition model


@app.post("/transcribe")
async def transcribe(audio: Audio):
    """Transcribe a base64-encoded audio file with the loaded NeMo model.

    The decoded bytes are written to a temporary ``.wav`` file because the
    model is given file paths rather than file objects.

    Args:
        audio: Request body; ``audio.b64_wav`` holds the base64-encoded file.

    Returns:
        A dict of the form ``{"transcript": <text>}``.

    Raises:
        Any exception from base64 decoding or from the model propagates
        unchanged to the framework.
    """
    wav_bytes = b64decode(audio.b64_wav.encode("utf-8"))

    # dm = disk_in_memory(wav_bytes)
    # pcm, sample_rate = soundfile.read(dm, dtype="int16")
    # todo cannot use disk memory since nemo lib needs file - in future replace with onnx: https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_with_NeMo.ipynb

    with tempfile.NamedTemporaryFile(mode="wb", delete=True, suffix=".wav") as f:
        f.write(wav_bytes)
        # The model reopens the file by name, so the buffered bytes must be
        # pushed out first; otherwise it may see an empty or truncated file.
        f.flush()
        files_list = [f.name]
        print(files_list)
        # NOTE: synchronous call inside an async handler - it blocks the event
        # loop for the duration of the inference.
        transcript = nm.transcribe(paths2audio_files=files_list)

    # NOTE(review): presumably transcribe() returns (best_hypotheses, all_hypotheses)
    # for this model type, making [0][0] the best text for the single file - confirm
    # against the installed NeMo version.
    return {"transcript": transcript[0][0]}


if __name__ == "__main__":
    # Local development entry point (the container starts uvicorn directly instead).
    import uvicorn

    print("starting...")
    # Port 3500 matches the Dockerfile, the README config and the test client's default host.
    uvicorn.run("main:app", host="0.0.0.0", port=3500)
13 changes: 13 additions & 0 deletions models/sl-nemo/autoinspect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

# try docker log first

# Debug helper for a running container of this model's image:
# stops it, snapshots it as "broken-container1" and opens a shell in the snapshot.
# Must be run from this directory because the image name is parsed from ./README.md.
IMG=$(grep "Docker:" ./README.md | awk '{print $4}')
ID=$(docker ps | grep -F "$IMG" | awk '{ print $1 }' | head -n 1)

if [ -z "$ID" ]; then
    echo "No running container found for image: $IMG" >&2
    exit 1
fi

echo "INSPECTING: $IMG with ID: $ID"
# Call docker stop directly: wrapping it in $(...) tried to execute its output
# (the stopped container IDs) as a command.
docker stop $(docker ps -a -q --filter ancestor="$IMG" --format="{{.ID}}")
docker commit "$ID" broken-container1 && docker run -p 3500:3500 -it broken-container1 /bin/bash
# run with: uvicorn main:app --host 0.0.0.0 --port 3500
# then hit test endpoint
18 changes: 18 additions & 0 deletions models/sl-nemo/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
set -e
#CWD=${PWD##*/}
# Build the image named in this directory's README.md, stop any containers
# created from it, then start a fresh detached container.
DIR_PATH="$(dirname "${0}")"
# Read the config relative to the script so it also works when invoked from another directory.
README_PATH="$DIR_PATH/README.md"
IMG_REPO=$(grep "Docker:" "$README_PATH" | awk '{print $4}')
EXTPORT=$(grep "ExternalPort:" "$README_PATH" | awk '{print $4}')

# set -e does not catch a failed grep inside the pipelines above, so check explicitly.
if [ -z "$IMG_REPO" ] || [ -z "$EXTPORT" ]; then
    echo "Could not read Docker/ExternalPort from $README_PATH" >&2
    exit 1
fi

echo "$DIR_PATH" "$IMG_REPO" "$EXTPORT"
docker build -t "$IMG_REPO" "$DIR_PATH"

set +e
# this is empty if the container crashes
EXISTING=$(docker ps -q -a --filter ancestor="$IMG_REPO" --format="{{.ID}}")
echo $EXISTING
if [ -n "$EXISTING" ]; then
    # left unquoted on purpose: one argument per container ID
    docker stop $EXISTING
fi
set -e
docker run -p "$EXTPORT":"$EXTPORT" -d "$IMG_REPO"

## to debug - kill container and start with:
#docker run --restart unless-stopped -p "$EXTPORT":"$EXTPORT" "$IMG_REPO"
2 changes: 2 additions & 0 deletions models/sl-nemo/push.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# to upload: pushes the image built by build.sh to the registry (requires a prior `docker login`)
docker push robmsmt/sl-nemo-en-16k
34 changes: 34 additions & 0 deletions models/sl-nemo/te_st_endpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python3

import json
import base64
import requests
import pprint as pp


def main(endpoint, wav_location):
    """POST a wav file to a transcription endpoint and print the response.

    Args:
        endpoint: Full URL of the endpoint to post to.
        wav_location: Path of the wav file to send.

    The file is sent base64-encoded in a JSON body together with a fixed
    sample rate of 16000. The status code is always printed; the decoded
    JSON body is pretty-printed, or "err" if the body is not valid JSON.
    """
    # Context manager closes the file handle instead of leaking it.
    with open(wav_location, "rb") as wav_file:
        b64audio = base64.b64encode(wav_file.read()).decode("utf-8")
    print(f"Length of b64 data is:{len(b64audio)}")

    json_message = {"b64_wav": b64audio, "sr": 16000}

    r = requests.post(endpoint, json=json_message)
    print(f"Status code: {r.status_code}")
    try:
        response = r.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses; a bare except would
        # also have hidden KeyboardInterrupt and programming errors.
        print("err")
    else:
        pp.pprint(response, indent=2)


if __name__ == "__main__":
    import argparse

    # Description corrected: this script posts the wav to the endpoint, it does not print a CURL command.
    parser = argparse.ArgumentParser(description="Send a wav file, base64-encoded, to a running ASR endpoint and print the JSON response")
    parser.add_argument("--endpoint", default="/transcribe", type=str)
    parser.add_argument("--host", default="http://localhost:3500", type=str)
    parser.add_argument("--wav", default="../../speechloop/data/simple_test/wavs/109938_zebra_ch0_16k.wav", type=str)
    args = parser.parse_args()
    # Full URL is the host followed by the endpoint path.
    url = args.host + args.endpoint
    main(url, args.wav)
8 changes: 8 additions & 0 deletions models/sl-nemo/uninstall.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

#rm .INSTALLED
# Stop and remove every container created from this model's image, then delete the image.
# Must be run from this directory because the image name is parsed from ./README.md.
IMG=$(grep "Docker:" ./README.md | awk '{print $4}')
if [ -z "$IMG" ]; then
    echo "Could not read the Docker image name from ./README.md" >&2
    exit 1
fi

echo "Killing: $IMG"
CONTAINERS=$(docker ps -a -q --filter ancestor="$IMG" --format="{{.ID}}")
if [ -n "$CONTAINERS" ]; then
    # docker stop prints the IDs it stopped, which are then handed to docker rm.
    # Skipped when nothing matches so neither command is called without arguments.
    docker rm $(docker stop $CONTAINERS)
fi
echo "Deleting: $IMG"
docker image rm "$IMG"
echo "Finished removing: $IMG"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def read_file(fname):
# python3 -m pip install --upgrade setuptools wheel
setup(
name="speechloop",
version="0.0.3",
version="0.0.4",
author="robmsmt",
author_email="[email protected]",
description='A "keep it simple" collection of many speech recognition engines... Designed to help answer - what is the best ASR?',
Expand Down
Loading