From d368911d340a786d909d31d4ba241e5b4d85c2ae Mon Sep 17 00:00:00 2001
From: Erik Wrenholt
Date: Sat, 8 Apr 2023 23:26:14 +0000
Subject: [PATCH 1/2] use an instruct model

---
 .devcontainer/devcontainer.json | 140 ++++++++++++++++----------
 docker-compose.yml              |   2 +-
 server/app.py                   |   3 +-
 server/entrypoint.sh            |   2 +-
 4 files changed, 74 insertions(+), 73 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index e2b1bef..38f8dea 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,73 +1,73 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 // README at: https://github.com/devcontainers/templates/tree/main/src/miniconda
 {
-  "name": "Miniconda (Python 3)",
-  "image": "mcr.microsoft.com/devcontainers/miniconda:0-3",
-  "features": {
-    "ghcr.io/devcontainers/features/common-utils:2": {
-      "installZsh": true,
-      "configureZshAsDefaultShell": true,
-      "installOhMyZsh": true,
-      "upgradePackages": true,
-      "username": "automatic",
-      "userUid": "automatic",
-      "userGid": "automatic"
-    },
-    "ghcr.io/devcontainers/features/node:1": {
-      "nodeGypDependencies": true,
-      "version": "18"
-    },
-    "ghcr.io/devcontainers/features/docker-in-docker:2": {
-      "version": "latest",
-      "enableNonRootDocker": "true",
-      "moby": "true"
-    },
-    "ghcr.io/stuartleeks/dev-container-features/shell-history:0": {},
-  },
-  "mounts": [
-    // node modules cache
-    "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume",
-    // models cache
-    {
-      "source": "models",
-      "target": "/models",
-      "type": "volume"
-    },
-    // pip cache
-    {
-      "source": "${localWorkspaceFolderBasename}-pip-cache",
-      "target": "/home/vscode/.cache/pip",
-      "type": "volume"
-    },
-  ],
-  "remoteEnv": {
-    "MODELS_DIR": "/models",
-    "MODEL_FILE": "RWKV-4-Pile-1B5-20220929-ctx4096"
-  },
-  // If you have a gpu this will pass it in!
-  // "runArgs": [
-  //   "--gpus",
-  //   "all"
-  // ],
-  // Use 'forwardPorts' to make a list of ports inside the container available locally.
-  "forwardPorts": [
-    3000,
-    8080
-  ],
-  "postCreateCommand": "npm i -g diff-so-fancy && conda init zsh && sudo chown -R vscode node_modules ~/.cache",
-  "postStartCommand": "npm install",
-  "customizations": {
-    "vscode": {
-      "extensions": [
-        "dbaeumer.vscode-eslint",
-        "GitHub.copilot",
-        "ms-python.pylint",
-        "ms-python.python",
-        "streetsidesoftware.code-spell-checker",
-      ],
-      "settings": {}
-    },
-  },
-  // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
-  // "remoteUser": "root"
-}
+    "name": "Miniconda (Python 3)",
+    "image": "mcr.microsoft.com/devcontainers/miniconda:0-3",
+    "features": {
+        "ghcr.io/devcontainers/features/common-utils:2": {
+            "installZsh": true,
+            "configureZshAsDefaultShell": true,
+            "installOhMyZsh": true,
+            "upgradePackages": true,
+            "username": "automatic",
+            "userUid": "automatic",
+            "userGid": "automatic"
+        },
+        "ghcr.io/devcontainers/features/node:1": {
+            "nodeGypDependencies": true,
+            "version": "18"
+        },
+        "ghcr.io/devcontainers/features/docker-in-docker:2": {
+            "version": "latest",
+            "enableNonRootDocker": "true",
+            "moby": "true"
+        },
+        "ghcr.io/stuartleeks/dev-container-features/shell-history:0": {},
+    },
+    "mounts": [
+        // node modules cache
+        "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume",
+        // models cache
+        {
+            "source": "models",
+            "target": "/models",
+            "type": "volume"
+        },
+        // pip cache
+        {
+            "source": "${localWorkspaceFolderBasename}-pip-cache",
+            "target": "/home/vscode/.cache/pip",
+            "type": "volume"
+        },
+    ],
+    "remoteEnv": {
+        "MODELS_DIR": "/workspaces/rwkv-web-inference/models",
+        "MODEL_FILE": "RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096"
+    },
+    // If you have a gpu this will pass it in!
+    // "runArgs": [
+    //     "--gpus",
+    //     "all"
+    // ],
+    // Use 'forwardPorts' to make a list of ports inside the container available locally.
+    "forwardPorts": [
+        3000,
+        8080
+    ],
+    "postCreateCommand": "npm i -g diff-so-fancy && conda init zsh && sudo chown -R vscode node_modules ~/.cache",
+    "postStartCommand": "npm install",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "dbaeumer.vscode-eslint",
+                "GitHub.copilot",
+                "ms-python.pylint",
+                "ms-python.python",
+                "streetsidesoftware.code-spell-checker",
+            ],
+            "settings": {}
+        },
+    },
+    // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+    // "remoteUser": "root"
+}
\ No newline at end of file
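Note on the remoteEnv change above: MODELS_DIR now points at the workspace checkout (/workspaces/rwkv-web-inference/models) while docker-compose.yml below keeps /models, and MODEL_FILE switches from the base Pile checkpoint to the Raven instruct checkpoint. A minimal sketch of how the two variables presumably combine into a checkpoint path; this is an assumption, since the diff only shows entrypoint.sh testing for ${MODEL_FILE}.pth, not how app.py builds the path:

    import os

    # Defaults here are the docker-compose values; the devcontainer overrides them.
    models_dir = os.environ.get("MODELS_DIR", "/models")
    model_file = os.environ.get("MODEL_FILE", "RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096")

    # entrypoint.sh checks for exactly this file before downloading.
    model_path = os.path.join(models_dir, model_file + ".pth")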
diff --git a/docker-compose.yml b/docker-compose.yml
index 4603004..104115f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,7 +6,7 @@ services:
 #    restart: always
     environment:
       - MODELS_DIR=/models
-      - MODEL_FILE=RWKV-4-Pile-1B5-20220929-ctx4096
+      - MODEL_FILE=RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096
     volumes:
       - models:/models
     ports:
diff --git a/server/app.py b/server/app.py
index 6c6ff8a..42813a5 100644
--- a/server/app.py
+++ b/server/app.py
@@ -40,7 +40,8 @@
 ########################################################################################################
 
 # args.RUN_DEVICE = "cuda" # 'cpu' (already very fast) // 'cuda'
-# args.FLOAT_MODE = "bf16" # fp32 (good for cpu) // fp16 (might overflow) // bf16 (less accurate)
+# fp32 (good for cpu) // fp16 (might overflow) // bf16 (less accurate)
+# args.FLOAT_MODE = "fp16"
 
 args.RUN_DEVICE = "cpu" # 'cpu' (already very fast) // 'cuda'
 # fp32 (good for cpu) // fp16 (might overflow) // bf16 (less accurate)
diff --git a/server/entrypoint.sh b/server/entrypoint.sh
index 70ed285..59d740a 100755
--- a/server/entrypoint.sh
+++ b/server/entrypoint.sh
@@ -11,7 +11,7 @@ fi
 pushd "$MODELS_DIR"
 
 if [ ! -f "${MODEL_FILE}.pth" ]; then
-    wget -q https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-20220929-ctx4096.pth
+    wget -q https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096.pth
 fi
 
 popd
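The entrypoint change above completes the model swap: the Raven checkpoint is fetched from the rwkv-4-raven repo only when ${MODEL_FILE}.pth is absent from the models volume. The same download-if-missing guard, sketched in Python for clarity (illustrative only; entrypoint.sh remains the actual mechanism, and the URL is the one from the patch):

    import os
    import urllib.request

    URL = ("https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/"
           "RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096.pth")
    dest = os.path.join(os.environ["MODELS_DIR"], os.environ["MODEL_FILE"] + ".pth")

    # Skip the multi-gigabyte download when the volume already has the checkpoint.
    if not os.path.exists(dest):
        urllib.request.urlretrieve(URL, dest)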
From 200c66a0bc24a49b10daff22a22748abad6e1e25 Mon Sep 17 00:00:00 2001
From: Erik Wrenholt
Date: Sun, 9 Apr 2023 14:46:21 +0000
Subject: [PATCH 2/2] got cuda working

---
 .devcontainer/devcontainer.json |  8 ++++----
 scripts/dev.js                  | 32 ++++++++++++++++----------------
 server/app.py                   | 19 +++++++++++++++----
 server/requirements.txt         |  7 +++++--
 4 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 38f8dea..8b684cb 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -45,10 +45,10 @@
         "MODEL_FILE": "RWKV-4-Raven-1B5-v8-Eng-20230408-ctx4096"
     },
     // If you have a gpu this will pass it in!
-    // "runArgs": [
-    //     "--gpus",
-    //     "all"
-    // ],
+    "runArgs": [
+        "--gpus",
+        "all"
+    ],
     // Use 'forwardPorts' to make a list of ports inside the container available locally.
     "forwardPorts": [
         3000,
diff --git a/scripts/dev.js b/scripts/dev.js
index f95b9d8..f5c7170 100644
--- a/scripts/dev.js
+++ b/scripts/dev.js
@@ -2,25 +2,25 @@ const concurrently = require('concurrently');
 const path = require('node:path');
 
 const { result } = concurrently(
-  [
+    [
+        {
+            command: 'npm run dev',
+            name: 'chat client',
+            cwd: path.resolve(__dirname, '../packages/chat-client'),
+        },
+        {
+            command: "/opt/conda/bin/python app.py 0 -h 0.0.0.0 -p 8080",
+            name: 'chat server',
+            cwd: path.resolve(__dirname, '../server'),
+        }
+    ],
     {
-      command: 'npm run dev',
-      name: 'chat client',
-      cwd: path.resolve(__dirname, '../packages/chat-client'),
-    },
-    {
-      command: "flask run -h 0.0.0.0 -p 8080",
-      name: 'chat server',
-      cwd: path.resolve(__dirname, '../server'),
+        prefix: 'name',
+        killOthers: ['failure', 'success'],
     }
-  ],
-  {
-    prefix: 'name',
-    killOthers: ['failure', 'success'],
-  }
 );
 
 result.catch((error) => {
-  console.error(error);
-  process.exit(1);
+    console.error(error);
+    process.exit(1);
 })
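Two things to note before the app.py hunk below. First, dev.js now runs app.py directly under the conda Python, passing 0 as argv[1], which app.py feeds into CUDA_VISIBLE_DEVICES; the -h and -p flags appear to be leftovers from the old flask run command, since the new __main__ block hardcodes host and port. Second, app.py now hardcodes RUN_DEVICE = "cuda" with fp16, so a container started without the --gpus all runArgs would fail at model load. A hedged sketch of a runtime fallback, not part of this patch:

    import types

    import torch

    args = types.SimpleNamespace()  # stand-in for the args object app.py configures
    if torch.cuda.is_available():
        args.RUN_DEVICE, args.FLOAT_MODE = "cuda", "fp16"  # the settings this patch enables
    else:
        args.RUN_DEVICE, args.FLOAT_MODE = "cpu", "fp32"   # the settings it comments out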
wrong output
+if args.RUN_DEVICE == "cuda":
+    print("cuda device count: ", torch.cuda.device_count())
+    print("current cuda device: ", torch.cuda.current_device())
+    print(torch.cuda.get_device_name(0))
+
+
 TOKEN_MODE = "pile"
 WORD_NAME = [
     "20B_tokenizer.json",
@@ -240,3 +247,7 @@ def help_response():
     response.headers['Access-Control-Allow-Headers'] = 'Origin,X-Requested-With,Content-Type,Accept,Authorization'
 
     return response
+
+
+if __name__ == '__main__':
+    app.run(host="0.0.0.0", port=8080)
diff --git a/server/requirements.txt b/server/requirements.txt
index dfb708b..c58a4d0 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,4 +1,7 @@
-torch==1.13.1
+torch==1.13.1+cu116
 numpy==1.24.1
 flask==2.2.2
-transformers==4.25.1
\ No newline at end of file
+transformers==4.25.1
+
+# package location
+--find-links https://download.pytorch.org/whl/torch_stable.html
\ No newline at end of file
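The requirements change pins the CUDA 11.6 wheel (torch==1.13.1+cu116) and adds the extra PyTorch wheel index via --find-links so pip can resolve it. A quick smoke test for the resulting environment, assuming the container was started with the --gpus all runArgs enabled earlier in this series:

    import torch

    print(torch.__version__)          # expect "1.13.1+cu116" with the pinned wheel
    print(torch.cuda.is_available())  # True only if the GPU was passed through
    if torch.cuda.is_available():
        # Same call the new startup logging in app.py uses.
        print(torch.cuda.get_device_name(0))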