A collection of experiments related to tuning small language models for specific tasks.
pip install uv
uv venv
source .venv/bin/activate
uv sync --group torch
uv sync --no-build-isolation --group training
pyright --createstub transformers
vLLM on CUDA (Linux):
export CUDA_HOME=/usr/local/cuda
sudo apt purge cmake
uv pip install setuptools_scm cmake
uv pip install vllm -vv --no-build-isolation
vLLM on macOS:
uv pip install pip
pip install vllm==0.7.0 --use-deprecated=legacy-resolver
llama-cpp-python (with the server extra, using the Metal wheels) can also be installed with:
uv pip install "llama-cpp-python[server]" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal
pyright --createstub transformers
# Training
uv run modal run -d modal_entrypoint.py::training
# Generation
uv run modal run -d modal_entrypoint.py::generation
# Inference
modal deploy modal_vllm.py
python util_scripts.py test_openai_api
python util_scripts.py download_dataset gutenberg_backtranslate_from_txt