# Python dependencies, one requirement specifier per line (pip requirements format).

# Pinned versions.
transformers==4.44.0
tiktoken==0.6.0
fastapi==0.112.2

# Minimum version only.
accelerate>=0.26.0

# Unpinned: pip resolves whatever version satisfies the other constraints.
# NOTE(review): the flash-attn wheel below is tagged "torch1.12" in its
# filename, while torch itself is unpinned here -- confirm that the torch
# version actually installed matches the wheel's build, or pin torch.
torch
pillow

# Prebuilt flash-attn 2.5.9.post1 wheel, installed directly from the GitHub
# release URL. Per its filename tags it targets cu118, torch1.12,
# cxx11abiFALSE, cp310 (CPython 3.10) and linux_x86_64.
# NOTE(review): presumably it will not install on other interpreters or
# platforms -- verify against the deployment environment.
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu118torch1.12cxx11abiFALSE-cp310-cp310-linux_x86_64.whl