Run inference on Mistral 7B using NVIDIA TensorRT-LLM¶
Welcome!
In this notebook, we will walk through converting Mistral 7B to the TensorRT-LLM format and building an engine for it. TensorRT-LLM provides users with an easy-to-use Python API to define Large Language Models (LLMs) and build TensorRT engines that contain state-of-the-art optimizations to perform inference efficiently on NVIDIA GPUs. TensorRT-LLM was recently featured in the Phind-70B release as their preferred framework for performing inference!
See the GitHub repo for more examples and documentation!
A note about running Jupyter Notebooks: Press Shift + Enter to run a cell. A * in the left-hand cell box means the cell is running; a number means it has completed. If your notebook is acting up, you can interrupt a too-long process by interrupting the kernel (Kernel tab -> Interrupt Kernel) or even restarting the kernel (Kernel tab -> Restart Kernel). Note that restarting the kernel will require you to run everything from the beginning.
Deployment powered by Brev.dev 🤙
Step 1 - Install TensorRT-LLM¶
We first install TensorRT-LLM and some additional packages that are used during the conversion process.
!pip install tensorrt_llm -U --pre --extra-index-url https://pypi.nvidia.com
!pip uninstall -y mpmath
!pip install mpmath==1.3.0
!pip install ipywidgets
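As a quick sanity check that the installation succeeded, you can import the package and print its version (the exact version string will vary depending on when you run this):

import tensorrt_llm
print(tensorrt_llm.__version__)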
Step 2 - Convert Mistral to the TensorRT format¶
Next we use TensorRT-LLM's conversion and build scripts to first convert the model and then build the engine. Because Mistral 7B shares its architecture with Llama, we use the Llama example's convert_checkpoint.py script. TensorRT-LLM offers a plethora of features that you can enable during conversion (see more examples in the documentation here), including:
- FP8 KV Cache
- SmoothQuant
- INT8 KV Cache

A sketch of enabling one of these options is shown right after this list.
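For instance, once you download convert_checkpoint.py below, recent Llama-example versions expose flags such as --int8_kv_cache and --smoothquant. The exact flag names vary between TensorRT-LLM releases, so check the script's --help output first; the second command is a minimal sketch, assuming those flags exist in your version:

# List the options supported by your installed version first
!python convert_checkpoint.py --help
# Hypothetical: convert with an INT8 KV cache and SmoothQuant enabled
!python convert_checkpoint.py --model_dir mistralai/Mistral-7B-v0.1 --output_dir ./tllm_checkpoint_1gpu_mistral_int8 --dtype float16 --int8_kv_cache --smoothquant 0.5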
!wget https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/llama/convert_checkpoint.py -P .
!wget https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/run.py -P .
!wget https://raw.githubusercontent.com/NVIDIA/TensorRT-LLM/main/examples/utils.py -P .
!python convert_checkpoint.py --model_dir mistralai/Mistral-7B-v0.1 --output_dir ./tllm_checkpoint_1gpu_mistral --dtype float16
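The conversion produces a TensorRT-LLM checkpoint, typically a config.json plus one safetensors weight file per rank; you can inspect the output directory to confirm it was written:

!ls -lh ./tllm_checkpoint_1gpu_mistral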
!mkdir -p mistral_engine
!trtllm-build --checkpoint_dir ./tllm_checkpoint_1gpu_mistral --output_dir ./mistral_engine --gemm_plugin float16 --max_input_len 32256
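Once the build finishes, the engine directory should hold the serialized engine alongside its config (file names can differ slightly between TensorRT-LLM versions):

!ls -lh ./mistral_engine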
!python3 run.py --max_output_len=50 --tokenizer_dir mistralai/Mistral-7B-v0.1 --engine_dir=./mistral_engine --max_attention_window_size=4096 --input_text "Swap memory is"
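Mistral 7B uses sliding-window attention with a 4096-token window, which is why --max_attention_window_size is capped at 4096 here. If you want to try your own prompts, you can check their token counts with the Hugging Face tokenizer (transformers is pulled in as a TensorRT-LLM dependency; this snippet is illustrative):

from transformers import AutoTokenizer

# Same tokenizer that run.py loads for this engine
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
prompt = "Swap memory is"
num_tokens = len(tokenizer(prompt).input_ids)
print(num_tokens)  # prompts must stay within --max_input_len (32256)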