源码下载
# 指定 docs_typo_mlc_chat 分支克隆
git clone -b docs_typo_mlc_chat --single-branch https://github.com/mlc-ai/mlc-llm.git
# 进入 mlc-llm 项目
cd mlc-llm
# 克隆子模块代码
git submodule update --init --recursive
# 进入 MLCChat 目录
cd ./android/MLCChat
编辑环境变量
vim ~/.bashrc  # 编辑环境变量(在文件末尾添加以下 export 配置, 保存后执行 source ~/.bashrc 生效)
export ANDROID_NDK=/home/lenovo/Android/Sdk/ndk/26.1.10909125
export ANDROID_HOME=/home/lenovo/Android/Sdk
export PATH=$PATH:/home/lenovo/Android/Sdk/cmake/3.10.2.4988404/bin
export PATH=$PATH:/home/lenovo/Android/Sdk/platform-tools
export TVM_NDK_CC=$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android23-clang
export JAVA_HOME=/home/lenovo/.jdks/corretto-18.0.2
export PATH=$PATH:$JAVA_HOME/bin
export MLC_LLM_SOURCE_DIR=/sda/xj/t/mlc-llm
export TVM_SOURCE_DIR=/sda/xj/t/mlc-llm/3rdparty/tvm
<font style="color:#DF2A3F;">需要注意的是jdk版本要和androidStudio里面使用的版本保持一致。</font>
<font style="color:#DF2A3F;"></font>
export MLC_LLM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm
export TVM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm/3rdparty/tvm
export MLC_LLM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm
export TVM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm/3rdparty/tvm
转换模型权重
下载 MiniCPM-2B-dpo-bf16-llama-format 模型库
官网 huggingface 下载 openbmb/MiniCPM-2B-dpo-bf16-llama-format ,放入 dist/models 目录。
convert_weight 权重转换
# 进入 mlc-llm 的安卓 MLCChat 根目录
cd D:\mlc-llm\android\MLCChat
# MiniCPM-2B-dpo-bf16-llama-format 模型转换
mlc_llm convert_weight ./dist/models/MiniCPM-2B-dpo-bf16-llama-format/ --quantization q4f16_1 \
    -o dist/bundle/MiniCPM-2B-dpo-bf16-llama-format-q4f16_1
llama8b模型转化
mlc_llm convert_weight ./dist/models/Llama-3-8B-Instruct-llama-format/ --quantization q3f16_1 -o dist/bundle/Llama-3-8B-Instruct-llama-format-q3f16_1
生成MLC聊天配置
mlc_llm gen_config ./dist/models/MiniCPM-2B-dpo-bf16-llama-format/ --quantization q4f16_1 \
    --conv-template redpajama_chat -o dist/bundle/MiniCPM-2B-dpo-bf16-llama-format-q4f16_1/
执行成功后, dist/bundle/MiniCPM-2B-dpo-bf16-llama-format-q4f16_1 目录下会多生成 mlc-chat-config.json 、 tokenizer.json 、tokenizer.model 、 tokenizer_config.json 四个文件。
mlc_llm gen_config ./dist/models/llama_3.1_0.5_4-30/ --quantization q4f16_1 --conv-template redpajama_chat -o dist/bundle/llama_3.1_0.5_4-30-q4f16_1/
mlc_llm gen_config ./dist/models/llama3_pruned/ --quantization q0f16 \
    --conv-template redpajama_chat -o dist/bundle/llama3_pruned/
mlc_llm convert_weight ./dist/models/llama3_pruned/ --quantization q0f16 \
    -o dist/bundle/llama3_pruned-format-q0f16
mlc_llm gen_config ./dist/models/llama3_pruned/ --quantization q0f16 \
    --conv-template redpajama_chat -o dist/bundle/MiniCPM-2B-dpo-bf16-llama-format-q4f16_1/
编译安卓依赖库&jar包
把转换好的 MiniCPM-2B-dpo-bf16-llama-format-q4f16_1 模型复制到
mlc_llm\model_weights\hf\mlc-ai 目录下。(<font style="color:#DF2A3F;">model_weights需要创建</font>)若找不到会
去官网 https://huggingface.co/mlc-ai 下载。不建议去下载。下载模型配置文件在MLCChat/mlc-package-config.json内编辑。
mlc_llm package
会生成以下 /dist/lib/mlc4j 目录下的文件。一个<font style="color:#DF2A3F;">libtvm4j_runtime_packed.so</font>、<font style="color:#DF2A3F;">tvm4j_core.jar</font>。
构建apk
打开AS, 点击Build → Generate Signed Bundle / APK
启动AS过程中不小心将gradle给清空后,再次下载会很慢。可以使用国内腾讯源:
<font style="color:rgb(0, 0, 0);background-color:rgb(149, 236, 105);">https://mirrors.cloud.tencent.com/gradle/gradle-8.5-bin.zip</font>
拷贝模型到手机端
cd mlc-llm\android\MLCChat
python bundle_weight.py --apk-path app/release/app-release.apk
这里的release指的是在AS中需要设置应用前面编译构建正式应用。需要在操作6中完成。
mlc_llm convert_weight ./dist/models/MiniCPM-2B-dpo-bf16-llama-format/ --quantization q4f16_1 \
    -o dist/bundle/MiniCPM-2B-dpo-bf16-llama-format-q4f16_1
python bundle_weight.py --apk-path app/debug/app-debug.apk
其他
<font style="color:rgb(56, 58, 66);">python -m pip </font><font style="color:rgb(64, 120, 242);">install</font><font style="color:rgb(56, 58, 66);"> -U mlc-llm-nightly-cu121.whl mlc-ai-nightly-cu121.whl</font>
<font style="color:rgb(56, 58, 66);">mlc_llm convert_weight ./dist/models/llama3_pruned/ --quantization q0f16 -o dist/bundle/llama3-pruned-format-q0f16</font>
<font style="color:rgb(56, 58, 66);">mlc_llm gen_config ./dist/models/llama3_pruned/ --quantization q0f16 --conv-template redpajama_chat -o dist/bundle/llama3-pruned-format-q0f16/</font>
mlc_llm convert_weight ./dist/models/llama3_pruned/ --quantization q4f16_1 -o dist/bundle/llama3-pruned-format-q4f16_1
mlc_llm gen_config ./dist/models/llama3_pruned/ --quantization q4f16_1 --conv-template redpajama_chat -o dist/bundle/llama3-pruned-format-q4f16_1/
/home/xj/sda/xj/mlc-llm-llama3/mlc-llm/android/MLCChat
可用路径
conda activate mlc-chat-cpm3
project path: /sda/xj/mlc-llm-llama3/mlc-llm/android/MLCChat
setting env:
注意点: 在/sda/xj/mlc-llm-llama3/mlc-llm目录执行
export ANDROID_NDK=/home/lenovo/Android/Sdk/ndk/26.1.10909125
export ANDROID_HOME=/home/lenovo/Android/Sdk
export PATH=$PATH:/home/lenovo/Android/Sdk/cmake/3.10.2.4988404/bin
export PATH=$PATH:/home/lenovo/Android/Sdk/platform-tools
export TVM_NDK_CC=$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android23-clang
export JAVA_HOME=/home/lenovo/.jdks/corretto-18.0.2
export PATH=$PATH:$JAVA_HOME/bin
export MLC_LLM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm
export TVM_SOURCE_DIR=/sda/xj/mlc-llm-llama3/mlc-llm/3rdparty/tvm
注意点:在/sda/xj/mlc-llm-llama3/mlc-llm/android/MLCChat执行mlc_llm package
注意点:生成聊天配置也是在/sda/xj/mlc-llm-llama3/mlc-llm/android/MLCChat这个目录执行指令
192.168.1.129
export ANDROID_NDK=/home/xj/Android/Sdk/ndk/26.1.10909125
export ANDROID_HOME=/home/xj/Android/Sdk
export PATH=$PATH:/home/xj/Android/Sdk/cmake/3.10.2.4988404/bin
export PATH=$PATH:/home/xj/Android/Sdk/platform-tools
export TVM_NDK_CC=$ANDROID_NDK/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android23-clang
export JAVA_HOME=/home/xj/.jdks/corretto-18.0.2
export PATH=$PATH:$JAVA_HOME/bin
export MLC_LLM_SOURCE_DIR=/home/xj/mlc-llm
export TVM_SOURCE_DIR=/home/xj/mlc-llm/3rdparty/tvm
source $HOME/.cargo/env
# 转换模型权重
mlc_llm convert_weight ./dist/models/Qwen1.5-1.8B-Chat/ --quantization q4f16_1 -o dist/models/qwen1.5-1.8b-q4f16_1
# 生成聊天配置
mlc_llm gen_config ./dist/models/Qwen1.5-1.8B-Chat/ --quantization q4f16_1 --conv-template redpajama_chat -o dist/models/qwen1.5-1.8b-q4f16_1/
mlc_llm gen_config ./dist/models/Qwen1.5-1.8B-Chat \
    --model-type qwen2 \
    --quantization q4f16_1 \
    --conv-template chatml \
    --context-window-size 2048 \
    --max-batch-size 1 \
    -o dist/models/qwen1.5-1.8b-q4f16_1
自动化编译打包
进入android project下执行构建:
cd /sda/xj/mlc-llm-llama3/mlc-llm/android/MLCChat
使用Gradle Wrapper编译项目:
./gradlew build
打包Debug版本的APK:
./gradlew assembleDebug
打包Release版本的APK:
./gradlew assembleRelease
清理项目:
./gradlew clean










网友评论