преди 5 месеца · b3a6fb94c8
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,4 @@
 
				+.git
			
 
				+node_modules
			
 
				+dist
			
 
				+.venv
			
--- a/.github/workflows/docker_publish.yml
+++ b/.github/workflows/docker_publish.yml
@@ -0,0 +1,58 @@
 
				+name: CI
			
 
				+
			
 
				+on:
			
 
				+  push:
			
 
				+    branches:
			
 
				+      - main
			
 
				+      - feat/*
			
 
				+      - develop
			
 
				+    tags:
			
 
				+      - v*
			
 
				+
			
 
				+jobs:
			
 
				+  build:
			
 
				+    runs-on: ubuntu-latest
			
 
				+
			
 
				+    steps:
			
 
				+      - name: Checkout code
			
 
				+        uses: actions/checkout@v3
			
 
				+      - name: Set up QEMU for ARM emulation
			
 
				+        uses: docker/setup-qemu-action@v2
			
 
				+        with:
			
 
				+          platforms: linux/amd64,linux/arm64
			
 
				+      - name: Login to ALIYUN Docker Hub
			
 
				+        uses: docker/login-action@v1 
			
 
				+        with:
			
 
				+          username: ${{ secrets.ALIYUN_DOCKER_HUB_USERNAME }}
			
 
				+          password: ${{ secrets.ALIYUN_DOCKER_HUB_ACCESS_TOKEN }}
			
 
				+          registry: registry.cn-hangzhou.aliyuncs.com
			
 
				+      - name: Set Docker tag
			
 
				+        id: docker_tag
			
 
				+        run: |
			
 
				+          if [ "${GITHUB_EVENT_NAME}" == "push" ] && [[ "${GITHUB_REF}" == refs/tags/* ]]; then
			
 
				+            # Tag push: use the tag name directly. Ref data is read from the runner's default GITHUB_* env vars rather than inlined workflow expressions, so a crafted ref name cannot inject shell code.
			
 
				+            echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_ENV"
			
 
				+          else
			
 
				+            # Branch push: main gets a fixed tag; any other branch gets <branch with '/' replaced by '-'>-<first 8 chars of the commit SHA>
			
 
				+            if [ "${GITHUB_REF_NAME}" == "main" ]; then
			
 
				+              echo "tag=main-latest" >> "$GITHUB_ENV"
			
 
				+            else
			
 
				+              branch=$(echo "${GITHUB_REF_NAME}" | sed 's/\//-/g')
			
 
				+              short_sha=$(echo "${GITHUB_SHA}" | cut -c1-8)
			
 
				+              echo "tag=${branch}-${short_sha}" >> "$GITHUB_ENV"
			
 
				+            fi
			
 
				+          fi
			
 
				+
			
 
				+      - name: Build and push adhweb docker image
			
 
				+        run: |
			
 
				+          docker buildx create --use
			
 
				+          docker buildx build --platform linux/amd64,linux/arm64 -t ${{secrets.ALIYUN_DOCKER_HUB_NAMESPACE}}/adh-web:${{ env.tag }} -f docker/adhWeb.Dockerfile . --push
			
 
				+      - name: Build and push adhserver docker image
			
 
				+        run: |
			
 
				+          docker buildx create --use
			
 
				+          docker buildx build --platform linux/amd64,linux/arm64 -t ${{secrets.ALIYUN_DOCKER_HUB_NAMESPACE}}/adh-api:${{ env.tag }} -f docker/adhServer.Dockerfile . --push
			
 
				+
			
 
				+
			
 
				+          
			
 
				+
			
 
				+          
			
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,45 @@
 
				+# IDE File
			
 
				+.idea
			
 
				+.vscode
			
 
				+
			
 
				+# log File
			
 
				+logs
			
 
				+*log.txt
			
 
				+
			
 
				+# Output File
			
 
				+outputs
			
 
				+*.wav
			
 
				+*.mp3
			
 
				+*.mp4
			
 
				+
			
 
				+# Python tmp File
			
 
				+__pycache__
			
 
				+
			
 
				+# web tmp File
			
 
				+node_modules
			
 
				+dist
			
 
				+package-lock.json
			
 
				+
			
 
				+# local Folder
			
 
				+data
			
 
				+volumes
			
 
				+config.yaml
			
 
				+.DS_Store
			
 
				+
			
 
				+# Environment variables (contains sensitive API keys)
			
 
				+.env
			
 
				+
			
 
				+# Electron build output
			
 
				+dist-electron
			
 
				+out
			
 
				+*.exe
			
 
				+*.dmg
			
 
				+*.AppImage
			
 
				+
			
 
				+# SSL Certificates (private keys should never be committed)
			
 
				+certs/*.key
			
 
				+certs/*.crt
			
 
				+certs/*.pem
			
 
				+certs/*.csr
			
 
				+certs/*.conf
			
 
				+!certs/README.md
			
--- a/CODE_CLEANUP_SUMMARY.md
+++ b/CODE_CLEANUP_SUMMARY.md
@@ -0,0 +1,91 @@
 
				+# 项目代码整理总结
			
 
				+
			
 
				+## 📋 整理完成时间
			
 
				+2025-02-03
			
 
				+
			
 
				+## ✅ 已完成的整理工作
			
 
				+
			
 
				+### 1. 删除未使用的依赖
			
 
				+- ✅ **已删除** `web/web` 文件夹
			
 
				+  - 包含未使用的 TensorFlow.js 和 BlazeFace 依赖
			
 
				+  - 项目实际使用后端 API 进行人脸检测，不需要这些前端库
			
 
				+
			
 
				+### 2. .gitignore 优化
			
 
				+- ✅ **已更新** `.gitignore` 文件，新增以下忽略规则：
			
 
				+  - Python 缓存文件：`__pycache__/`, `*.pyc`, `*.pyo`
			
 
				+  - 日志文件：`logs/`, `*.log`
			
 
				+  - 构建产物：`dist/`, `.next/`, `out/`, `build/`
			
 
				+  - 临时文件：`*.tmp`, `*.temp`, `*.cache`
			
 
				+  - 环境变量：`.env`, `.env.local`, `*.env`
			
 
				+  - SSL 证书：`certs/*.key`, `certs/*.crt`
			
 
				+  - OS 文件：`.DS_Store`, `Thumbs.db`
			
 
				+  - IDE 文件：`*.swp`, `*.swo`
			
 
				+
			
 
				+### 3. 敏感信息检查
			
 
				+- ✅ **已检查** 所有配置文件
			
 
				+  - 所有引擎配置文件中的 `api_key` 字段均为空字符串（安全）
			
 
				+  - `docker-compose.yaml` 使用环境变量 `${DASHSCOPE_API_KEY}`（安全）
			
 
				+  
			
 
				+- ⚠️ **需要注意**：
			
 
				+  - `configs/agents/difyAgent.yaml` 中包含示例 API key 和服务器地址
			
 
				+    - 第 18 行：`default: "http://47.110.48.75/v1"`
			
 
				+    - 第 26 行：`default: "app-gId1iPrVr9AtNWw1ZQ8CiUtv"`
			
 
				+    - 第 34 行：`default: "usky"`
			
 
				+  - **建议**：确认这些是否为示例值，如果是真实密钥需要移除或替换为占位符
			
 
				+
			
 
				+### 4. 项目结构检查
			
 
				+- ✅ 项目结构清晰，无明显的冗余文件
			
 
				+- ✅ 配置文件模板完整
			
 
				+- ✅ 所有必要的文档文件存在
			
 
				+
			
 
				+## 📝 建议的后续操作
			
 
				+
			
 
				+### 1. 敏感信息处理
			
 
				+如果 `configs/agents/difyAgent.yaml` 中的值是真实密钥：
			
 
				+```yaml
			
 
				+# 建议修改为：
			
 
				+default: ""  # 或 "your-api-key-here"
			
 
				+```
			
 
				+
			
 
				+### 2. 提交前检查
			
 
				+运行以下命令确认没有意外提交的文件：
			
 
				+```bash
			
 
				+# 检查被忽略的文件
			
 
				+git status --ignored
			
 
				+
			
 
				+# 检查是否有大文件
			
 
				+find . -type f -size +10M -not -path "./.git/*" -not -path "./node_modules/*"
			
 
				+
			
 
				+# 检查是否有敏感信息
			
 
				+git grep -i "api.*key\|password\|secret" -- configs/
			
 
				+```
			
 
				+
			
 
				+### 3. 清理已跟踪的临时文件（如果需要）
			
 
				+如果之前有临时文件被提交，需要从 Git 中移除：
			
 
				+```bash
			
 
				+# 移除已跟踪的 __pycache__ 目录
			
 
				+git rm -r --cached digitalHuman/**/__pycache__
			
 
				+
			
 
				+# 移除已跟踪的日志文件
			
 
				+git rm -r --cached logs/
			
 
				+
			
 
				+# 移除已跟踪的构建产物
			
 
				+git rm -r --cached web/dist/ meet/dist/
			
 
				+```
			
 
				+
			
 
				+## 📊 整理统计
			
 
				+
			
 
				+- **删除的文件夹**：1 个（`web/web`）
			
 
				+- **更新的文件**：2 个（`.gitignore`, 新增检查清单）
			
 
				+- **检查的配置文件**：15+ 个
			
 
				+- **发现的潜在问题**：1 个（需要确认的 API key）
			
 
				+
			
 
				+## ✨ 整理后的项目状态
			
 
				+
			
 
				+项目代码已整理完毕，可以安全提交。主要改进：
			
 
				+1. ✅ 移除了未使用的依赖
			
 
				+2. ✅ 完善了 .gitignore 配置
			
 
				+3. ✅ 检查了敏感信息
			
 
				+4. ✅ 验证了项目结构
			
 
				+
			
 
				+**注意**：提交前请确认 `configs/agents/difyAgent.yaml` 中的 API key 是否为示例值。
			
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,21 @@
 
				+MIT License
			
 
				+
			
 
				+Copyright (c) 2023 wan-h
			
 
				+
			
 
				+Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+of this software and associated documentation files (the "Software"), to deal
			
 
				+in the Software without restriction, including without limitation the rights
			
 
				+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+copies of the Software, and to permit persons to whom the Software is
			
 
				+furnished to do so, subject to the following conditions:
			
 
				+
			
 
				+The above copyright notice and this permission notice shall be included in all
			
 
				+copies or substantial portions of the Software.
			
 
				+
			
 
				+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
			
 
				+SOFTWARE.
			
--- a/PRE_COMMIT_CHECKLIST.md
+++ b/PRE_COMMIT_CHECKLIST.md
@@ -0,0 +1,83 @@
 
				+# 代码提交前检查清单
			
 
				+
			
 
				+## ✅ 已完成检查项
			
 
				+
			
 
				+### 1. .gitignore 配置
			
 
				+- ✅ Python 缓存文件 (`__pycache__/`, `*.pyc`)
			
 
				+- ✅ 日志文件 (`logs/`, `*.log`)
			
 
				+- ✅ 构建产物 (`dist/`, `build/`, `out/`)
			
 
				+- ✅ 依赖文件 (`node_modules/`, `package-lock.json`)
			
 
				+- ✅ 环境变量文件 (`.env`, `*.env`)
			
 
				+- ✅ SSL 证书私钥 (`*.key`, `*.crt`, `*.pem`)
			
 
				+- ✅ 临时文件 (`*.tmp`, `*.cache`)
			
 
				+- ✅ 输出文件 (`outputs/`, `*.wav`, `*.mp3`, `*.mp4`)
			
 
				+
			
 
				+### 2. 敏感信息检查
			
 
				+- ⚠️ **需要检查**: `configs/agents/difyAgent.yaml` 中包含示例 API key 和默认值
			
 
				+  - 位置: 第 18 行 (api_server), 第 26 行 (api_key), 第 34 行 (username)
			
 
				+  - 建议: 确认这些是否为示例值，如果是真实密钥需要移除
			
 
				+
			
 
				+### 3. 项目结构清理
			
 
				+- ✅ 已删除未使用的 `web/web` 文件夹（包含未使用的 TensorFlow.js 依赖）
			
 
				+- ✅ 检查了项目结构，无明显的冗余文件
			
 
				+
			
 
				+### 4. 配置文件
			
 
				+- ✅ 配置文件模板完整 (`configs/config_template.yaml`)
			
 
				+- ✅ 所有引擎和 Agent 配置文件存在
			
 
				+- ✅ 配置文件中的 API key 字段均为空字符串（安全）
			
 
				+
			
 
				+## ⚠️ 需要手动检查项
			
 
				+
			
 
				+### 1. 敏感信息清理
			
 
				+检查以下文件是否包含真实的 API 密钥或密码：
			
 
				+- `configs/agents/difyAgent.yaml` - 包含示例 API key
			
 
				+- `docker-compose.yaml` - 使用环境变量 `${DASHSCOPE_API_KEY}`（安全）
			
 
				+- 所有 `configs/` 目录下的 YAML 文件
			
 
				+
			
 
				+### 2. 临时文件清理
			
 
				+确认以下目录/文件是否需要提交：
			
 
				+- `logs/` - 应该被 .gitignore 忽略
			
 
				+- `__pycache__/` - 应该被 .gitignore 忽略
			
 
				+- `images.rar` - 检查是否需要提交（如果是资源文件可能需要）
			
 
				+
			
 
				+### 3. 构建产物
			
 
				+确认以下目录不应提交：
			
 
				+- `web/dist/` - Next.js 构建产物
			
 
				+- `meet/dist/` - Next.js 构建产物
			
 
				+- `web/android/app/build/` - Android 构建产物
			
 
				+
			
 
				+### 4. 证书文件
			
 
				+- `certs/*.crt` 和 `certs/*.key` - 应该被忽略（已在 .gitignore 中）
			
 
				+
			
 
				+## 📝 建议操作
			
 
				+
			
 
				+1. **清理敏感信息**：
			
 
				+   ```bash
			
 
				+   # 检查是否有真实的 API key 被提交
			
 
				+   git grep -i "api.*key\|password\|secret" -- configs/
			
 
				+   ```
			
 
				+
			
 
				+2. **确认 .gitignore 生效**：
			
 
				+   ```bash
			
 
				+   # 检查是否有应该被忽略的文件被跟踪
			
 
				+   git status --ignored
			
 
				+   ```
			
 
				+
			
 
				+3. **检查大文件**：
			
 
				+   ```bash
			
 
				+   # 查找可能不应该提交的大文件
			
 
				+   find . -type f -size +10M -not -path "./.git/*" -not -path "./node_modules/*"
			
 
				+   ```
			
 
				+
			
 
				+4. **验证配置文件**：
			
 
				+   - 确认所有配置文件中的敏感字段都是空值或示例值
			
 
				+   - 确认 `config.yaml` 文件在 .gitignore 中（本地配置文件不应提交）
			
 
				+
			
 
				+## 🚀 提交前最后检查
			
 
				+
			
 
				+- [ ] 运行 `git status` 确认没有意外添加的文件
			
 
				+- [ ] 运行 `git diff` 检查所有更改
			
 
				+- [ ] 确认没有硬编码的 API 密钥或密码
			
 
				+- [ ] 确认所有构建产物和临时文件都被忽略
			
 
				+- [ ] 确认日志文件不会被提交
			
 
				+- [ ] 确认证书私钥不会被提交
			
--- a/README.md
+++ b/README.md
@@ -0,0 +1,156 @@
 
				+# AWESOME-DIGITAL-HUMAN
			
 
				+**打造有温度的数字人**  
			
 
				+**给数字人注入灵魂**  
			
 
				+---  
			
 
				+🎉🎉🎉 社区官网公测版本正式发布: https://www.light4ai.com  
			
 
				+[B站视频-社区官网介绍](https://www.bilibili.com/video/BV1YN72z7EBz)  
			
 
				+官网在开源版本基础上额外支持(详情见[操作指南](https://light4ai.feishu.cn/docx/XmGFd5QJwoBdDox8M7zcAcRJnje)):  
			
 
				+* 个人应用管理  
			
 
				+* 内置服务接入  
			
 
				+* 限定主题  
			
 
				+* 应用分享(链接分享、网页嵌入分享)
			
 
				+###### *社区业余时间发电，你的star是我们最大的动力，感谢！*
			
 
				+---  
			
 
				+
			
 
				+## 演示
			
 
				+https://github.com/user-attachments/assets/6596fdb6-d9a1-4936-8c3d-312c683690b6
			
 
				+
			
 
				+## 主要特性
			
 
				+* 支持 Docker 快速部署
			
 
				+* 超轻量级，配置要求低于2核2G
			
 
				+* 支持 Dify/FastGPT/Coze 等编排框架服务接入
			
 
				+* 支持 ASR、LLM、TTS、Agent 模块化扩展
			
 
				+* 支持 Live2d 人物模型扩展和控制方式
			
 
				+* 支持PC端和移动端web访问
			
 
				+* 支持沉浸式智能对话  
			
 
				+PC端页面预览：  
			
 
				+![](./assets/pc_web.png)  
			
 
				+移动端页面预览：  
			
 
				+![](./assets/phone_web.png)
			
 
				+
			
 
				+## 设计架构
			
 
				+大模型的厂商众多、各种工具繁多、要打造自己的数字人需要一定的代码能力和时间投入。  
			
 
				+可通过Coding扩展模块，让一切变得高度定制化。  
			
 
				+可通过Agent编排框架，让一切变得更加简单。  
			
 
				+![](./assets/arch.png)
			
 
				+
			
 
				+## 模式支持
			
 
				+> **交互模式**  
			
 
				+* 对话模式：专注于数字人文字交互  
			
 
				+* 沉浸模式：专注于与数字人之间拟人方式的直接交互  
			
 
				+> **Agent模式**
			
 
				+* RepeaterAgent（测试使用）：重复用户输入的语句  
			
 
				+* DifyAgent：接入Dify的服务  
			
 
				+* FastgptAgent：接入fastgpt的服务  
			
 
				+* CozeAgent：接入coze的服务
			
 
				+* OpenaiAgent：接入适配openai接口的服务  
			
 
				+
			
 
				+## 版本记录
			
 
				+> ### v1.0.0
			
 
				+**界面简约，注重模块扩展性**
			
 
				+* [v1.0.0 - 2024-06-25](https://github.com/wan-h/awesome-digital-human-live2d/tree/v1.0.0)
			
 
				+  * 前端架构：react + antD
			
 
				+  * 后端架构：fastapi
			
 
				+  * ASR已接入：baiduAPI、googleAPI
			
 
				+  * LLM已接入：baiduAPI、openaiAPI
			
 
				+  * TTS已接入：baiduAPI、edgeAPI
			
 
				+  * Agent支持：repeater(复读机)、dialogue(对话)
			
 
				+  * 人物类型支持：女友（1）、心理师（1）、素人（11）
			
 
				+> ### v2.0.0
			
 
				+**拥抱Dify生态，打造自己的数字人灵魂**
			
 
				+* [v2.0.0 - 2024-08-08](https://github.com/wan-h/awesome-digital-human-live2d/tree/v2.0.0)
			
 
				+  * 前端页面全面升级：nextjs + nextui + tailwind
			
 
				+  * 前端页面兼容移动端访问
			
 
				+  * 前端支持两种交互模式：聊天模式、数字人模式
			
 
				+  * 前端支持人物模型和背景切换以及个人定制扩展
			
 
				+  * Agent支持：difyAgent（ASR、TTS均可接入Dify）、FastGPTAgent、OpenaiAgent
			
 
				+> ### v3.0.0
			
 
				+**强化交互体验**
			
 
				+* [v3.0.0 - 2025-06-01](https://github.com/wan-h/awesome-digital-human-live2d/tree/main)
			
 
				+  * 前端页面全面升级：nextjs + heroui + tailwind
			
 
				+  * 支持动态背景
			
 
				+  * 沉浸模式（实时交互、对话打断等等直接交互方式优化）
			
 
				+  * 支持流式引擎([协议文档](./docs/streaming_protocol.md))
			
 
				+    * FunASR streaming(在沉浸模式可选)  
			
 
				+  * Agent扩展支持：CozeAgent（ASR、TTS均可接入Coze）
			
 
				+
			
 
				+## TODOList
			
 
				+- [ ] rtc音视频流支持
			
 
				+- [ ] 跨模态交互支持(麦克风/摄像头)
			
 
				+- [ ] 人物模型AI生成尝试
			
 
				+- [ ] 情感控制人物表情动作支持
			
 
				+
			
 
				+## 部署&开发
			
 
				+[部署说明](./docs/deploy_instrction.md)  
			
 
				+[Windows 部署指南](./docs/WINDOWS_DEPLOYMENT.md)（推荐）  
			
 
				+[开发说明](./docs/developer_instrction.md)  
			
 
				+[v2.0.0 常见问题](./docs/Q&A.md)  
			
 
				+[Edge 展台模式使用指南](./docs/kiosk-mode-guide.md)  
			
 
				+[启动说明](./docs/启动说明.md)  
			
 
				+[项目结构说明](./docs/PROJECT_STRUCTURE.md)
			
 
				+
			
 
				+### 快速启动（开发模式 + Chrome 全屏）
			
 
				+项目提供了便捷的启动脚本，可以一键启动前端和后端服务，并自动在 Chrome 全屏模式下打开：
			
 
				+
			
 
				+**Windows 用户：**
			
 
				+- **批处理脚本**（推荐）：双击 `scripts/start-digital-human-chrome.bat` 文件
			
 
				+- 详细使用说明请参考 [启动说明](./docs/启动说明.md)
			
 
				+
			
 
				+**功能说明：**
			
 
				+1. 自动启动后端服务（Python FastAPI，端口 8880）
			
 
				+2. 自动启动前端服务（Next.js，端口 3000）
			
 
				+3. 自动检测服务就绪状态
			
 
				+4. 服务就绪后自动使用 Chrome 全屏模式打开前端页面
			
 
				+
			
 
				+**使用要求：**
			
 
				+- 已安装 Python 3.x 和所需依赖（运行 `pip install -r requirements.txt`）
			
 
				+- 已安装 Node.js 和 npm
			
 
				+- 已安装 Google Chrome 浏览器
			
 
				+
			
 
				+**注意事项：**
			
 
				+- 首次运行会自动安装前端依赖（如果 node_modules 不存在）
			
 
				+- 后端和前端服务会在独立窗口中运行，方便查看日志
			
 
				+- 按 `Alt+F4` 退出 Chrome 全屏模式
			
 
				+- 按 `F11` 切换全屏模式
			
 
				+- 要停止服务，请关闭后端和前端服务窗口  
			
 
				+
			
 
				+[v2.0.0 B站视频教程-部署](https://www.bilibili.com/video/BV1szePeaEak/)  
			
 
				+[v2.0.0 B站视频教程-All-in-Dify部署](https://www.bilibili.com/video/BV1kZWvesE25/)
			
 
				+
			
 
				+## Love & Share
			
 
				+**知乎板块**  
			
 
				+[数字人-定义数字世界中的你](https://zhuanlan.zhihu.com/p/676746017)  
			
 
				+[RAG架构浅析](https://zhuanlan.zhihu.com/p/703262854)  
			
 
				+[dify源码解析-RAG](https://zhuanlan.zhihu.com/p/704341817)  
			
 
				+[RAG-索引之PDF文档解析](https://zhuanlan.zhihu.com/p/707271297)  
			
 
				+[Dify打造专属数字人灵魂](https://zhuanlan.zhihu.com/p/714961925)  
			
 
				+[数字人的All in Dify](https://zhuanlan.zhihu.com/p/716359038)  
			
 
				+[数字人的All in Coze](https://zhuanlan.zhihu.com/p/1928506957968413871)
			
 
				+  
			
 
				+**微信公众号板块**  
			
 
				+[数字人-定义数字世界中的你](https://mp.weixin.qq.com/s/SQvFysHO8daN0HMA0AaJZw)  
			
 
				+[RAG架构浅析](https://mp.weixin.qq.com/s/4iWrJonD8_kjxw4ILibzSw)  
			
 
				+[dify源码解析-RAG](https://mp.weixin.qq.com/s/muCTFTWLY8j5UtxwCaW93A)  
			
 
				+[RAG-索引之PDF文档解析](https://mp.weixin.qq.com/s/innbTL6aeOsl9vyJSN6yBw)  
			
 
				+[Dify打造专属数字人灵魂](https://mp.weixin.qq.com/s/3B4YgYjDY42DNTgE76XOtw)  
			
 
				+[数字人的All in Dify](https://mp.weixin.qq.com/s/Uf17jWpjVzAfzX42TP09gw)  
			
 
				+[数字人的All in Coze](https://mp.weixin.qq.com/s/DbFUmmxBmlPgMOQ16tRDfw)
			
 
				+
			
 
				+**Dify 官方板块**  
			
 
				+[Dify公众号文章：使用 Dify 打造数字人灵魂](https://mp.weixin.qq.com/s?__biz=Mzg5MDkyOTY3NA==&mid=2247486070&idx=3&sn=0911ba8723278a83c1554afd2de861ab&chksm=cefc58effe2456e39a9f0f0afac4ec5447bb1aafff42a68d05b2a3f523baae299b93d7ae6ff9&mpshare=1&scene=1&srcid=1021NXKMC2W697dCXEwqsCkN&sharer_shareinfo=93041ce9bdefcde0aa121d27a3f3f6dd&sharer_shareinfo_first=8c8f03435bc9af5236a4505b831d1388&exportkey=n_ChQIAhIQQaNAHzm7bGdYinsq2L2zbRKfAgIE97dBBAEAAAAAANTKKNX7j3cAAAAOpnltbLcz9gKNyK89dVj0%2F3Ojxo5%2FA9C00dmnAyJraAwSYIfMr4csl8xZvE%2FSwCi3nKbPJZ4mnLdQdVm2EQP2SNJQIMUqV1PGB%2BGpSSdjOs6L7ejtFS9GCpkr6LMmAKVW904Tu4tGhZwjaU14QjLRGXZ7rQEKMOQjdQTyDf%2BluwFEDAXlLMozezq6ypTwXIu0HoLjs4Q6x4gtHS%2BpH6vhOfGgR7LtVbZcXAFFWokyvREiMuHayOSrjtpDD9CQK5KYELY7Ejd%2B48JRj7dRJZiAGebg2KRYtB7%2BpJqgyKaNO4mCcT%2BT9KjHq4WIssWaF0Vq5G4D2el%2FhIgfuEpreoR1hUKOMkcBiAXZ&acctmode=0&pass_ticket=Tg8MLw6UPqgdcjRxs7YP26i09LNlJcKEH%2Bw9YwPdaE4OzNwhW7RbDzgVM3X5rkY1&wx_header=0#rd)
			
 
				+
			
 
				+**产研板块**  
			
 
				+[数字人调研问卷](https://ec5cjmeodk.feishu.cn/share/base/dashboard/shrcnu1DNMUCTU18f5tF2q9qoQh)（感谢 [@plumixius](https://github.com/plumixius) 同学）
			
 
				+
			
 
				+## Thanks
			
 
				+### 开源项目
			
 
				+* [Dify](https://github.com/langgenius/dify)  
			
 
				+* [Live2D](https://github.com/Live2D)  
			
 
				+* [FunASR](https://github.com/modelscope/FunASR)
			
 
				+* 源码中涉及到的所有库作者
			
 
				+
			
 
				+## 社区联系
			
 
				+**扫码请备注 ADH**    
			
 
				+| 商务合作 | 兴趣小组 |
			
 
				+| --- | --- |
			
 
				+| ![](assets/wechat_2.png) | ![](assets/wechat_1.png) |
			
--- a/build/README.md
+++ b/build/README.md
@@ -0,0 +1,40 @@
 
				+# 构建资源目录
			
 
				+
			
 
				+## 图标文件
			
 
				+
			
 
				+请在此目录放置应用图标文件：
			
 
				+
			
 
				+- `icon.ico` - Windows 图标文件（256x256 或更大，包含多个尺寸）
			
 
				+
			
 
				+### 如何创建图标
			
 
				+
			
 
				+1. 准备一个 512x512 或更大的 PNG 图片
			
 
				+2. 使用在线工具转换为 ICO 格式：
			
 
				+   - https://convertio.co/zh/png-ico/
			
 
				+   - https://www.icoconverter.com/
			
 
				+3. 将生成的 `icon.ico` 文件放在此目录
			
 
				+
			
 
				+### 图标要求
			
 
				+
			
 
				+- 格式：ICO
			
 
				+- 尺寸：至少 256x256，建议包含多个尺寸（16x16, 32x32, 48x48, 256x256）
			
 
				+- 背景：透明或纯色
			
 
				+
			
 
				+如果没有图标文件，electron-builder 会使用默认图标。
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
--- a/build/icon.ico
+++ b/build/icon.ico
--- a/build/sdogu-08l3j-001.ico
+++ b/build/sdogu-08l3j-001.ico
--- a/certs/README.md
+++ b/certs/README.md
@@ -0,0 +1,142 @@
 
				+# SSL 证书目录
			
 
				+
			
 
				+此目录用于存放 SSL/TLS 证书文件。
			
 
				+
			
 
				+## 生成自签名证书
			
 
				+
			
 
				+### Linux/Mac
			
 
				+
			
 
				+```bash
			
 
				+chmod +x scripts/generate_ssl_cert.sh
			
 
				+./scripts/generate_ssl_cert.sh
			
 
				+```
			
 
				+
			
 
				+### Windows
			
 
				+
			
 
				+**前提条件**: 需要先安装 OpenSSL
			
 
				+
			
 
				+#### 安装 OpenSSL (Windows)
			
 
				+
			
 
				+**详细安装指南请查看**: [docs/INSTALL_OPENSSL_WINDOWS.md](../docs/INSTALL_OPENSSL_WINDOWS.md)
			
 
				+
			
 
				+**快速安装方式**：
			
 
				+
			
 
				+1. **使用 Git for Windows** (推荐，最简单)
			
 
				+   - Git for Windows 自带 OpenSSL
			
 
				+   - 下载: https://git-scm.com/download/win
			
 
				+   - 安装后，OpenSSL 会自动添加到 PATH
			
 
				+   - **安装目录**: 默认 `C:\Program Files\Git`（自动配置，无需手动设置）
			
 
				+
			
 
				+2. **手动安装 OpenSSL** (如果不想安装 Git)
			
 
				+   - 下载: https://slproweb.com/products/Win32OpenSSL.html
			
 
				+   - 选择 "Win64 OpenSSL v3.x.x Light"（推荐）
			
 
				+   - **安装目录**: 推荐 `C:\Program Files\OpenSSL-Win64`
			
 
				+   - 安装时勾选 "Copy OpenSSL DLLs to The Windows system directory"
			
 
				+   - **配置 PATH**: 添加 `C:\Program Files\OpenSSL-Win64\bin` 到系统 PATH
			
 
				+   - 详细步骤请查看上面的安装指南
			
 
				+
			
 
				+3. **使用 Chocolatey** (如果已安装)
			
 
				+   ```cmd
			
 
				+   choco install openssl
			
 
				+   ```
			
 
				+
			
 
				+4. **使用 WSL** (如果已安装 WSL)
			
 
				+   ```bash
			
 
				+   sudo apt-get update
			
 
				+   sudo apt-get install openssl
			
 
				+   ```
			
 
				+
			
 
				+#### 验证 OpenSSL 安装
			
 
				+
			
 
				+打开命令提示符或 PowerShell，运行：
			
 
				+
			
 
				+```cmd
			
 
				+openssl version
			
 
				+```
			
 
				+
			
 
				+如果显示版本信息，说明安装成功。
			
 
				+
			
 
				+#### 生成证书
			
 
				+
			
 
				+```cmd
			
 
				+scripts\generate_ssl_cert.bat
			
 
				+```
			
 
				+
			
 
				+## 手动生成证书
			
 
				+
			
 
				+如果脚本无法运行，可以手动执行以下命令：
			
 
				+
			
 
				+### 1. 生成私钥
			
 
				+
			
 
				+```bash
			
 
				+openssl genrsa -out certs/server.key 2048
			
 
				+```
			
 
				+
			
 
				+### 2. 生成证书签名请求 (CSR)
			
 
				+
			
 
				+```bash
			
 
				+openssl req -new -key certs/server.key -out certs/server.csr -subj "/C=CN/ST=State/L=City/O=Organization/CN=localhost"
			
 
				+```
			
 
				+
			
 
				+### 3. 创建配置文件 (server.conf)
			
 
				+
			
 
				+创建文件 `certs/server.conf`，内容如下：
			
 
				+
			
 
				+```
			
 
				+[req]
			
 
				+distinguished_name = req_distinguished_name
			
 
				+req_extensions = v3_req
			
 
				+
			
 
				+[req_distinguished_name]
			
 
				+
			
 
				+[v3_req]
			
 
				+basicConstraints = CA:FALSE
			
 
				+keyUsage = nonRepudiation, digitalSignature, keyEncipherment
			
 
				+subjectAltName = @alt_names
			
 
				+
			
 
				+[alt_names]
			
 
				+DNS.1 = localhost
			
 
				+DNS.2 = *.localhost
			
 
				+IP.1 = 127.0.0.1
			
 
				+IP.2 = ::1
			
 
				+```
			
 
				+
			
 
				+### 4. 生成自签名证书
			
 
				+
			
 
				+```bash
			
 
				+openssl x509 -req -days 365 -in certs/server.csr -signkey certs/server.key -out certs/server.crt -extensions v3_req -extfile certs/server.conf
			
 
				+```
			
 
				+
			
 
				+### 5. 清理临时文件
			
 
				+
			
 
				+```bash
			
 
				+rm certs/server.csr certs/server.conf
			
 
				+```
			
 
				+
			
 
				+## 使用证书
			
 
				+
			
 
				+生成证书后，服务器会自动检测 `certs/server.key` 和 `certs/server.crt` 文件。
			
 
				+
			
 
				+如果存在这些文件，服务器将自动启用 HTTPS。
			
 
				+
			
 
				+访问地址：
			
 
				+- HTTP: `http://localhost:8000`
			
 
				+- HTTPS: `https://localhost:8000`
			
 
				+
			
 
				+## 浏览器警告
			
 
				+
			
 
				+自签名证书会在浏览器中显示安全警告，这是正常的。您可以：
			
 
				+
			
 
				+1. **Chrome/Edge**: 点击"高级" -> "继续前往 localhost（不安全）"
			
 
				+2. **Firefox**: 点击"高级" -> "接受风险并继续"
			
 
				+3. **Safari**: 点击"显示详细信息" -> "访问此网站"
			
 
				+
			
 
				+## 生产环境
			
 
				+
			
 
				+⚠️ **重要**: 自签名证书仅用于开发环境。在生产环境中，请使用由受信任的 CA（如 Let's Encrypt）签发的证书。
			
 
				+
			
 
				+## 文件说明
			
 
				+
			
 
				+- `server.key`: 私钥文件（请妥善保管，不要泄露）
			
 
				+- `server.crt`: 证书文件
			
 
				+- `.gitignore`: 已配置忽略证书文件，不会提交到版本控制
			
--- a/configs/agents/cozeAgent.yaml
+++ b/configs/agents/cozeAgent.yaml
@@ -0,0 +1,28 @@
 
				+NAME: "Coze"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Coze智能体"
			
 
				+META: {
			
 
				+  official: "https://www.coze.cn/",
			
 
				+  configuration: "",
			
 
				+  tips: "支持接入云端的Coze智能体",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "token",
			
 
				+    description: "Coze Token.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "bot_id",
			
 
				+    description: "Coze bot_id.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/agents/difyAgent.yaml
+++ b/configs/agents/difyAgent.yaml
@@ -0,0 +1,36 @@
 
				+NAME: "Dify"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Dify应用"
			
 
				+META: {
			
 
				+  official: "https://dify.ai/",
			
 
				+  configuration: "https://mp.weixin.qq.com/s/YXyHYN1dC_nJAOCco7ZJjg",
			
 
				+  tips: "支持本地部署的Dify应用",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# Parameter options and default values exposed to the frontend; credential defaults stay empty - never commit a real endpoint or API key here
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "api_server",
			
 
				+    description: "Dify API Server.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "Dify API Key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "username",
			
 
				+    description: "Dify Username.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "adh"
			
 
				+  }
			
 
				+]
			
--- a/configs/agents/fastgptAgent.yaml
+++ b/configs/agents/fastgptAgent.yaml
@@ -0,0 +1,36 @@
 
				+NAME: "FastGPT"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入FastGPT应用"
			
 
				+META: {
			
 
				+  official: "https://fastgpt.cn",
			
 
				+  configuration: "FastGPT云服务: https://cloud.fastgpt.cn",
			
 
				+  tips: "",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "base_url",
			
 
				+    description: "FastGPT base url.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "FastGPT API Key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "uid",
			
 
				+    description: "FastGPT customUid.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "adh"
			
 
				+  }
			
 
				+]
			
--- a/configs/agents/openaiAPI.yaml
+++ b/configs/agents/openaiAPI.yaml
@@ -0,0 +1,36 @@
 
				+NAME: "OpenAI"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Openai协议的服务"
			
 
				+META: {
			
 
				+  official: "",
			
 
				+  configuration: "",
			
 
				+  tips: "兼容所有符合Openai协议的API",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "model",
			
 
				+    description: "ID of the model to use.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "base_url",
			
 
				+    description: "The base url for request.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "https://api.openai.com/v1"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "The api key for request.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/agents/repeaterAgent.yaml
+++ b/configs/agents/repeaterAgent.yaml
@@ -0,0 +1,9 @@
 
				+NAME: "Repeater"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "复读机"
			
 
				+META: {
			
 
				+  official: "",
			
 
				+  configuration: "",
			
 
				+  tips: "测试使用",
			
 
				+  fee: ""
			
 
				+}
			
--- a/configs/config_template.yaml
+++ b/configs/config_template.yaml
@@ -0,0 +1,21 @@
 
				+COMMON:
			
 
				+  NAME: "Awesome-Digital-Human"
			
 
				+  VERSION: "v3.0.0"
			
 
				+  LOG_LEVEL: "DEBUG"
			
 
				+SERVER:
			
 
				+  IP: "0.0.0.0"
			
 
				+  PORT: 8880
			
 
				+  WORKSPACE_PATH: "./outputs"
			
 
				+  ENGINES:
			
 
				+    ASR: 
			
 
				+      SUPPORT_LIST: [ "difyAPI.yaml", "cozeAPI.yaml", "tencentAPI.yaml", "funasrStreamingAPI.yaml", "dashscopeAPI.yaml", "dashscopeStreamingAPI.yaml"]
			
 
				+      DEFAULT: "dashscopeAPI.yaml"
			
 
				+    TTS: 
			
 
				+      SUPPORT_LIST: [ "edgeAPI.yaml", "tencentAPI.yaml", "difyAPI.yaml", "cozeAPI.yaml" ]
			
 
				+      DEFAULT: "difyAPI.yaml"
			
 
				+    LLM:
			
 
				+      SUPPORT_LIST: []
			
 
				+      DEFAULT: ""
			
 
				+  AGENTS:
			
 
				+    SUPPORT_LIST: [ "repeaterAgent.yaml", "openaiAPI.yaml", "difyAgent.yaml", "fastgptAgent.yaml", "cozeAgent.yaml" ]
			
 
				+    DEFAULT: "difyAgent.yaml"
			
--- a/configs/engines/asr/cozeAPI.yaml
+++ b/configs/engines/asr/cozeAPI.yaml
@@ -0,0 +1,21 @@
 
				+NAME: "Coze"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Coze智能体"
			
 
				+META: {
			
 
				+  official: "https://www.coze.cn/",
			
 
				+  configuration: "",
			
 
				+  tips: "支持接入云端的Coze智能体",
			
 
				+  fee: "",
			
 
				+  infer_type: "normal"
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "token",
			
 
				+    description: "Coze Token.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/asr/dashscopeAPI.yaml
+++ b/configs/engines/asr/dashscopeAPI.yaml
@@ -0,0 +1,65 @@
 
				+# Dashscope (阿里云通义千问) Fun-ASR Configuration
			
 
				+# For details, see: https://help.aliyun.com/zh/dashscope/developer-reference/asr-api
			
 
				+NAME: dashscopeASR
			
 
				+VERSION: "v2.0.0"
			
 
				+DESC: "阿里云通义千问 Fun-ASR 实时语音识别（支持通义千问3-ASR-Flash）"
			
 
				+META:
			
 
				+  official: "https://help.aliyun.com/zh/dashscope/"
			
 
				+  tips: "使用阿里云通义千问的 Fun-ASR API 进行语音识别，支持中英文混合识别。推荐使用 fun-asr-realtime 模型（通义千问3-ASR-Flash）"
			
 
				+  fee: "付费"
			
 
				+  infer_type: "normal"
			
 
				+CUSTOM:
			
 
				+  api_key: ""  # keep empty in version control; supply the real key at deploy time
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "model",
			
 
				+    description: "识别模型（推荐使用fun-asr-realtime，即通义千问3-ASR-Flash）",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: ["fun-asr-realtime", "paraformer-realtime-v2", "paraformer-v2"],
			
 
				+    default: "fun-asr-realtime"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "sample_rate",
			
 
				+    description: "音频采样率",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    choices: [8000, 16000],
			
 
				+    default: 16000
			
 
				+  },
			
 
				+  {
			
 
				+    name: "format",
			
 
				+    description: "音频格式",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: ["pcm", "wav", "mp3", "opus", "speex", "aac"],
			
 
				+    default: "mp3"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "language_hints",
			
 
				+    description: "语言提示（仅 paraformer-realtime-v2 和 paraformer-v2 支持）",
			
 
				+    type: "list",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ["zh", "en"]
			
 
				+  }
			
 
				+  ,
			
 
				+  {
			
 
				+    name: "wake_word",
			
 
				+    description: "唤醒词（识别到后才开始交互）",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "小天小天"
			
 
				+  }
			
 
				+  ,
			
 
				+  {
			
 
				+    name: "inactivity_seconds",
			
 
				+    description: "空闲超时秒数（识别调用超时）",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: 300
			
 
				+  }
			
 
				+]
			
 
				+
			
--- a/configs/engines/asr/dashscopeStreamingAPI.yaml
+++ b/configs/engines/asr/dashscopeStreamingAPI.yaml
@@ -0,0 +1,47 @@
 
				+# Dashscope (阿里云通义千问) Fun-ASR Streaming Configuration
			
 
				+# For details, see: https://help.aliyun.com/zh/dashscope/developer-reference/asr-api
			
 
				+NAME: dashscopeStreamingASR
			
 
				+VERSION: "v1.0.0"
			
 
				+DESC: "阿里云通义千问 Fun-ASR 流式实时语音识别"
			
 
				+META:
			
 
				+  official: "https://help.aliyun.com/zh/dashscope/"
			
 
				+  tips: "使用阿里云通义千问的 Fun-ASR 流式 API 进行实时语音识别，支持中英文混合识别，实时返回识别结果"
			
 
				+  fee: "付费"
			
 
				+  infer_type: "stream"
			
 
				+CUSTOM:
			
 
				+  api_key: ""
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "model",
			
 
				+    description: "识别模型",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: ["fun-asr-realtime", "paraformer-realtime-v2", "paraformer-v2"],
			
 
				+    default: "fun-asr-realtime"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "sample_rate",
			
 
				+    description: "音频采样率",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    choices: [8000, 16000],
			
 
				+    default: 16000
			
 
				+  },
			
 
				+  {
			
 
				+    name: "format",
			
 
				+    description: "音频格式",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: ["pcm", "wav"],
			
 
				+    default: "pcm"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "language_hints",
			
 
				+    description: "语言提示（仅 paraformer-realtime-v2 支持）",
			
 
				+    type: "list",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ["zh", "en"]
			
 
				+  }
			
 
				+]
			
 
				+
			
--- a/configs/engines/asr/difyAPI.yaml
+++ b/configs/engines/asr/difyAPI.yaml
@@ -0,0 +1,37 @@
 
				+NAME: "Dify"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Dify应用"
			
 
				+META: {
			
 
				+  official: "https://dify.ai/",
			
 
				+  configuration: "https://mp.weixin.qq.com/s/YXyHYN1dC_nJAOCco7ZJjg",
			
 
				+  tips: "支持本地部署的Dify应用",
			
 
				+  fee: "free",
			
 
				+  infer_type: "normal"
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "api_server",
			
 
				+    description: "Dify API Server.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "http://47.110.48.75/v1"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "Dify API Key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    # Do not ship a real application key as the default; the user supplies it.
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "username",
			
 
				+    description: "Dify Username.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "zhaojinyu"
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/asr/funasrStreamingAPI.yaml
+++ b/configs/engines/asr/funasrStreamingAPI.yaml
@@ -0,0 +1,30 @@
 
				+# Funasr Streaming ASR Engine Configuration
			
 
				+# For details on the model, see: https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx/summary
			
 
				+NAME: funasrStreaming
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Stream ASR"
			
 
				+META: {
			
 
				+  official: "https://github.com/modelscope/FunASR",
			
 
				+  tips: "支持本地部署的FunAsrStream应用",
			
 
				+  fee: "free",
			
 
				+  infer_type: "stream"
			
 
				+}
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "api_url",
			
 
				+    description: "Funasr Streaming API URL",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "ws://adh-funasr:10095"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "mode",
			
 
				+    description: "Funasr Streaming mode",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    # choices: ["online", "offline", "2pass"],
			
 
				+    choices: ["2pass"],
			
 
				+    default: "2pass"
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/asr/tencentAPI.yaml
+++ b/configs/engines/asr/tencentAPI.yaml
@@ -0,0 +1,29 @@
 
				+NAME: "Tencent-API"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入腾讯服务"
			
 
				+META: {
			
 
				+  official: "",
			
 
				+  configuration: "https://console.cloud.tencent.com/asr",
			
 
				+  tips: "",
			
 
				+  fee: "",
			
 
				+  infer_type: "normal"
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "secret_id",
			
 
				+    description: "tencent secret_id.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "secret_key",
			
 
				+    description: "tencent secret_key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/llm/openaiAPI.yaml
+++ b/configs/engines/llm/openaiAPI.yaml
@@ -0,0 +1,32 @@
 
				+NAME: "OpenAI"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: ""
			
 
				+META: {
			
 
				+  FEE: "free"
			
 
				+}
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "model",
			
 
				+    description: "ID of the model to use.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "base_url",
			
 
				+    description: "The base url for request.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "https://api.openai.com/v1"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "The api key for request.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/tts/aliNLS.yaml
+++ b/configs/engines/tts/aliNLS.yaml
@@ -0,0 +1,39 @@
 
				+NAME: "AliNLSTTS" # Name of the engine, will be used for registration
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Ali服务"
			
 
				+META: {
			
 
				+  official: "",
			
 
				+  configuration: "https://nls-portal.console.aliyun.com/applist",
			
 
				+  tips: "",
			
 
				+  fee: ""
			
 
				+}
			
 
				+URL: "wss://nls-gateway-cn-shanghai.aliyuncs.com/ws/v1" # Default NLS Gateway URL, can change to other region
			
 
				+FORMAT: "wav"         # Output audio format (mp3, wav). NLS SDK default is pcm, we change to `wav`.
			
 
				+SAMPLE_RATE: 16000    # Audio sample rate. NLS SDK default is 16000 for pcm.
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "voice",
			
 
				+    description: "Voice for AliNLS.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "zhimi_emo"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "token",
			
 
				+    description: "Ali API token.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "app_key",
			
 
				+    description: "Ali API app key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/tts/cozeAPI.yaml
+++ b/configs/engines/tts/cozeAPI.yaml
@@ -0,0 +1,36 @@
 
				+NAME: "Coze"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Coze智能体"
			
 
				+META: {
			
 
				+  official: "https://www.coze.cn/",
			
 
				+  configuration: "",
			
 
				+  tips: "支持接入云端的Coze智能体",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "token",
			
 
				+    description: "Coze Token.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "bot_id",
			
 
				+    description: "Coze bot_id.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  # {
			
 
				+  #   name: "voice",
			
 
				+  #   description: "Voice for Coze.",
			
 
				+  #   type: "string",
			
 
				+  #   required: false,
			
 
				+  #   choices: ["Getting from voice api..."],
			
 
				+  #   default: "魅力女友"
			
 
				+  # },
			
 
				+]
			
--- a/configs/engines/tts/difyAPI.yaml
+++ b/configs/engines/tts/difyAPI.yaml
@@ -0,0 +1,36 @@
 
				+NAME: "Dify"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入Dify应用"
			
 
				+META: {
			
 
				+  official: "https://dify.ai/",
			
 
				+  configuration: "https://mp.weixin.qq.com/s/YXyHYN1dC_nJAOCco7ZJjg",
			
 
				+  tips: "支持本地部署的Dify应用",
			
 
				+  fee: ""
			
 
				+}
			
 
				+# 暴露给前端的参数选项以及默认值
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "api_server",
			
 
				+    description: "Dify API Server.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "http://47.110.48.75/v1"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "api_key",
			
 
				+    description: "Dify API Key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    # Do not ship a real application key as the default; the user supplies it.
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "username",
			
 
				+    description: "Dify Username.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "usky"
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/tts/edgeAPI.yaml
+++ b/configs/engines/tts/edgeAPI.yaml
@@ -0,0 +1,44 @@
 
				+NAME: "EdgeTTS"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "适配EdgeTTS"
			
 
				+META: {
			
 
				+  official: "https://github.com/rany2/edge-tts",
			
 
				+  configuration: "",
			
 
				+  tips: "开源项目可能存在不稳定的情况",
			
 
				+  fee: "free"
			
 
				+}
			
 
				+# 需求参数
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "voice",
			
 
				+    description: "Voice for TTS.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: ["Getting from voice api..."],
			
 
				+    default: "zh-CN-XiaoxiaoNeural"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "rate",
			
 
				+    description: "Set rate, default +0%.",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    range: [-100, 100],
			
 
				+    default: 0
			
 
				+  },
			
 
				+  {
			
 
				+    name: "volume",
			
 
				+    description: "Set volume, default +0%.",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    range: [-100, 100],
			
 
				+    default: 0
			
 
				+  },
			
 
				+  {
			
 
				+    name: "pitch",
			
 
				+    description: "Set pitch, default +0Hz.",
			
 
				+    type: "int",
			
 
				+    required: false,
			
 
				+    range: [-100, 100],
			
 
				+    default: 0
			
 
				+  }
			
 
				+]
			
--- a/configs/engines/tts/tencentAPI.yaml
+++ b/configs/engines/tts/tencentAPI.yaml
@@ -0,0 +1,51 @@
 
				+NAME: "Tencent-API"
			
 
				+VERSION: "v0.0.1"
			
 
				+DESC: "接入腾讯服务"
			
 
				+META: {
			
 
				+  official: "",
			
 
				+  configuration: "https://console.cloud.tencent.com/tts",
			
 
				+  tips: "",
			
 
				+  fee: ""
			
 
				+}
			
 
				+PARAMETERS: [
			
 
				+  {
			
 
				+    name: "secret_id",
			
 
				+    description: "tencent secret_id.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "secret_key",
			
 
				+    description: "tencent secret_key.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: ""
			
 
				+  },
			
 
				+  {
			
 
				+    name: "voice",
			
 
				+    description: "Voice for TTS.",
			
 
				+    type: "string",
			
 
				+    required: false,
			
 
				+    choices: [],
			
 
				+    default: "爱小璟"
			
 
				+  },
			
 
				+  {
			
 
				+    name: "volume",
			
 
				+    description: "Set volume, default +0%.",
			
 
				+    type: "float",
			
 
				+    required: false,
			
 
				+    range: [-10, 10],
			
 
				+    default: 0.0
			
 
				+  },
			
 
				+  {
			
 
				+    name: "speed",
			
 
				+    description: "Set speed, default +0%.",
			
 
				+    type: "float",
			
 
				+    required: false,
			
 
				+    range: [-2, 6],
			
 
				+    default: 0.0
			
 
				+  }
			
 
				+]
			
--- a/digitalHuman/__init__.py
+++ b/digitalHuman/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/agent/__init__.py
+++ b/digitalHuman/agent/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .agentPool import AgentPool
			
--- a/digitalHuman/agent/agentBase.py
+++ b/digitalHuman/agent/agentBase.py
@@ -0,0 +1,16 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from uuid import uuid4
			
 
				+from abc import abstractmethod
			
 
				+from digitalHuman.protocol import BaseMessage
			
 
				+from digitalHuman.core import BaseRunner
			
 
				+
			
 
				+__all__ = ["BaseAgent"]
			
 
				+
			
 
				+class BaseAgent(BaseRunner):
			
 
				+    async def createConversation(self, **kwargs) -> str:
			
 
				+        return str(uuid4())
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    async def run(self, input: BaseMessage, **kwargs):
			
 
				+        raise NotImplementedError  
			
--- a/digitalHuman/agent/agentPool.py
+++ b/digitalHuman/agent/agentPool.py
@@ -0,0 +1,44 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from threading import RLock
			
 
				+from typing import List
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.utils import logger
			
 
				+from .agentBase import BaseAgent
			
 
				+from .core import AgentFactory
			
 
				+
			
 
				+__all__ = ["AgentPool"]
			
 
				+
			
 
				+class AgentPool():
			
 
				+    singleLock = RLock()
			
 
				+    _init = False
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        if not self._init:
			
 
				+            self._pool = dict()
			
 
				+            self._init = True
			
 
				+    
			
 
				+    # Single Instance
			
 
				+    def __new__(cls, *args, **kwargs):
			
 
				+        with AgentPool.singleLock:
			
 
				+            if not hasattr(cls, '_instance'):
			
 
				+                AgentPool._instance = super().__new__(cls)
			
 
				+        return AgentPool._instance
			
 
				+
			
 
				+    def __del__(self):
			
 
				+        self._pool.clear()
			
 
				+        self._init = False
			
 
				+    
			
 
				+    def setup(self, config: CN):
			
 
				+        for cfg in config.SUPPORT_LIST:
			
 
				+            self._pool[cfg.NAME] = AgentFactory.create(cfg)
			
 
				+            logger.info(f"[AgentPool] AGENT Engine {cfg.NAME} is created.")
			
 
				+        logger.info(f"[AgentPool] AGENT Engine default is {config.DEFAULT}.")
			
 
				+            
			
 
				+    def get(self, name: str) -> BaseAgent:
			
 
				+        if name not in self._pool:
			
 
				+            raise KeyError(f"[AgentPool] No such engine: {name}") 
			
 
				+        return self._pool[name]
			
 
				+
			
 
				+    def list(self) -> List[str]:
			
 
				+        return list(self._pool.keys())
			
--- a/digitalHuman/agent/builder.py
+++ b/digitalHuman/agent/builder.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from digitalHuman.utils import Registry
			
 
				+
			
 
				+AGENTS = Registry()
			
--- a/digitalHuman/agent/core/__init__.py
+++ b/digitalHuman/agent/core/__init__.py
@@ -0,0 +1,10 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .difyAgent import DifyApiAgent
			
 
				+from .repeaterAgent import RepeaterAgent
			
 
				+from .fastgptAgent import FastgptApiAgent
			
 
				+from .openaiAgent import OpenaiApiAgent
			
 
				+from .cozeAgent import CozeApiAgent
			
 
				+from .agentFactory import AgentFactory
			
 
				+
			
 
				+__all__ = ['AgentFactory']
			
--- a/digitalHuman/agent/core/agentFactory.py
+++ b/digitalHuman/agent/core/agentFactory.py
@@ -0,0 +1,23 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+from typing import List
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.utils import logger
			
 
				+from digitalHuman.protocol import ENGINE_TYPE
			
 
				+
			
 
				+class AgentFactory():
			
 
				+    """
			
 
				+    Agent Factory
			
 
				+    """
			
 
				+    @staticmethod
			
 
				+    def create(config: CN) -> BaseAgent:
			
 
				+        if config.NAME in AGENTS.list():
			
 
				+            logger.info(f"[AgentFactory] Create instance: {config.NAME}")
			
 
				+            return AGENTS.get(config.NAME)(config, ENGINE_TYPE.AGENT)
			
 
				+        else:
			
 
				+            raise RuntimeError(f"[AgentFactory] Please check config, support AGENT engine: {AGENTS.list()}")
			
 
				+    @staticmethod
			
 
				+    def list() -> List:
			
 
				+        return AGENTS.list()
			
--- a/digitalHuman/agent/core/cozeAgent.py
+++ b/digitalHuman/agent/core/cozeAgent.py
@@ -0,0 +1,88 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+import re
			
 
				+import json
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import httpxAsyncClient, logger, resonableStreamingParser, checkResponse
			
 
				+
			
 
				+__all__ = ["CozeApiAgent"]
			
 
				+
			
 
				+
			
 
				+@AGENTS.register("Coze")
			
 
				+class CozeApiAgent(BaseAgent):
			
 
				+    async def createConversation(self, **kwargs) -> str:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        token = paramters["token"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Authorization': f'Bearer {token}',
			
 
				+            'Content-Type': 'application/json'
			
 
				+        }
			
 
				+
			
 
				+        response = await httpxAsyncClient.post('https://api.coze.cn/v1/conversation/create', headers=headers)
			
 
				+        result = checkResponse(response, "CozeApiAgent", "create conversation")
			
 
				+        return result['data']['id']
			
 
				+
			
 
				+
			
 
				+    async def run(
			
 
				+        self, 
			
 
				+        input: TextMessage, 
			
 
				+        streaming: bool,
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        try:
			
 
				+            if not streaming:
			
 
				+                raise KeyError("Dify Agent only supports streaming mode")
			
 
				+            # 参数校验
			
 
				+            paramters = self.checkParameter(**kwargs)
			
 
				+            token = paramters["token"]
			
 
				+            bot_id = paramters["bot_id"]
			
 
				+            conversation_id = paramters["conversation_id"] if "conversation_id" in paramters else ""
			
 
				+            
			
 
				+            headers = {
			
 
				+                'Authorization': f'Bearer {token}',
			
 
				+                'Content-Type': 'application/json'
			
 
				+            }
			
 
				+
			
 
				+            payload = {
			
 
				+                'bot_id': bot_id,
			
 
				+                'user_id': 'adh',
			
 
				+                'stream': True,
			
 
				+                'auto_save_history': True,
			
 
				+                'additional_messages': [{
			
 
				+                    'role': 'user',
			
 
				+                    'content': input.data,
			
 
				+                    "content_type":"text"
			
 
				+                }]
			
 
				+            }
			
 
				+
			
 
				+            api_url = f'https://api.coze.cn/v3/chat?conversation_id={conversation_id}'
			
 
				+
			
 
				+            if not conversation_id:
			
 
				+                conversation_id = await self.createConversation(**kwargs)
			
 
				+                yield eventStreamConversationId(conversation_id)
			
 
				+            
			
 
				+            async with httpxAsyncClient.stream('POST', api_url, headers=headers, json=payload) as response:
			
 
				+                event = None
			
 
				+                async for chunk in response.aiter_lines():
			
 
				+                    chunkStr = chunk.strip()
			
 
				+                    if not chunkStr: continue
			
 
				+                    if chunkStr.startswith('event:'):
			
 
				+                        event = chunkStr.split(':', 1)[1].strip()
			
 
				+                    if event == 'conversation.message.delta' and 'data:' in chunkStr:
			
 
				+                        message_data = chunkStr.split('data:', 1)[1].strip()
			
 
				+                        if message_data:
			
 
				+                            message_json = json.loads(message_data)
			
 
				+                            reasoning_content = message_json.get('reasoning_content', '')
			
 
				+                            if reasoning_content:
			
 
				+                                yield eventStreamThink(reasoning_content)
			
 
				+                            content = message_json.get('content', '')
			
 
				+                            if content:
			
 
				+                                yield eventStreamText(content)
			
 
				+            yield eventStreamDone()
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DifyApiAgent] Exception: {e}", exc_info=True)
			
 
				+            yield eventStreamError(str(e))
			
--- a/digitalHuman/agent/core/difyAgent.py
+++ b/digitalHuman/agent/core/difyAgent.py
@@ -0,0 +1,109 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+import re
			
 
				+import json
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import httpxAsyncClient, logger, resonableStreamingParser
			
 
				+
			
 
				+__all__ = ["DifyApiAgent"]
			
 
				+
			
 
				+
			
 
				+@AGENTS.register("Dify")
			
 
				+class DifyApiAgent(BaseAgent):
			
 
				+    async def createConversation(self, **kwargs) -> str:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        api_server = paramters["api_server"]
			
 
				+        api_key = paramters["api_key"]
			
 
				+        username = paramters["username"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': f'Bearer {api_key}'
			
 
				+        }
			
 
				+        payload = {
			
 
				+            "inputs": {},
			
 
				+            "query": "hello",
			
 
				+            "response_mode": "blocking",
			
 
				+            "user": username,
			
 
				+            "conversation_id": "",
			
 
				+            "files":[]
			
 
				+        }
			
 
				+
			
 
				+        response = await httpxAsyncClient.post(api_server + "/chat-messages", headers=headers, json=payload)
			
 
				+        if response.status_code != 200:
			
 
				+            raise RuntimeError(f"DifyAPI agent api error: {response.status_code}")
			
 
				+
			
 
				+        data = json.loads(response.text)
			
 
				+        if 'conversation_id' not in data:
			
 
				+            logger.error(f"[AGENT] Engine create conversation failed: {data}")
			
 
				+            return ""
			
 
				+        return data['conversation_id']
			
 
				+
			
 
				+
			
 
				+    async def run(
			
 
				+        self, 
			
 
				+        input: TextMessage, 
			
 
				+        streaming: bool,
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        try:
			
 
				+            if not streaming:
			
 
				+                raise KeyError("Dify Agent only supports streaming mode")
			
 
				+            # 参数校验
			
 
				+            paramters = self.checkParameter(**kwargs)
			
 
				+            api_server = paramters["api_server"]
			
 
				+            api_key = paramters["api_key"]
			
 
				+            username = paramters["username"]
			
 
				+        
			
 
				+            conversation_id = paramters["conversation_id"] if "conversation_id" in paramters else ""
			
 
				+            headers = {
			
 
				+                'Content-Type': 'application/json',
			
 
				+                'Authorization': f'Bearer {api_key}'
			
 
				+            }
			
 
				+
			
 
				+            responseMode = "streaming" if streaming else "blocking"
			
 
				+            payload = {
			
 
				+                "inputs": {},
			
 
				+                "query": input.data,
			
 
				+                "response_mode": responseMode,
			
 
				+                "user": username,
			
 
				+                "conversation_id": conversation_id,
			
 
				+                "files":[]
			
 
				+            }
			
 
				+
			
 
				+            pattern = re.compile(r'data:\s*({.*})')
			
 
				+            async with httpxAsyncClient.stream('POST', api_server + "/chat-messages", headers=headers, json=payload) as response:
			
 
				+                coversaiotnIdRequire = False if conversation_id else True
			
 
				+                async def generator(coversaiotnIdRequire):
			
 
				+                    message_id = ""
			
 
				+                    async for chunk in response.aiter_lines():
			
 
				+                        chunkStr = chunk.strip()
			
 
				+                        if not chunkStr: continue
			
 
				+                        chunkData = pattern.search(chunkStr)
			
 
				+                        # 返回不完整，该模板匹配会失效
			
 
				+                        if not chunkStr.endswith('}') or not chunkData: 
			
 
				+                            logger.warning(f"[AGENT] Engine return truncated data: {chunkStr}")
			
 
				+                            continue
			
 
				+                        chunkData = chunkData.group(1)
			
 
				+
			
 
				+                        # 处理流式返回字符串
			
 
				+                        data = json.loads(chunkData)
			
 
				+                        # 首次返回conversation_id
			
 
				+                        if coversaiotnIdRequire and 'conversation_id' in data:
			
 
				+                            yield (EVENT_TYPE.CONVERSATION_ID, data['conversation_id'])
			
 
				+                            coversaiotnIdRequire = False
			
 
				+                        if not message_id and 'message_id' in data:
			
 
				+                            message_id = data['message_id']
			
 
				+                        if "message" in data["event"] and 'answer' in data:
			
 
				+                            logger.debug(f"[AGENT] Engine response: {data}")
			
 
				+                            yield (EVENT_TYPE.TEXT, data['answer'])
			
 
				+                    yield (EVENT_TYPE.MESSAGE_ID, message_id)
			
 
				+                async for parseResult in resonableStreamingParser(generator(coversaiotnIdRequire)):
			
 
				+                    yield parseResult
			
 
				+            yield eventStreamDone()
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DifyApiAgent] Exception: {e}", exc_info=True)
			
 
				+            yield eventStreamError(str(e))
			
--- a/digitalHuman/agent/core/fastgptAgent.py
+++ b/digitalHuman/agent/core/fastgptAgent.py
@@ -0,0 +1,78 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+import re
			
 
				+import json
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import httpxAsyncClient, logger, resonableStreamingParser
			
 
				+
			
 
				+
			
 
				+__all__ = ["FastgptApiAgent"]
			
 
				+
			
 
				+
			
 
				+@AGENTS.register("FastGPT")
			
 
				+class FastgptApiAgent(BaseAgent):
			
 
				+    async def run(
			
 
				+        self, 
			
 
				+        input: TextMessage, 
			
 
				+        streaming: bool,
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        try:
			
 
				+            if not streaming:
			
 
				+                raise KeyError("FastGPT Agent only supports streaming mode")
			
 
				+
			
 
				+            # 参数校验
			
 
				+            paramters = self.checkParameter(**kwargs)
			
 
				+            base_url = paramters["base_url"]
			
 
				+            api_key = paramters["api_key"]
			
 
				+            uid = paramters["uid"]
			
 
				+            conversation_id = paramters["conversation_id"] if "conversation_id" in paramters else ""
			
 
				+
			
 
				+            headers = {
			
 
				+                'Content-Type': 'application/json',
			
 
				+                'Authorization': f'Bearer {api_key}'
			
 
				+            }
			
 
				+            payload = {
			
 
				+                "chatId": conversation_id,
			
 
				+                "stream": streaming,
			
 
				+                "detail": False,
			
 
				+                "messages":[
			
 
				+                    {
			
 
				+                        "role": "user",
			
 
				+                        "content": input.data,
			
 
				+                    }
			
 
				+                ],
			
 
				+                "customUid": uid
			
 
				+            }
			
 
				+            pattern = re.compile(r'data:\s*({.*})')
			
 
				+            coversaiotnIdRequire = False if conversation_id else True
			
 
				+            if coversaiotnIdRequire:
			
 
				+                conversation_id = await self.createConversation()
			
 
				+                yield eventStreamConversationId(conversation_id)
			
 
				+            async with httpxAsyncClient.stream('POST', base_url + "/v1/chat/completions", headers=headers, json=payload) as response:
			
 
				+                async def generator():
			
 
				+                    async for chunk in response.aiter_lines():
			
 
				+                        chunkStr = chunk.strip()
			
 
				+                        if not chunkStr: continue
			
 
				+                        chunkData = pattern.search(chunkStr)
			
 
				+                        if not chunkStr.endswith('}') or not chunkData: 
			
 
				+                            if 'DONE' in chunkStr: break
			
 
				+                            logger.warning(f"[AGENT] Engine return truncated data: {chunkStr}")
			
 
				+                            continue
			
 
				+                        chunkData = chunkData.group(1)
			
 
				+
			
 
				+                        data = json.loads(chunkData)
			
 
				+                        # 处理流式返回字符串
			
 
				+                        if len(data["choices"]) > 0:
			
 
				+                            logger.debug(f"[AGENT] Engine response: {data}")
			
 
				+                            content = data["choices"][0]['delta']['content']
			
 
				+                            if content:
			
 
				+                                yield (EVENT_TYPE.TEXT, content)
			
 
				+                async for parseResult in resonableStreamingParser(generator()):
			
 
				+                    yield parseResult
			
 
				+            yield eventStreamDone()
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[FastgptApiAgent] Exception: {e}", exc_info=True)
			
 
				+            yield eventStreamError(str(e))
			
--- a/digitalHuman/agent/core/openaiAgent.py
+++ b/digitalHuman/agent/core/openaiAgent.py
@@ -0,0 +1,65 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, resonableStreamingParser
			
 
				+from digitalHuman.core import OpenaiLLM
			
 
				+
			
 
				+__all__ = ["OpenaiApiAgent"]
			
 
				+
			
 
				+@AGENTS.register("OpenAI")
			
 
				+class OpenaiApiAgent(BaseAgent):
			
 
				+    async def run(
			
 
				+        self, 
			
 
				+        user: UserDesc,
			
 
				+        input: TextMessage, 
			
 
				+        streaming: bool = True,
			
 
				+        conversation_id: str = "",
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        try:
			
 
				+            if not isinstance(input, TextMessage):
			
 
				+                raise RuntimeError("OpenAI Agent only support TextMessage")
			
 
				+            # 参数校验
			
 
				+            paramters = self.checkParameter(**kwargs)
			
 
				+            API_URL = paramters["base_url"]
			
 
				+            API_KEY = paramters["api_key"]
			
 
				+            API_MODEL = paramters["model"]
			
 
				+
			
 
				+            coversaiotnIdRequire = False if conversation_id else True
			
 
				+            if coversaiotnIdRequire:
			
 
				+                conversation_id = await self.createConversation()
			
 
				+                yield eventStreamConversationId(conversation_id)
			
 
				+
			
 
				+            async def generator(user_id: str, conversation_id: str, query: str):
			
 
				+                thinkResponses = ""
			
 
				+                responses = ""
			
 
				+                currentMessage = [RoleMessage(role=ROLE_TYPE.USER, content=query)]
			
 
				+                messages = currentMessage
			
 
				+                async for chunk in OpenaiLLM.chat(
			
 
				+                    base_url=API_URL,
			
 
				+                    api_key=API_KEY,
			
 
				+                    model=API_MODEL,
			
 
				+                    messages=messages
			
 
				+                ):
			
 
				+                    if not chunk: continue
			
 
				+                    if len(chunk.choices) == 0: continue
			
 
				+                    delta = chunk.choices[0].delta.model_dump()
			
 
				+                    if 'reasoning_content' in delta and delta['reasoning_content']:
			
 
				+                        reasoning_content = delta['reasoning_content']
			
 
				+                        thinkResponses += reasoning_content
			
 
				+                        yield (EVENT_TYPE.THINK, reasoning_content)
			
 
				+                    elif 'content' in delta and delta['content']:
			
 
				+                        content = delta['content']
			
 
				+                        responses += content
			
 
				+                        yield (EVENT_TYPE.TEXT, content)
			
 
				+                currentMessage.append(RoleMessage(role=ROLE_TYPE.ASSISTANT, content=responses))
			
 
				+            async for parseResult in resonableStreamingParser(generator(user.user_id, conversation_id, input.data)):
			
 
				+                yield parseResult
			
 
				+            yield eventStreamDone()
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[OpenaiApiAgent] Exception: {e}", exc_info=True)
			
 
				+            yield eventStreamError(str(e))
			
 
				+
			
 
				+           
			
--- a/digitalHuman/agent/core/repeaterAgent.py
+++ b/digitalHuman/agent/core/repeaterAgent.py
@@ -0,0 +1,18 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import AGENTS
			
 
				+from ..agentBase import BaseAgent
			
 
				+from digitalHuman.protocol import *
			
 
				+
			
 
				+__all__ = ["Repeater"]
			
 
				+
			
 
				+
			
 
				+@AGENTS.register("Repeater")
			
 
				+class RepeaterAgent(BaseAgent):
			
 
				+    async def run(
			
 
				+        self, 
			
 
				+        input: TextMessage, 
			
 
				+        **kwargs
			
 
				+    ):
			
 
				+        yield eventStreamText(input.data)
			
 
				+        yield eventStreamDone()
			
--- a/digitalHuman/bin/__init__.py
+++ b/digitalHuman/bin/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .app import runServer
			
--- a/digitalHuman/bin/app.py
+++ b/digitalHuman/bin/app.py
@@ -0,0 +1,24 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import os
			
 
				+import uvicorn
			
 
				+from digitalHuman.engine import EnginePool
			
 
				+from digitalHuman.agent import AgentPool
			
 
				+from digitalHuman.server import app
			
 
				+from digitalHuman.utils import config
			
 
				+
			
 
				+__all__ = ["runServer"]
			
 
				+
			
 
				+def runServer():
			
 
				+    enginePool = EnginePool()
			
 
				+    enginePool.setup(config.SERVER.ENGINES)
			
 
				+    agentPool = AgentPool()
			
 
				+    agentPool.setup(config.SERVER.AGENTS)
			
 
				+    
			
 
				+    # 后端使用 HTTP 模式（前端使用 HTTPS）
			
 
				+    uvicorn.run(
			
 
				+        app, 
			
 
				+        host=config.SERVER.IP, 
			
 
				+        port=config.SERVER.PORT, 
			
 
				+        log_level="info"
			
 
				+    )
			
--- a/digitalHuman/core/__init__.py
+++ b/digitalHuman/core/__init__.py
@@ -0,0 +1,4 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .runner import BaseRunner
			
 
				+from .openai import OpenaiLLM
			
--- a/digitalHuman/core/openai.py
+++ b/digitalHuman/core/openai.py
@@ -0,0 +1,28 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from openai import AsyncOpenAI
			
 
				+from openai.types.chat import ChatCompletionChunk
			
 
				+from typing import List, AsyncGenerator
			
 
				+from digitalHuman.protocol import RoleMessage
			
 
				+
			
 
				+class OpenaiLLM():
			
 
				+    @staticmethod
			
 
				+    async def chat(
			
 
				+        base_url: str, 
			
 
				+        api_key: str, 
			
 
				+        model: str, 
			
 
				+        messages: List[RoleMessage],
			
 
				+        **kwargs
			
 
				+    ) -> AsyncGenerator[ChatCompletionChunk, None]:
			
 
				+        client = AsyncOpenAI(
			
 
				+            base_url=base_url,
			
 
				+            api_key=api_key
			
 
				+        )
			
 
				+        completions = await client.chat.completions.create(
			
 
				+            model=model,
			
 
				+            messages=[message.model_dump() for message in messages],
			
 
				+            stream=True,
			
 
				+            **kwargs
			
 
				+        )
			
 
				+        async for chunk in completions:
			
 
				+            yield chunk
			
--- a/digitalHuman/core/runner.py
+++ b/digitalHuman/core/runner.py
@@ -0,0 +1,84 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from typing import List, Dict
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from abc import ABC, abstractmethod
			
 
				+from digitalHuman.protocol import BaseMessage, ParamDesc, EngineDesc, ENGINE_TYPE, INFER_TYPE
			
 
				+
			
 
				+__all__ = ["BaseRunner"]
			
 
				+
			
 
				+class BaseRunner(ABC):
			
 
				+    def __init__(self, config: CN, type: ENGINE_TYPE):
			
 
				+        self.cfg = config
			
 
				+        self._engineType = type
			
 
				+        self.setup()
			
 
				+    
			
 
				+    def __del__(self):
			
 
				+        self.release()
			
 
				+    
			
 
				+    @property
			
 
				+    def name(self) -> str:
			
 
				+        return self.cfg.NAME
			
 
				+    
			
 
				+    @property
			
 
				+    def type(self) -> ENGINE_TYPE:
			
 
				+        return self._engineType
			
 
				+    
			
 
				+    @property
			
 
				+    def inferType(self) -> INFER_TYPE:
			
 
				+        if "infer_type" not in self.meta(): return INFER_TYPE.NORMAL
			
 
				+        if self.meta()['infer_type'] == 'stream': 
			
 
				+            return INFER_TYPE.STREAM
			
 
				+        elif self.meta()['infer_type'] == 'normal':
			
 
				+            return INFER_TYPE.NORMAL
			
 
				+        else:
			
 
				+            raise RuntimeError(f"Invalid infer type: {self.meta()['infer_type']}")
			
 
				+    
			
 
				+    def desc(self) -> EngineDesc:
			
 
				+        return EngineDesc(
			
 
				+            name=self.name,
			
 
				+            type=self.type,
			
 
				+            infer_type=self.inferType,
			
 
				+            desc=self.cfg.DESC if "DESC" in self.cfg else "",
			
 
				+            meta=self.meta()
			
 
				+        )
			
 
				+    
			
 
				+    def meta(self) -> Dict:
			
 
				+        if "META" not in self.cfg: return {}
			
 
				+        return self.cfg.META
			
 
				+    
			
 
				+    def custom(self) -> Dict:
			
 
				+        if "CUSTOM" not in self.cfg: return {}
			
 
				+        return self.cfg.CUSTOM
			
 
				+
			
 
				+    def parameters(self) -> List[ParamDesc]:
			
 
				+        if "PARAMETERS" not in self.cfg: return []
			
 
				+        params = []
			
 
				+        for param in self.cfg.PARAMETERS:
			
 
				+            params.append(ParamDesc.model_validate(param))
			
 
				+        return params
			
 
				+    
			
 
				+    def checkParameter(self, **kwargs) -> Dict:
			
 
				+        paramters = {}
			
 
				+        for paramter in self.parameters():
			
 
				+            if paramter.name not in kwargs:
			
 
				+                if not paramter.required: 
			
 
				+                    paramters[paramter.name] = paramter.default
			
 
				+                    continue
			
 
				+                raise RuntimeError(f"Missing parameter: {paramter.name}")
			
 
				+            paramters[paramter.name] = kwargs[paramter.name]
			
 
				+        # 额外参数填充
			
 
				+        for k, v in kwargs.items():
			
 
				+            if k not in paramters:
			
 
				+                paramters[k] = v
			
 
				+        return paramters
			
 
				+    
			
 
				+    def setup(self):
			
 
				+        pass
			
 
				+
			
 
				+    def release(self):
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    async def run(self, input: BaseMessage, **kwargs):
			
 
				+        raise NotImplementedError  
			
--- a/digitalHuman/engine/__init__.py
+++ b/digitalHuman/engine/__init__.py
@@ -0,0 +1,4 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .enginePool import EnginePool
			
 
				+from .engineBase import BaseEngine, BaseTTSEngine
			
--- a/digitalHuman/engine/asr/__init__.py
+++ b/digitalHuman/engine/asr/__init__.py
@@ -0,0 +1,11 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .tencentASR import TencentApiAsr
			
 
				+from .difyASR import DifyApiAsr
			
 
				+from .cozeASR import CozeApiAsr
			
 
				+from .funasrStreamingASR import FunasrStreamingAsr
			
 
				+from .dashscopeASR import DashscopeASR
			
 
				+from .dashscopeStreamingASR import DashscopeStreamingASR
			
 
				+from .asrFactory import ASRFactory
			
 
				+
			
 
				+__all__ = ['ASRFactory']
			
--- a/digitalHuman/engine/asr/asrFactory.py
+++ b/digitalHuman/engine/asr/asrFactory.py
@@ -0,0 +1,25 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import ASREngines
			
 
				+from ..engineBase import BaseEngine
			
 
				+from typing import List
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.protocol import ENGINE_TYPE
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+__all__ = ["ASRFactory"]
			
 
				+
			
 
				+class ASRFactory():
			
 
				+    """
			
 
				+    Automatic Speech Recognition Factory
			
 
				+    """
			
 
				+    @staticmethod
			
 
				+    def create(config: CN) -> BaseEngine:
			
 
				+        if config.NAME in ASREngines.list():
			
 
				+            logger.info(f"[ASRFactory] Create engine: {config.NAME}")
			
 
				+            return ASREngines.get(config.NAME)(config, ENGINE_TYPE.ASR)
			
 
				+        else:
			
 
				+            raise RuntimeError(f"[ASRFactory] Please check config, support ASR engine: {ASREngines.list()}")
			
 
				+    @staticmethod
			
 
				+    def list() -> List:
			
 
				+        return ASREngines.list()
			
--- a/digitalHuman/engine/asr/cozeASR.py
+++ b/digitalHuman/engine/asr/cozeASR.py
@@ -0,0 +1,42 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import ASREngines
			
 
				+from ..engineBase import BaseASREngine
			
 
				+import io, base64
			
 
				+from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE
			
 
				+from digitalHuman.utils import logger, httpxAsyncClient, wavToMp3, checkResponse
			
 
				+
			
 
				+__all__ = ["CozeApiAsr"]
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("Coze")
			
 
				+class CozeApiAsr(BaseASREngine): 
			
 
				+    def setup(self):
			
 
				+        self.url = "https://api.coze.cn/v1/audio/transcriptions"
			
 
				+
			
 
				+    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        API_TOKEN = paramters["token"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Authorization': f'Bearer {API_TOKEN}'
			
 
				+        }
			
 
				+
			
 
				+        files = {
			
 
				+            'file': ('adh.mp3', input.data)
			
 
				+        }
			
 
				+
			
 
				+        if isinstance(input.data, str):
			
 
				+            input.data = base64.b64decode(input.data)
			
 
				+        if input.type == AUDIO_TYPE.WAV:
			
 
				+            input.data = wavToMp3(input.data)
			
 
				+            input.type = AUDIO_TYPE.MP3
			
 
				+
			
 
				+        response = await httpxAsyncClient.post(self.url, headers=headers, files=files)
			
 
				+        resp = checkResponse(response, "CozeApiAsr")
			
 
				+        result = resp["data"]["text"]
			
 
				+        logger.debug(f"[ASR] Engine response: {result}")
			
 
				+        message = TextMessage(data=result)
			
 
				+        return message
			
--- a/digitalHuman/engine/asr/dashscopeASR.py
+++ b/digitalHuman/engine/asr/dashscopeASR.py
@@ -0,0 +1,132 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import os
			
 
				+import base64
			
 
				+import asyncio
			
 
				+import tempfile
			
 
				+from http import HTTPStatus
			
 
				+from dashscope.audio.asr import Recognition
			
 
				+from digitalHuman.utils import logger
			
 
				+from digitalHuman.engine.builder import ASREngines
			
 
				+from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE, DATA_TYPE
			
 
				+from digitalHuman.engine.engineBase import BaseASREngine
			
 
				+
			
 
				+__all__ = ["DashscopeASR"]
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("dashscopeASR")
			
 
				+class DashscopeASR(BaseASREngine):
			
 
				+    def setup(self):
			
 
				+        """初始化配置"""
			
 
				+        try:
			
 
				+            import dashscope
			
 
				+            # 从配置或环境变量获取 API Key
			
 
				+            custom_config = self.custom()
			
 
				+            api_key = custom_config.get('api_key') or os.getenv('DASHSCOPE_API_KEY')
			
 
				+            if api_key:
			
 
				+                dashscope.api_key = api_key
			
 
				+                logger.info("[DashscopeASR] API Key configured successfully")
			
 
				+            else:
			
 
				+                logger.warning("[DashscopeASR] No API Key found, please set DASHSCOPE_API_KEY environment variable or configure in yaml")
			
 
				+        except ImportError:
			
 
				+            logger.error("[DashscopeASR] Please install dashscope: pip install dashscope")
			
 
				+            raise
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeASR] Setup error: {e}")
			
 
				+            raise
			
 
				+
			
 
				+    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
			
 
				+        """
			
 
				+        执行语音识别
			
 
				+        input: AudioMessage，包含音频数据
			
 
				+        返回: TextMessage，包含识别文本
			
 
				+        """
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        model = paramters.get("model", "fun-asr-realtime")
			
 
				+        sample_rate = paramters.get("sample_rate", 16000)
			
 
				+        format_type = paramters.get("format", "wav")
			
 
				+        language_hints = paramters.get("language_hints", ["zh", "en"])
			
 
				+        
			
 
				+        try:
			
 
				+            # 处理音频数据
			
 
				+            audio_data = input.data
			
 
				+            if isinstance(audio_data, str):
			
 
				+                # 如果是base64编码的字符串，先解码
			
 
				+                audio_data = base64.b64decode(audio_data)
			
 
				+            
			
 
				+            # 保存为临时文件
			
 
				+            file_suffix = f'.{input.type}' if hasattr(input, 'type') else '.wav'
			
 
				+            with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp_file:
			
 
				+                tmp_file.write(audio_data)
			
 
				+                audio_path = tmp_file.name
			
 
				+            
			
 
				+            logger.debug(f"[DashscopeASR] Using model: {model}, format: {format_type}, sample_rate: {sample_rate}")
			
 
				+            
			
 
				+            # 创建识别对象
			
 
				+            # 注意：language_hints 只支持 paraformer-realtime-v2 模型
			
 
				+            if model in ['paraformer-realtime-v2', 'paraformer-v2']:
			
 
				+                recognition = Recognition(
			
 
				+                    model=model,
			
 
				+                    format=format_type,
			
 
				+                    sample_rate=sample_rate,
			
 
				+                    language_hints=language_hints,
			
 
				+                    callback=None
			
 
				+                )
			
 
				+            else:
			
 
				+                # fun-asr-realtime 等模型不支持 language_hints
			
 
				+                recognition = Recognition(
			
 
				+                    model=model,
			
 
				+                    format=format_type,
			
 
				+                    sample_rate=sample_rate,
			
 
				+                    callback=None
			
 
				+                )
			
 
				+            
			
 
				+            # 执行识别（在线程池中执行同步调用）
			
 
				+            logger.debug(f"[DashscopeASR] Starting recognition for audio file: {audio_path}")
			
 
				+            result = await asyncio.get_event_loop().run_in_executor(
			
 
				+                None, recognition.call, audio_path
			
 
				+            )
			
 
				+            
			
 
				+            # 清理临时文件
			
 
				+            try:
			
 
				+                os.remove(audio_path)
			
 
				+            except Exception as e:
			
 
				+                logger.warning(f"[DashscopeASR] Failed to remove temp file: {e}")
			
 
				+            
			
 
				+            # 处理结果
			
 
				+            if result.status_code == HTTPStatus.OK:
			
 
				+                # 获取识别结果
			
 
				+                sentence = result.get_sentence()
			
 
				+                logger.debug(f"[DashscopeASR] Sentence type: {type(sentence)}, content: {sentence}")
			
 
				+                
			
 
				+                # 从句子对象中提取文本
			
 
				+                if isinstance(sentence, dict):
			
 
				+                    # 字典类型，提取text字段
			
 
				+                    text = sentence.get('text', '')
			
 
				+                elif isinstance(sentence, list) and len(sentence) > 0:
			
 
				+                    # 如果是列表，获取第一个元素
			
 
				+                    first_item = sentence[0]
			
 
				+                    text = first_item.get('text', '') if isinstance(first_item, dict) else str(first_item)
			
 
				+                elif isinstance(sentence, str):
			
 
				+                    text = sentence
			
 
				+                else:
			
 
				+                    # 尝试获取所有可用的文本字段
			
 
				+                    text = str(sentence) if sentence else ''
			
 
				+                
			
 
				+                logger.info(f"[DashscopeASR] Recognition result: {text}")
			
 
				+                logger.debug(
			
 
				+                    f"[Metric] requestId: {recognition.get_last_request_id()}, "
			
 
				+                    f"first package delay ms: {recognition.get_first_package_delay()}, "
			
 
				+                    f"last package delay ms: {recognition.get_last_package_delay()}"
			
 
				+                )
			
 
				+                return TextMessage(data=text)
			
 
				+            else:
			
 
				+                error_msg = f"Recognition failed: {result.message}"
			
 
				+                logger.error(f"[DashscopeASR] {error_msg}")
			
 
				+                raise RuntimeError(error_msg)
			
 
				+                
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeASR] Error during recognition: {e}")
			
 
				+            raise
			
 
				+
			
--- a/digitalHuman/engine/asr/dashscopeStreamingASR.py
+++ b/digitalHuman/engine/asr/dashscopeStreamingASR.py
@@ -0,0 +1,232 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import os
			
 
				+import json
			
 
				+import asyncio
			
 
				+from http import HTTPStatus
			
 
				+from fastapi import WebSocket, WebSocketDisconnect
			
 
				+from dashscope.audio.asr import RecognitionCallback, Recognition
			
 
				+from digitalHuman.utils import logger
			
 
				+from digitalHuman.engine.builder import ASREngines
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.engine.engineBase import StreamBaseEngine
			
 
				+
			
 
				+__all__ = ["DashscopeStreamingASR"]
			
 
				+
			
 
				+
			
 
				+class ASRCallback(RecognitionCallback):
			
 
				+    """ASR 回调处理类"""
			
 
				+    def __init__(self, websocket: WebSocket):
			
 
				+        self.websocket = websocket
			
 
				+        self.partial_text = ""
			
 
				+        self.final_text = ""
			
 
				+    
			
 
				+    def on_open(self):
			
 
				+        logger.debug("[DashscopeStreamingASR] Connection opened")
			
 
				+    
			
 
				+    def on_close(self):
			
 
				+        logger.debug("[DashscopeStreamingASR] Connection closed")
			
 
				+    
			
 
				+    def on_event(self, result):
			
 
				+        """处理识别事件"""
			
 
				+        try:
			
 
				+            if result.status_code == HTTPStatus.OK:
			
 
				+                sentence = result.get_sentence()
			
 
				+                if sentence:
			
 
				+                    text = sentence.get('text', '')
			
 
				+                    # 判断是否为最终结果
			
 
				+                    if sentence.get('end_time'):
			
 
				+                        # 最终结果
			
 
				+                        self.final_text = text
			
 
				+                        logger.debug(f"[DashscopeStreamingASR] Final: {text}")
			
 
				+                    else:
			
 
				+                        # 部分结果
			
 
				+                        self.partial_text = text
			
 
				+                        logger.debug(f"[DashscopeStreamingASR] Partial: {text}")
			
 
				+            else:
			
 
				+                logger.error(f"[DashscopeStreamingASR] Error: {result.message}")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeStreamingASR] Callback error: {e}")
			
 
				+    
			
 
				+    def on_error(self, error):
			
 
				+        logger.error(f"[DashscopeStreamingASR] Error: {error}")
			
 
				+    
			
 
				+    async def get_partial_result(self):
			
 
				+        """获取部分识别结果"""
			
 
				+        if self.partial_text:
			
 
				+            text = self.partial_text
			
 
				+            return text
			
 
				+        return ""
			
 
				+    
			
 
				+    async def get_final_result(self):
			
 
				+        """获取最终识别结果"""
			
 
				+        if self.final_text:
			
 
				+            text = self.final_text
			
 
				+            self.final_text = ""
			
 
				+            self.partial_text = ""
			
 
				+            return text
			
 
				+        return ""
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("dashscopeStreamingASR")
			
 
				+class DashscopeStreamingASR(StreamBaseEngine):
			
 
				+    def setup(self):
			
 
				+        """初始化配置"""
			
 
				+        try:
			
 
				+            import dashscope
			
 
				+            # 从配置或环境变量获取 API Key
			
 
				+            api_key = self.cfg.get('CUSTOM', {}).get('api_key') or os.getenv('DASHSCOPE_API_KEY')
			
 
				+            if api_key:
			
 
				+                dashscope.api_key = api_key
			
 
				+                logger.info("[DashscopeStreamingASR] API Key configured successfully")
			
 
				+            else:
			
 
				+                logger.warning("[DashscopeStreamingASR] No API Key found, please set DASHSCOPE_API_KEY environment variable or configure in yaml")
			
 
				+        except ImportError:
			
 
				+            logger.error("[DashscopeStreamingASR] Please install dashscope: pip install dashscope")
			
 
				+            raise
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeStreamingASR] Setup error: {e}")
			
 
				+            raise
			
 
				+
			
 
				+    async def _task_send(self, adhWebsocket: WebSocket, asr_callback: ASRCallback):
			
 
				+        """
			
 
				+        发送识别结果到前端
			
 
				+        """
			
 
				+        try:
			
 
				+            last_partial = ""
			
 
				+            while True:
			
 
				+                await asyncio.sleep(0.1)  # 100ms 检查一次
			
 
				+                
			
 
				+                # 检查是否有最终结果
			
 
				+                final_text = await asr_callback.get_final_result()
			
 
				+                if final_text:
			
 
				+                    await WebSocketHandler.send_message(
			
 
				+                        adhWebsocket, 
			
 
				+                        WS_SEND_ACTION_TYPE.ENGINE_FINAL_OUTPUT, 
			
 
				+                        final_text
			
 
				+                    )
			
 
				+                    last_partial = ""
			
 
				+                    continue
			
 
				+                
			
 
				+                # 检查是否有部分结果
			
 
				+                partial_text = await asr_callback.get_partial_result()
			
 
				+                if partial_text and partial_text != last_partial:
			
 
				+                    await WebSocketHandler.send_message(
			
 
				+                        adhWebsocket, 
			
 
				+                        WS_SEND_ACTION_TYPE.ENGINE_PARTIAL_OUTPUT, 
			
 
				+                        partial_text
			
 
				+                    )
			
 
				+                    last_partial = partial_text
			
 
				+                    
			
 
				+        except WebSocketDisconnect:
			
 
				+            logger.debug("[DashscopeStreamingASR] adhWebsocket closed, task_send exit")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeStreamingASR] task_send error: {e}")
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ERROR, str(e))
			
 
				+
			
 
				+    async def _task_recv(self, adhWebsocket: WebSocket, recognition: Recognition):
			
 
				+        """
			
 
				+        接收前端音频数据并发送到识别服务
			
 
				+        """
			
 
				+        try:
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_STARTED)
			
 
				+            
			
 
				+            while True:
			
 
				+                action, payload = await WebSocketHandler.recv_message(adhWebsocket)
			
 
				+                
			
 
				+                match action:
			
 
				+                    case WS_RECV_ACTION_TYPE.PING:
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.PONG, b"")
			
 
				+                    
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_START:
			
 
				+                        raise RuntimeError("[DashscopeStreamingASR] Engine has been started")
			
 
				+                    
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_PARTIAL_INPUT:
			
 
				+                        # 发送音频数据到识别服务
			
 
				+                        await asyncio.get_event_loop().run_in_executor(
			
 
				+                            None, recognition.send_audio_frame, payload
			
 
				+                        )
			
 
				+                    
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_FINAL_INPUT:
			
 
				+                        # 发送最后的音频数据
			
 
				+                        await asyncio.get_event_loop().run_in_executor(
			
 
				+                            None, recognition.send_audio_frame, payload
			
 
				+                        )
			
 
				+                    
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_STOP:
			
 
				+                        # 停止识别
			
 
				+                        await asyncio.get_event_loop().run_in_executor(
			
 
				+                            None, recognition.stop
			
 
				+                        )
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_STOPPED)
			
 
				+                        return
			
 
				+                    
			
 
				+                    case _:
			
 
				+                        raise RuntimeError(f"[DashscopeStreamingASR] Unknown action: {action}")
			
 
				+                        
			
 
				+        except WebSocketDisconnect:
			
 
				+            logger.debug("[DashscopeStreamingASR] adhWebsocket closed, task_recv exit")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeStreamingASR] task_recv error: {e}")
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ERROR, str(e))
			
 
				+
			
 
				+    async def run(self, websocket: WebSocket, **kwargs) -> None:
			
 
				+        """运行流式识别"""
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        model = paramters.get("model", "fun-asr-realtime")
			
 
				+        sample_rate = paramters.get("sample_rate", 16000)
			
 
				+        format_type = paramters.get("format", "pcm")
			
 
				+        language_hints = paramters.get("language_hints", ["zh", "en"])
			
 
				+        
			
 
				+        await WebSocketHandler.send_message(websocket, WS_SEND_ACTION_TYPE.ENGINE_INITIALZING)
			
 
				+        
			
 
				+        try:
			
 
				+            # 创建回调对象
			
 
				+            asr_callback = ASRCallback(websocket)
			
 
				+            
			
 
				+            # 创建识别对象
			
 
				+            # 注意：language_hints 只支持 paraformer-realtime-v2 和 paraformer-v2 模型
			
 
				+            if model in ['paraformer-realtime-v2', 'paraformer-v2']:
			
 
				+                recognition = Recognition(
			
 
				+                    model=model,
			
 
				+                    format=format_type,
			
 
				+                    sample_rate=sample_rate,
			
 
				+                    language_hints=language_hints,
			
 
				+                    callback=asr_callback
			
 
				+                )
			
 
				+            else:
			
 
				+                # fun-asr-realtime 等模型不支持 language_hints
			
 
				+                recognition = Recognition(
			
 
				+                    model=model,
			
 
				+                    format=format_type,
			
 
				+                    sample_rate=sample_rate,
			
 
				+                    callback=asr_callback
			
 
				+                )
			
 
				+            
			
 
				+            # 启动识别
			
 
				+            await asyncio.get_event_loop().run_in_executor(
			
 
				+                None, recognition.start
			
 
				+            )
			
 
				+            
			
 
				+            # 创建发送和接收任务
			
 
				+            task_recv = asyncio.create_task(self._task_recv(websocket, recognition))
			
 
				+            task_send = asyncio.create_task(self._task_send(websocket, asr_callback))
			
 
				+            
			
 
				+            # 等待任务完成
			
 
				+            await asyncio.gather(task_recv, task_send)
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[DashscopeStreamingASR] Run error: {e}")
			
 
				+            await WebSocketHandler.send_message(websocket, WS_SEND_ACTION_TYPE.ERROR, str(e))
			
 
				+        finally:
			
 
				+            # 清理资源
			
 
				+            try:
			
 
				+                if recognition:
			
 
				+                    await asyncio.get_event_loop().run_in_executor(
			
 
				+                        None, recognition.stop
			
 
				+                    )
			
 
				+            except:
			
 
				+                pass
			
 
				+
			
 
				+
			
--- a/digitalHuman/engine/asr/difyASR.py
+++ b/digitalHuman/engine/asr/difyASR.py
@@ -0,0 +1,43 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import ASREngines
			
 
				+from ..engineBase import BaseASREngine
			
 
				+import io, base64
			
 
				+from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE
			
 
				+from digitalHuman.utils import logger, httpxAsyncClient, wavToMp3
			
 
				+
			
 
				+__all__ = ["DifyApiAsr"]
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("Dify")
			
 
				+class DifyApiAsr(BaseASREngine): 
			
 
				+    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        API_SERVER = paramters["api_server"]
			
 
				+        API_KEY = paramters["api_key"]
			
 
				+        API_USERNAME = paramters["username"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Authorization': f'Bearer {API_KEY}'
			
 
				+        }
			
 
				+
			
 
				+        payload = {
			
 
				+            'user': API_USERNAME
			
 
				+        }
			
 
				+
			
 
				+        if isinstance(input.data, str):
			
 
				+            input.data = base64.b64decode(input.data)
			
 
				+        if input.type == AUDIO_TYPE.WAV:
			
 
				+            input.data = wavToMp3(input.data)
			
 
				+            input.type = AUDIO_TYPE.MP3
			
 
				+        files = {'file': ('file', io.BytesIO(input.data), 'audio/mp3')}
			
 
				+        response = await httpxAsyncClient.post(API_SERVER + "/audio-to-text", headers=headers, files=files, data=payload)
			
 
				+        if response.status_code != 200:
			
 
				+            raise RuntimeError(f"Dify asr api error: {response.status_code}")
			
 
				+        result = response.json()["text"]
			
 
				+        logger.debug(f"[ASR] Engine response: {result}")
			
 
				+        message = TextMessage(data=result)
			
 
				+        return message
			
 
				+        
			
--- a/digitalHuman/engine/asr/funasrStreamingASR.py
+++ b/digitalHuman/engine/asr/funasrStreamingASR.py
@@ -0,0 +1,167 @@
 
				+import json
			
 
				+import asyncio
			
 
				+import time
			
 
				+import websockets
			
 
				+from fastapi import WebSocket, WebSocketDisconnect
			
 
				+from digitalHuman.utils import logger
			
 
				+from digitalHuman.engine.builder import ASREngines
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.engine.engineBase import StreamBaseEngine
			
 
				+
			
 
				+__all__ = ["FunasrStreamingAsr"]
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("funasrStreaming")
			
 
				+class FunasrStreamingAsr(StreamBaseEngine):
			
 
				+    async def _reset_sentence(self, funasrWebsocket: websockets.ClientConnection):
			
 
				+        """重置说话识别, 防止连续识别添加标点符号"""
			
 
				+        message = json.dumps(
			
 
				+            {
			
 
				+                "is_speaking": False,
			
 
				+            }
			
 
				+        )
			
 
				+        await funasrWebsocket.send(message)
			
 
				+        message = json.dumps(
			
 
				+            {
			
 
				+                "is_speaking": True,
			
 
				+            }
			
 
				+        )
			
 
				+        await funasrWebsocket.send(message)
			
 
				+
			
 
				+    async def _task_send(self, adhWebsocket: WebSocket, funasrWebsocket: websockets.ClientConnection):
			
 
				+        """
			
 
				+        funasr server -> adh server -> adh web
			
 
				+        """
			
 
				+        text_send = ""
			
 
				+        text_send_2pass_online = ""
			
 
				+        text_send_2pass_offline = ""
			
 
				+        wake_word = "小天小天"
			
 
				+        is_awake = False
			
 
				+        inactivity_deadline = time.monotonic() + 300  # 5分钟超时
			
 
				+
			
 
				+        def process_text_for_wake(text: str) -> tuple[bool, str]:
			
 
				+            nonlocal is_awake
			
 
				+            if not is_awake:
			
 
				+                if wake_word in text:
			
 
				+                    is_awake = True
			
 
				+                    return True, text.replace(wake_word, "").strip()
			
 
				+                return False, ""
			
 
				+            return True, text.replace(wake_word, "").strip()
			
 
				+        try:
			
 
				+            while True:
			
 
				+                # 超时检查
			
 
				+                if time.monotonic() > inactivity_deadline:
			
 
				+                    await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_STOPPED, "inactivity_timeout")
			
 
				+                    return
			
 
				+                meg = await funasrWebsocket.recv()
			
 
				+                meg = json.loads(meg)
			
 
				+                wav_name = meg.get("wav_name", "demo")
			
 
				+                text = meg["text"]
			
 
				+                timestamp = ""
			
 
				+                offline_msg_done = meg.get("is_final", False)
			
 
				+                if "timestamp" in meg:
			
 
				+                    timestamp = meg["timestamp"]
			
 
				+                if "mode" not in meg:
			
 
				+                    continue
			
 
				+                if meg["mode"] == "online":
			
 
				+                    text_send += text
			
 
				+                elif meg["mode"] == "offline":
			
 
				+                    text_send += text
			
 
				+                    offline_msg_done = True
			
 
				+                else:
			
 
				+                    if meg["mode"] == "2pass-online":
			
 
				+                        text_send_2pass_online += text
			
 
				+                        text_send = text_send_2pass_offline + text_send_2pass_online
			
 
				+                    else:
			
 
				+                        offline_msg_done = True
			
 
				+                        text_send_2pass_online = ""
			
 
				+                        text_send = text_send_2pass_offline + text
			
 
				+                        text_send_2pass_offline += text
			
 
				+                if offline_msg_done:
			
 
				+                    awakened, cleaned = process_text_for_wake(text_send)
			
 
				+                    if awakened and cleaned:
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_FINAL_OUTPUT, cleaned)
			
 
				+                        inactivity_deadline = time.monotonic() + 300
			
 
				+                    text_send = ""
			
 
				+                    text_send_2pass_online = ""
			
 
				+                    text_send_2pass_offline = ""
			
 
				+                    await self._reset_sentence(funasrWebsocket)
			
 
				+                else:
			
 
				+                    awakened, cleaned = process_text_for_wake(text_send)
			
 
				+                    if awakened and cleaned:
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_PARTIAL_OUTPUT, cleaned)
			
 
				+                        inactivity_deadline = time.monotonic() + 300
			
 
				+        except WebSocketDisconnect:
			
 
				+            logger.debug("adhWebsocket closed, task_send exit")
			
 
				+        except websockets.ConnectionClosed:
			
 
				+            logger.debug("funasrWebsocket closed, task_send exit")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"FunasrStreamingAsr task_send error: {e}")
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ERROR, str(e))
			
 
				+
			
 
				+    async def _task_recv(self, adhWebsocket: WebSocket, funasrWebsocket: websockets.ClientConnection, mode: str):
			
 
				+        """
			
 
				+        adh web -> adh server -> funasr server
			
 
				+        """
			
 
				+        try:
			
 
				+            message = json.dumps(
			
 
				+                {
			
 
				+                    "mode": mode,
			
 
				+                    "chunk_size": [5, 10, 5], # chunk_size: 60 * 10 ms. 左看300ms, 右看300ms
			
 
				+                    "chunk_interval": 10,
			
 
				+                    "encoder_chunk_look_back": 4,
			
 
				+                    "decoder_chunk_look_back": 0,
			
 
				+                    "wav_name": "adh",
			
 
				+                    "is_speaking": True,
			
 
				+                    "hotwords": "",
			
 
				+                    "itn": True,
			
 
				+                }
			
 
				+            )
			
 
				+            await funasrWebsocket.send(message)
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_STARTED)
			
 
				+            while True:
			
 
				+                action, payload = await WebSocketHandler.recv_message(adhWebsocket)
			
 
				+                match action:
			
 
				+                    case WS_RECV_ACTION_TYPE.PING:
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.PONG.value, b"")
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_START:
			
 
				+                        raise RuntimeError("FunasrStreamingAsr has benn started")
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_PARTIAL_INPUT:
			
 
				+                        await funasrWebsocket.send(payload)
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_FINAL_INPUT:
			
 
				+                        message = json.dumps(
			
 
				+                            {
			
 
				+                                "is_speaking": False
			
 
				+                            }
			
 
				+                        )
			
 
				+                        await funasrWebsocket.send(message)
			
 
				+                        await funasrWebsocket.send(payload)
			
 
				+                    case WS_RECV_ACTION_TYPE.ENGINE_STOP:
			
 
				+                        await funasrWebsocket.close()
			
 
				+                        await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ENGINE_STOPPED)
			
 
				+                        return
			
 
				+                    case _:
			
 
				+                        raise RuntimeError(f"FunasrStreamingAsr task_recv error: {action} not found")
			
 
				+        except WebSocketDisconnect:
			
 
				+            logger.debug("funasrWebsocket closed, task_recv exit")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"FunasrStreamingAsr task_recv error: {e}")
			
 
				+            await WebSocketHandler.send_message(adhWebsocket, WS_SEND_ACTION_TYPE.ERROR, str(e))
			
 
				+
			
 
				+    async def run(self, websocket: WebSocket, **kwargs) -> None:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        API_URL = paramters["api_url"]
			
 
				+        MODE = paramters["mode"]
			
 
				+        await WebSocketHandler.send_message(websocket, WS_SEND_ACTION_TYPE.ENGINE_INITIALZING)
			
 
				+        # 连接服务器
			
 
				+        try:
			
 
				+            async with websockets.connect(API_URL, subprotocols=["binary"], ping_interval=None) as funasrWebsocket:
			
 
				+                # adh web -> adh server -> funasr server
			
 
				+                task_recv = asyncio.create_task(self._task_recv(websocket, funasrWebsocket, MODE))
			
 
				+                # funasr server -> adh server -> adh web
			
 
				+                task_send = asyncio.create_task(self._task_send(websocket, funasrWebsocket))
			
 
				+                await asyncio.gather(task_recv, task_send)
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"FunasrStreamingAsr run error: {e}")
			
 
				+            # 异常会被 async with 自动处理，这里只记录错误
			
--- a/digitalHuman/engine/asr/tencentASR.py
+++ b/digitalHuman/engine/asr/tencentASR.py
@@ -0,0 +1,113 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+# 参数配置参考: https://cloud.tencent.com/document/api/1093/35646
			
 
				+
			
 
				+from ..builder import ASREngines
			
 
				+from ..engineBase import BaseASREngine
			
 
				+import hashlib
			
 
				+import hmac
			
 
				+import time
			
 
				+import json
			
 
				+import base64
			
 
				+from datetime import datetime, timezone
			
 
				+from typing import Tuple, Dict
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, httpxAsyncClient
			
 
				+from pydantic import BaseModel
			
 
				+
			
 
				+__all__ = ["TencentApiAsr"]
			
 
				+
			
 
				+
			
 
				+def sign(key, msg: str):
			
 
				+    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
			
 
				+
			
 
class TencentCloudApiKey(BaseModel):
    """Credential pair used to sign Tencent Cloud API requests."""

    # Placed in the ``Credential=`` part of the Authorization header.
    secret_id: str
    # Seeds the TC3-HMAC-SHA256 signing-key derivation.
    secret_key: str
			
 
				+
			
 
				+
			
 
				+@ASREngines.register("Tencent-API")
			
 
				+class TencentApiAsr(BaseASREngine): 
			
 
				+    def setup(self):
			
 
				+        self._url = "https://asr.tencentcloudapi.com"
			
 
				+    
			
 
				+    def _buildRequest(self, input: AudioMessage, tencentApiKey: TencentCloudApiKey) -> Tuple[Dict, str]:
			
 
				+        VoiceFormat = "mp3" if input.type == AUDIO_TYPE.MP3 else "wav"
			
 
				+        service = "asr"
			
 
				+        host = "asr.tencentcloudapi.com"
			
 
				+        version = "2019-06-14"
			
 
				+        action = "SentenceRecognition"
			
 
				+        algorithm = "TC3-HMAC-SHA256"
			
 
				+        timestamp = int(time.time())
			
 
				+        date = datetime.fromtimestamp(timestamp, timezone.utc).strftime("%Y-%m-%d")
			
 
				+        params = {
			
 
				+            "EngSerViceType": "16k_zh-PY",
			
 
				+            "SourceType": 1,
			
 
				+            "VoiceFormat": VoiceFormat,
			
 
				+            "Data": input.data,
			
 
				+            "DataLen": len(input.data)
			
 
				+        }
			
 
				+        payload = json.dumps(params)
			
 
				+        # ************* 步骤 1：拼接规范请求串 *************
			
 
				+        http_request_method = "POST"
			
 
				+        canonical_uri = "/"
			
 
				+        canonical_querystring = ""
			
 
				+        ct = "application/json; charset=utf-8"
			
 
				+        canonical_headers = "content-type:%s\nhost:%s\nx-tc-action:%s\n" % (ct, host, action.lower())
			
 
				+        signed_headers = "content-type;host;x-tc-action"
			
 
				+        hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
			
 
				+        canonical_request = (http_request_method + "\n" +
			
 
				+                            canonical_uri + "\n" +
			
 
				+                            canonical_querystring + "\n" +
			
 
				+                            canonical_headers + "\n" +
			
 
				+                            signed_headers + "\n" +
			
 
				+                            hashed_request_payload)
			
 
				+
			
 
				+        # ************* 步骤 2：拼接待签名字符串 *************
			
 
				+        credential_scope = date + "/" + service + "/" + "tc3_request"
			
 
				+        hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
			
 
				+        string_to_sign = (algorithm + "\n" +
			
 
				+                        str(timestamp) + "\n" +
			
 
				+                        credential_scope + "\n" +
			
 
				+                        hashed_canonical_request)
			
 
				+
			
 
				+        # ************* 步骤 3：计算签名 *************
			
 
				+        secret_date = sign(("TC3" + tencentApiKey.secret_key).encode("utf-8"), date)
			
 
				+        secret_service = sign(secret_date, service)
			
 
				+        secret_signing = sign(secret_service, "tc3_request")
			
 
				+        signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
			
 
				+
			
 
				+        # ************* 步骤 4：拼接 Authorization *************
			
 
				+        authorization = (algorithm + " " +
			
 
				+                        "Credential=" + tencentApiKey.secret_id + "/" + credential_scope + ", " +
			
 
				+                        "SignedHeaders=" + signed_headers + ", " +
			
 
				+                        "Signature=" + signature)
			
 
				+
			
 
				+        # ************* 步骤 5：构造并发起请求 *************
			
 
				+        headers = {
			
 
				+            "Authorization": authorization,
			
 
				+            "Content-Type": "application/json; charset=utf-8",
			
 
				+            "Host": host,
			
 
				+            "X-TC-Action": action,
			
 
				+            "X-TC-Timestamp": str(timestamp),
			
 
				+            "X-TC-Version": version
			
 
				+        }
			
 
				+
			
 
				+        return (headers, payload)
			
 
				+
			
 
				+    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
			
 
				+        if isinstance(input.data, bytes):
			
 
				+            input.data = base64.b64encode(input.data).decode("utf-8")
			
 
				+
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        SECRECT_ID = paramters["secret_id"]
			
 
				+        SECRECT_KEY = paramters["secret_key"]
			
 
				+        headers, payload = self._buildRequest(input, TencentCloudApiKey(secret_id=SECRECT_ID, secret_key=SECRECT_KEY))
			
 
				+        response = await httpxAsyncClient.post(self._url, headers=headers, data=payload)
			
 
				+        if response.status_code != 200:
			
 
				+            raise RuntimeError(f"Tencet asr api error: {response.status_code}")
			
 
				+        result = response.json()["Response"]["Result"]
			
 
				+        logger.debug(f"[ASR] Engine response: {result}")
			
 
				+        message = TextMessage(data=result)
			
 
				+        return message
			
--- a/digitalHuman/engine/builder.py
+++ b/digitalHuman/engine/builder.py
@@ -0,0 +1,7 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from digitalHuman.utils import Registry
			
 
				+
			
 
# One registry per engine kind. Engine classes add themselves under a name
# with the ``register`` decorator; the factories look them up by that name.
TTSEngines = Registry()
ASREngines = Registry()
LLMEngines = Registry()
			
--- a/digitalHuman/engine/engineBase.py
+++ b/digitalHuman/engine/engineBase.py
@@ -0,0 +1,37 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from fastapi import WebSocket
			
 
				+from typing import List
			
 
				+from abc import abstractmethod
			
 
				+from digitalHuman.core import BaseRunner
			
 
				+from digitalHuman.protocol import BaseMessage, TextMessage, AudioMessage, VoiceDesc
			
 
				+
			
 
				+__all__ = ["BaseEngine"]
			
 
				+
			
 
class BaseEngine(BaseRunner):
    """Common base of all engines: one message in, one message out."""

    # NOTE(review): ``abstractmethod`` is only enforced if ``BaseRunner`` uses
    # an ABC metaclass - confirm; otherwise the ``raise`` below is the guard.
    @abstractmethod
    async def run(self, input: BaseMessage, **kwargs) -> BaseMessage:
        """Process ``input`` and return the resulting message; subclasses must override."""
        raise NotImplementedError
			
 
				+
			
 
class BaseLLMEngine(BaseEngine):
    """Base class for large-language-model engines."""

    @abstractmethod
    async def run(self, input, streaming: bool = True, **kwargs):
        """Run the model on ``input``; ``streaming`` defaults to True. Subclasses must override."""
        raise NotImplementedError
			
 
				+
			
 
class BaseASREngine(BaseEngine):
    """Base class for speech-recognition engines (audio in, text out)."""

    @abstractmethod
    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
        """Transcribe ``input`` into a ``TextMessage``; subclasses must override."""
        raise NotImplementedError
			
 
				+
			
 
class BaseTTSEngine(BaseEngine):
    """Base class for speech-synthesis engines (text in, audio out)."""

    async def voices(self, **kwargs) -> List[VoiceDesc]:
        """Return the available voices; the default is an empty list."""
        return []

    @abstractmethod
    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
        """Synthesize ``input`` into an ``AudioMessage``; subclasses must override."""
        raise NotImplementedError
			
 
				+
			
 
class StreamBaseEngine(BaseEngine):
    """Base class for engines that serve a whole websocket session."""

    @abstractmethod
    async def run(self, websocket: WebSocket, **kwargs) -> None:
        """Drive the session on ``websocket``; subclasses must override."""
        raise NotImplementedError
			
--- a/digitalHuman/engine/enginePool.py
+++ b/digitalHuman/engine/enginePool.py
@@ -0,0 +1,62 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from threading import RLock
			
 
				+from typing import List
			
 
				+from collections import defaultdict
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.utils import logger
			
 
				+from digitalHuman.protocol import ENGINE_TYPE
			
 
				+from .engineBase import BaseEngine
			
 
				+from .asr import ASRFactory
			
 
				+from .tts import TTSFactory
			
 
				+from .llm import LLMFactory
			
 
				+
			
 
				+__all__ = ["EnginePool"]
			
 
				+
			
 
				+class EnginePool():
			
 
				+    singleLock = RLock()
			
 
				+    _init = False
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        if not self._init:
			
 
				+            self._pool = defaultdict(dict)
			
 
				+            self._init = True
			
 
				+    
			
 
				+    # Single Instance
			
 
				+    def __new__(cls, *args, **kwargs):
			
 
				+        with EnginePool.singleLock:
			
 
				+            if not hasattr(cls, '_instance'):
			
 
				+                EnginePool._instance = super().__new__(cls)
			
 
				+        return EnginePool._instance
			
 
				+
			
 
				+    def __del__(self):
			
 
				+        self._pool.clear()
			
 
				+        self._init = False
			
 
				+    
			
 
				+    def setup(self, config: CN):
			
 
				+        # asr
			
 
				+        for asrCfg in config.ASR.SUPPORT_LIST:
			
 
				+            self._pool[ENGINE_TYPE.ASR][asrCfg.NAME] = ASRFactory.create(asrCfg)
			
 
				+            logger.info(f"[EnginePool] ASR Engine {asrCfg.NAME} is created.")
			
 
				+        logger.info(f"[EnginePool] ASR Engine default is {config.ASR.DEFAULT}.")
			
 
				+        # tts
			
 
				+        for ttsCfg in config.TTS.SUPPORT_LIST:
			
 
				+            self._pool[ENGINE_TYPE.TTS][ttsCfg.NAME] = TTSFactory.create(ttsCfg)
			
 
				+            logger.info(f"[EnginePool] TTS Engine {ttsCfg.NAME} is created.")
			
 
				+        logger.info(f"[EnginePool] TTS Engine default is {config.TTS.DEFAULT}.")
			
 
				+        # llm
			
 
				+        for llmCfg in config.LLM.SUPPORT_LIST:
			
 
				+            self._pool[ENGINE_TYPE.LLM][llmCfg.NAME] = LLMFactory.create(llmCfg)
			
 
				+            logger.info(f"[EnginePool] LLM Engine {llmCfg.NAME} is created.")
			
 
				+        logger.info(f"[EnginePool] LLM Engine default is {config.LLM.DEFAULT}.")
			
 
				+    
			
 
				+    def listEngine(self, engineType: ENGINE_TYPE) -> List[str]:
			
 
				+        if engineType not in self._pool: return []
			
 
				+        return self._pool[engineType].keys()
			
 
				+            
			
 
				+    def getEngine(self, engineType: ENGINE_TYPE, engineName: str) -> BaseEngine:
			
 
				+        if engineType not in self._pool:
			
 
				+            raise KeyError(f"[EnginePool] No such engine type: {engineType}")
			
 
				+        if engineName not in self._pool[engineType]:
			
 
				+            raise KeyError(f"[EnginePool] No such engine: {engineName}")
			
 
				+        return self._pool[engineType][engineName]
			
--- a/digitalHuman/engine/llm/__init__.py
+++ b/digitalHuman/engine/llm/__init__.py
@@ -0,0 +1,5 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .llmFactory import LLMFactory
			
 
				+
			
 
				+__all__ = ['LLMFactory']
			
--- a/digitalHuman/engine/llm/llmFactory.py
+++ b/digitalHuman/engine/llm/llmFactory.py
@@ -0,0 +1,25 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import LLMEngines
			
 
				+from ..engineBase import BaseEngine
			
 
				+from typing import List
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.protocol import ENGINE_TYPE
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+__all__ = ["LLMFactory"]
			
 
				+
			
 
				+class LLMFactory():
			
 
				+    """
			
 
				+    Large Language Model Factory
			
 
				+    """
			
 
				+    @staticmethod
			
 
				+    def create(config: CN) -> BaseEngine:
			
 
				+        if config.NAME in LLMEngines.list():
			
 
				+            logger.info(f"[LLMFactory] Create engine: {config.NAME}")
			
 
				+            return LLMEngines.get(config.NAME)(config, ENGINE_TYPE.LLM)
			
 
				+        else:
			
 
				+            raise RuntimeError(f"[LLMFactory] Please check config, support LLM: {LLMEngines.list()}")
			
 
				+    @staticmethod
			
 
				+    def list() -> List:
			
 
				+        return LLMEngines.list()
			
--- a/digitalHuman/engine/tts/__init__.py
+++ b/digitalHuman/engine/tts/__init__.py
@@ -0,0 +1,10 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .tencentTTS import *
			
 
				+from .edgeTTS import *
			
 
				+from .difyTTS import *
			
 
				+from .cozeTTS import *
			
 
				+from .ttsFactory import TTSFactory
			
 
				+# from .aliNLSTTS import AliNLSTTS
			
 
				+
			
 
				+__all__ = ['TTSFactory']
			
--- a/digitalHuman/engine/tts/aliNLSTTS.py
+++ b/digitalHuman/engine/tts/aliNLSTTS.py
@@ -0,0 +1,149 @@
 
				+import asyncio
			
 
				+import random
			
 
				+import threading
			
 
				+from io import BytesIO
			
 
				+from typing import Optional # Added for type hinting
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger
			
 
				+import nls # Alibaba NLS SDK, when need to be installed
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseEngine
			
 
				+from yacs.config import CfgNode as CN
			
 
				+
			
 
				+__all__ = ["AliNLSTTS"]
			
 
				+
			
 
				+VOICE_LIST = [
			
 
				+    VoiceDesc(name="zhifeng_emo", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zhibing_emo", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zhitian_emo", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zhibei_emo", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zhiyan_emo", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zhimi_emo", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zhimiao_emo", gender=GENDER_TYPE.FEMALE),
			
 
				+]
			
 
				+
			
 
				+@TTSEngines.register("AliNLSTTS")
			
 
				+class AliNLSTTS(BaseEngine):
			
 
				+    EMOTION_LIST = ['angry', 'fear', 'happy', 'hate', 'neutral', 'sad', 'surprise']
			
 
				+
			
 
				+    def generate_remotion_ssml_text(self, text: str) -> str:
			
 
				+        return f'<speak><emotion category="{random.choice(self.EMOTION_LIST)}" intensity="1.0">{text}</emotion></speak>'
			
 
				+    
			
 
				+    async def voices(self) -> List[VoiceDesc]:
			
 
				+        return VOICE_LIST
			
 
				+
			
 
				+    class NlsWorker:
			
 
				+        def __init__(
			
 
				+            self, 
			
 
				+            text: str, 
			
 
				+            config: CN,
			
 
				+            voice: str,
			
 
				+            token: str,
			
 
				+            api_key: str,
			
 
				+        ):
			
 
				+            self._text = text
			
 
				+            self._config = config
			
 
				+            self._voice = voice
			
 
				+            self._token = token
			
 
				+            self._api_key = api_key
			
 
				+            self._audio_buffer = BytesIO()
			
 
				+            self._completion_event = threading.Event()
			
 
				+            self._error_occurred = False
			
 
				+            self._error_message = ""
			
 
				+
			
 
				+            # Configure NLS SDK debugging based on environment or config
			
 
				+            # nls.enableTrace(True) # Enable for debugging if needed
			
 
				+
			
 
				+        def on_error(self, message, *args):
			
 
				+            logger.error(f"[{self._config.NAME}] On error: {message}, args: {args}")
			
 
				+            self._error_message = str(message)
			
 
				+            self._error_occurred = True
			
 
				+            self._completion_event.set() # Signal completion even on error
			
 
				+
			
 
				+        def on_close(self, *args):
			
 
				+            logger.debug(f"[{self._config.NAME}] On close: args: {args}")
			
 
				+            self._completion_event.set() # Ensure completion is signaled
			
 
				+
			
 
				+        def on_data(self, data, *args):
			
 
				+            if data:
			
 
				+                self._audio_buffer.write(data)
			
 
				+
			
 
				+        def on_completed(self, message, *args):
			
 
				+            logger.debug(f"[{self._config.NAME}] On completed: {message}")
			
 
				+            self._completion_event.set()
			
 
				+
			
 
				+        def synthesize(self) -> Optional[bytes]:
			
 
				+            tts = nls.NlsSpeechSynthesizer(
			
 
				+                url=self._config.URL,
			
 
				+                appkey=self._api_key,
			
 
				+                token=self._token,
			
 
				+                on_data=self.on_data,
			
 
				+                on_completed=self.on_completed,
			
 
				+                on_error=self.on_error,
			
 
				+                on_close=self.on_close,
			
 
				+                callback_args=[] 
			
 
				+            )
			
 
				+
			
 
				+            logger.debug(f"[{self._config.NAME}] Starting TTS synthesis for text: {self._text[:50]}...")
			
 
				+            # The NLS SDK's start method expects parameters like voice, format, sample_rate.
			
 
				+            # Make sure these are correctly passed from the config.
			
 
				+            # The text input here is expected to be SSML.
			
 
				+            logger.info(f"{self._text=}")
			
 
				+            tts.start(
			
 
				+                self._text,
			
 
				+                voice=self._voice,
			
 
				+                aformat=self._config.FORMAT.lower(), # SDK expects 'pcm', 'mp3', 'wav'
			
 
				+                sample_rate=self._config.SAMPLE_RATE
			
 
				+            )
			
 
				+
			
 
				+            self._completion_event.wait() # Wait for callbacks to complete
			
 
				+
			
 
				+            if self._error_occurred:
			
 
				+                logger.error(f"[{self._config.NAME}] Synthesis failed: {self._error_message}")
			
 
				+                return None
			
 
				+
			
 
				+            self._audio_buffer.seek(0)
			
 
				+            return self._audio_buffer.getvalue()
			
 
				+
			
 
				+    async def run(self, input: TextMessage, **kwargs) -> Optional[AudioMessage]:
			
 
				+        logger.info(f"[{self.cfg.NAME}] Received text for TTS: {input.data[:50]}...")
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        voice = paramters["voice"]
			
 
				+        token = paramters["token"]
			
 
				+        api_key = paramters["api_key"]
			
 
				+        if not input.data:
			
 
				+            logger.warning(f"[{self.cfg.NAME}] Received empty text for TTS.")
			
 
				+            return None
			
 
				+
			
 
				+        worker = self.NlsWorker(
			
 
				+            text=self.generate_remotion_ssml_text(input.data), 
			
 
				+            config=self.cfg,
			
 
				+            voice=voice,
			
 
				+            token=token,
			
 
				+            api_key=api_key
			
 
				+        )
			
 
				+        # change to async function
			
 
				+        loop = asyncio.get_event_loop()
			
 
				+
			
 
				+        audio_content = await loop.run_in_executor(None, worker.synthesize)
			
 
				+        config_audio_out_format = self.cfg.FORMAT.lower()
			
 
				+        if audio_content:
			
 
				+            if config_audio_out_format == "mp3":
			
 
				+                audio_format = AUDIO_TYPE.MP3
			
 
				+            elif config_audio_out_format == "wav":
			
 
				+                audio_format = AUDIO_TYPE.WAV
			
 
				+            else:
			
 
				+                raise ValueError(f"Unsupported {config_audio_out_format} for ALI NLS tts")
			
 
				+
			
 
				+            logger.info(f"[{self.cfg.NAME}] TTS synthesis successful. Audio size: {len(audio_content)} bytes")
			
 
				+            return AudioMessage(
			
 
				+                data=audio_content,
			
 
				+                format=audio_format,
			
 
				+                sampleRate=self.cfg.SAMPLE_RATE,
			
 
				+                sampleWidth=0, # This might need adjustment based on format
			
 
				+                desc="Alibaba NLS TTS"
			
 
				+            )
			
 
				+        else:
			
 
				+            logger.error(f"[{self.cfg.NAME}] TTS synthesis failed to produce audio content.")
			
 
				+            return None
			
--- a/digitalHuman/engine/tts/cozeTTS.py
+++ b/digitalHuman/engine/tts/cozeTTS.py
@@ -0,0 +1,87 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseTTSEngine
			
 
				+import base64
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, httpxAsyncClient, checkResponse
			
 
				+
			
 
				+__all__ = ["CozeApiTts"]
			
 
				+
			
 
				+
			
 
				+@TTSEngines.register("Coze")
			
 
				+class CozeApiTts(BaseTTSEngine):
			
 
				+    def setup(self):
			
 
				+        self.url = "https://api.coze.cn/v1/audio/speech"
			
 
				+        # TODO: 多人请求差异化
			
 
				+        # self.voicesMap = {}
			
 
				+
			
 
				+    # async def voices(self, **kwargs) -> List[VoiceDesc]:
			
 
				+    #     # 参数校验
			
 
				+    #     paramters = self.checkParameter(**kwargs)
			
 
				+    #     API_TOKEN = paramters["token"]
			
 
				+    #     if not API_TOKEN: return []
			
 
				+    #     headers = {
			
 
				+    #         'Authorization': f'Bearer {API_TOKEN}',
			
 
				+    #         'Content-Type': 'application/json'
			
 
				+    #     }
			
 
				+    #     resp = []
			
 
				+    #     page_num = 1
			
 
				+    #     page_size = 100
			
 
				+    #     while True:
			
 
				+    #         payload = {
			
 
				+    #             "page_num": page_num,
			
 
				+    #             "page_size": page_size
			
 
				+    #         }
			
 
				+    #         response = await httpxAsyncClient.get("https://api.coze.cn/v1/audio/voices", headers=headers, params=payload)
			
 
				+    #         result = checkResponse(response, "CozeApiTts", "get voice list")
			
 
				+    #         has_more = result['data']['has_more']
			
 
				+    #         voices = result['data']['voice_list']
			
 
				+    #         self.voicesMap.update((voice['name'], voice['voice_id']) for voice in voices)
			
 
				+    #         for voice in voices:
			
 
				+    #             resp.append(VoiceDesc(
			
 
				+    #                 name=voice['name'],
			
 
				+    #                 gender=GENDER_TYPE.FEMALE if 'female' in voice['speaker_id'] else GENDER_TYPE.MALE,
			
 
				+    #             ))
			
 
				+    #         if has_more:
			
 
				+    #             page_num += 1
			
 
				+    #         else:
			
 
				+    #             break
			
 
				+    #     return resp
			
 
				+
			
 
				+    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        token = paramters["token"]
			
 
				+        bot_id = paramters["bot_id"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Authorization': f'Bearer {token}',
			
 
				+            'Content-Type': 'application/json'
			
 
				+        }
			
 
				+
			
 
				+        # 获取智能体配置信息
			
 
				+        response = await httpxAsyncClient.get(f"https://api.coze.cn/v1/bots/{bot_id}", headers=headers)
			
 
				+        resp = checkResponse(response, "CozeApiTts", "get bot info")
			
 
				+        voice_id = resp['data']['voice_info_list'][0]['voice_id']
			
 
				+
			
 
				+        payload = {
			
 
				+            'input': input.data,
			
 
				+            'voice_id': voice_id,
			
 
				+            'speed': 1.0,
			
 
				+            'response_format': 'mp3',
			
 
				+            'sample_rate': 16000,
			
 
				+        }
			
 
				+
			
 
				+        logger.debug(f"[TTS] Engine input: {input.data}")
			
 
				+        response = await httpxAsyncClient.post(self.url, json=payload, headers=headers)
			
 
				+        if response.status_code != 200:
			
 
				+            raise RuntimeError(f"CozeAPI tts api error: {response.text}")
			
 
				+
			
 
				+        message = AudioMessage(
			
 
				+            data=base64.b64encode(response.content).decode('utf-8'),
			
 
				+            sampleRate=16000,
			
 
				+            sampleWidth=2,
			
 
				+        )
			
 
				+        return message
			
--- a/digitalHuman/engine/tts/difyTTS.py
+++ b/digitalHuman/engine/tts/difyTTS.py
@@ -0,0 +1,92 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseTTSEngine
			
 
				+import base64
			
 
				+import httpx
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, mp3ToWav
			
 
				+
			
 
				+__all__ = ["DifyApiTts"]
			
 
				+
			
 
				+
			
 
				+@TTSEngines.register("Dify")
			
 
				+class DifyApiTts(BaseTTSEngine):
			
 
				+    def setup(self):
			
 
				+        """初始化 HTTP 客户端，优化连接池和超时设置"""
			
 
				+        super().setup()
			
 
				+        # 创建专用的 HTTP 客户端，优化连接池和超时设置
			
 
				+        # 使用连接池复用连接，减少连接建立时间
			
 
				+        # 设置合理的超时时间：连接超时 5s，读取超时 30s（TTS 可能需要一些时间）
			
 
				+        self._client = httpx.AsyncClient(
			
 
				+            timeout=httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0),
			
 
				+            limits=httpx.Limits(max_keepalive_connections=10, max_connections=20),
			
 
				+            # 注意：如需启用 HTTP/2，请安装 httpx[http2]：pip install httpx[http2]
			
 
				+            # http2=True,  # 暂时禁用，避免缺少 h2 包的错误
			
 
				+        )
			
 
				+    
			
 
				+    def release(self):
			
 
				+        """释放 HTTP 客户端资源"""
			
 
				+        super().release()
			
 
				+        # 注意：httpx.AsyncClient 会在程序退出时自动关闭
			
 
				+        # 如果需要立即关闭，可以在异步上下文中调用 await self._client.aclose()
			
 
				+        # 这里只做标记，避免在 release 中处理异步操作
			
 
				+        if hasattr(self, '_client'):
			
 
				+            self._client = None
			
 
				+    
			
 
				+    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        API_SERVER = paramters["api_server"]
			
 
				+        API_KEY = paramters["api_key"]
			
 
				+        API_USERNAME = paramters["username"]
			
 
				+
			
 
				+        headers = {
			
 
				+            'Authorization': f'Bearer {API_KEY}',
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Accept': 'audio/*',  # 明确指定接受音频类型
			
 
				+        }
			
 
				+        payload = {
			
 
				+            "text": input.data,
			
 
				+            "user": API_USERNAME,
			
 
				+        }
			
 
				+
			
 
				+        logger.debug(f"[TTS] Engine input: {input.data[:50]}..." if len(input.data) > 50 else f"[TTS] Engine input: {input.data}")
			
 
				+        
			
 
				+        try:
			
 
				+            # 使用优化的客户端发送请求
			
 
				+            response = await self._client.post(
			
 
				+                API_SERVER.rstrip('/') + "/text-to-audio",
			
 
				+                json=payload,
			
 
				+                headers=headers,
			
 
				+                follow_redirects=True,  # 自动跟随重定向
			
 
				+            )
			
 
				+            
			
 
				+            if response.status_code != 200:
			
 
				+                error_msg = f"DifyAPI tts api error: {response.status_code}"
			
 
				+                if response.text:
			
 
				+                    error_msg += f", response: {response.text[:200]}"
			
 
				+                raise RuntimeError(error_msg)
			
 
				+            
			
 
				+            # 直接使用响应内容，无需额外转换
			
 
				+            audio_content = response.content
			
 
				+            
			
 
				+            message = AudioMessage(
			
 
				+                data=base64.b64encode(audio_content).decode('utf-8'),
			
 
				+                sampleRate=16000,
			
 
				+                sampleWidth=2,
			
 
				+            )
			
 
				+            
			
 
				+            logger.debug(f"[TTS] Successfully generated audio, size: {len(audio_content)} bytes")
			
 
				+            return message
			
 
				+            
			
 
				+        except httpx.TimeoutException as e:
			
 
				+            logger.error(f"[TTS] Request timeout: {e}")
			
 
				+            raise RuntimeError(f"DifyAPI tts request timeout: {e}")
			
 
				+        except httpx.RequestError as e:
			
 
				+            logger.error(f"[TTS] Request error: {e}")
			
 
				+            raise RuntimeError(f"DifyAPI tts request error: {e}")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[TTS] Unexpected error: {e}")
			
 
				+            raise
			
--- a/digitalHuman/engine/tts/edgeTTS.py
+++ b/digitalHuman/engine/tts/edgeTTS.py
@@ -0,0 +1,137 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseTTSEngine
			
 
				+import edge_tts
			
 
				+import base64
			
 
				+from typing import List
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, mp3ToWav
			
 
				+
			
 
				+__all__ = ["EdgeApiTts"]
			
 
				+
			
 
				+VOICE_LIST = [
			
 
				+    VoiceDesc(name="zh-HK-HiuGaaiNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-HK-HiuMaanNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-HK-WanLungNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-CN-XiaoxiaoNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-CN-XiaoyiNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-CN-YunjianNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-CN-YunxiNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-CN-YunxiaNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-CN-YunyangNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-CN-liaoning-XiaobeiNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-TW-HsiaoChenNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-TW-YunJheNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="zh-TW-HsiaoYuNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="zh-CN-shaanxi-XiaoniNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-AU-NatashaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-AU-WilliamNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-CA-ClaraNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-CA-LiamNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-HK-YanNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-HK-SamNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-IN-NeerjaExpressiveNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-IN-NeerjaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-IN-PrabhatNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-IE-ConnorNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-IE-EmilyNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-KE-AsiliaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-KE-ChilembaNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-NZ-MitchellNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-NZ-MollyNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-NG-AbeoNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-NG-EzinneNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-PH-JamesNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-PH-RosaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-AvaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-AndrewNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-EmmaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-BrianNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-SG-LunaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-SG-WayneNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-ZA-LeahNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-ZA-LukeNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-TZ-ElimuNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-TZ-ImaniNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-GB-LibbyNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-GB-MaisieNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-GB-RyanNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-GB-SoniaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-GB-ThomasNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-AnaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-AndrewMultilingualNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-AriaNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-AvaMultilingualNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-BrianMultilingualNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-ChristopherNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-EmmaMultilingualNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-EricNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-GuyNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-JennyNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-MichelleNeural", gender=GENDER_TYPE.FEMALE),
			
 
				+    VoiceDesc(name="en-US-RogerNeural", gender=GENDER_TYPE.MALE),
			
 
				+    VoiceDesc(name="en-US-SteffanNeural", gender=GENDER_TYPE.MALE)
			
 
				+]
			
 
				+@TTSEngines.register("EdgeTTS")
			
 
				+class EdgeApiTts(BaseTTSEngine):
			
 
				+    async def voices(self, **kwargs) -> List[VoiceDesc]:
			
 
				+        return VOICE_LIST
			
 
				+        """
			
 
				+        结构体
			
 
				+        [{
			
 
				+            'Name': 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)', 
			
 
				+            'ShortName': 'af-ZA-AdriNeural', 
			
 
				+            'Gender': 'Female', 
			
 
				+            'Locale': 'af-ZA', 
			
 
				+            'SuggestedCodec': 'audio-24khz-48kbitrate-mono-mp3', 
			
 
				+            'FriendlyName': 'Microsoft Adri Online (Natural) - Afrikaans (South Africa)', 
			
 
				+            'Status': 'GA', 
			
 
				+            'VoiceTag': {'ContentCategories': ['General'], 'VoicePersonalities': ['Friendly', 'Positive']
			
 
				+        }, ...]
			
 
				+        """
			
 
				+        voices = await edge_tts.list_voices()
			
 
				+        # 过滤 zh / en
			
 
				+        voices = [voice for voice in voices if voice['ShortName'].startswith('zh') or voice['ShortName'].startswith('en')]
			
 
				+        test = [VoiceDesc(name=voice['ShortName'], gender=GENDER_TYPE.FEMALE if voice['Gender'] == 'Female' else GENDER_TYPE.MALE) for voice in voices]
			
 
				+        for t in test:
			
 
				+            print(f'VoiceDesc(name="{t.name}", gender={"GENDER_TYPE.FEMALE" if t.gender == GENDER_TYPE.FEMALE else "GENDER_TYPE.MALE"}),')
			
 
				+        return [VoiceDesc(name=voice['ShortName'], gender=GENDER_TYPE.FEMALE if voice['Gender'] == 'Female' else GENDER_TYPE.MALE) for voice in voices]
			
 
				+
			
 
				+    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
			
 
				+        # 参数填充
			
 
				+        for paramter in self.parameters():
			
 
				+            if paramter.name == "voice":
			
 
				+                voice = paramter.default if paramter.name not in kwargs else kwargs[paramter.name]
			
 
				+            if paramter.name == "rate":
			
 
				+                rate = paramter.default if paramter.name not in kwargs else kwargs[paramter.name]
			
 
				+            if paramter.name == "volume":
			
 
				+                volume = paramter.default if paramter.name not in kwargs else kwargs[paramter.name]
			
 
				+            if paramter.name == "pitch":
			
 
				+                pitch = paramter.default if paramter.name not in kwargs else kwargs[paramter.name]
			
 
				+        if not voice:
			
 
				+            raise KeyError("LitAPI tts voice is required")
			
 
				+        logger.debug(f"[TTS] Engine input[{voice}]: {input.data}")
			
 
				+        rate = "+" + str(rate) + "%" if rate >= 0 else "" + str(rate) + "%"
			
 
				+        volume = "+" + str(volume) + "%" if volume >= 0 else "" + str(volume) + "%"
			
 
				+        pitch = "+" + str(pitch) + "Hz" if pitch >= 0 else "" + str(pitch) + "HZ"
			
 
				+        communicate = edge_tts.Communicate(
			
 
				+            text=input.data, 
			
 
				+            voice=voice,
			
 
				+            rate=rate,
			
 
				+            volume=volume,
			
 
				+            pitch=pitch
			
 
				+        )
			
 
				+        data = b''
			
 
				+        async for message in communicate.stream():
			
 
				+            if message["type"] == "audio":
			
 
				+                data += message["data"]
			
 
				+        # mp3 -> wav
			
 
				+        # data = mp3ToWav(data)
			
 
				+        message = AudioMessage(
			
 
				+            data=base64.b64encode(data).decode('utf-8'),
			
 
				+            sampleRate=16000,
			
 
				+            sampleWidth=2,
			
 
				+        )
			
 
				+        return message
			
--- a/digitalHuman/engine/tts/tencentTTS.py
+++ b/digitalHuman/engine/tts/tencentTTS.py
@@ -0,0 +1,191 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseTTSEngine
			
 
				+import hashlib
			
 
				+import hmac
			
 
				+import time
			
 
				+import json
			
 
				+from uuid import uuid4
			
 
				+from datetime import datetime, timezone
			
 
				+from typing import Tuple, Dict
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.utils import logger, httpxAsyncClient
			
 
				+from pydantic import BaseModel
			
 
				+from typing import List, Optional
			
 
				+from decimal import Decimal
			
 
				+
			
 
				+
			
 
				+__all__ = ["TencentApiTts"]
			
 
				+
			
 
				+
			
 
				+MAX_INPUT_LENGTH = 150
			
 
				+
			
 
				+# neutral(中性)、sad(悲伤)、happy(高兴)、angry(生气)、fear(恐惧)、sajiao(撒娇)、amaze(震惊)、disgusted(厌恶)、peaceful(平静)
			
 
				+# 中性、悲伤、高兴、生气、恐惧、撒娇、震惊、厌恶、平静
			
 
				+class TencentVoiceEmotion(StrEnum):
			
 
				+    NEUTRAL = "neutral"
			
 
				+    SAD = "sad"
			
 
				+    HAPPY = "happy"
			
 
				+    ANGRY = "angry"
			
 
				+    FEAR = "fear"
			
 
				+    SAJIAO = "sajiao"
			
 
				+    AMAZE = "amaze"
			
 
				+    DISGUSTED = "disgusted"
			
 
				+    PEACEFUL = "peaceful"
			
 
				+
			
 
				+class TencentVoiceDesc(BaseModel):
			
 
				+    id: int
			
 
				+    name: str
			
 
				+    gender: GENDER_TYPE
			
 
				+    language: str
			
 
				+    multi_emotional: bool
			
 
				+
			
 
				+VOICE_LIST = [
			
 
				+    TencentVoiceDesc(id=501000, name="智斌", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501001, name="智兰", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501002, name="智菊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501003, name="智宇", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501004, name="月华", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501005, name="飞镜", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501006, name="千嶂", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501007, name="浅草", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501008, name="WeJames", gender=GENDER_TYPE.MALE, language="英文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=501009, name="WeWinny", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
			
 
				+    TencentVoiceDesc(id=601000, name="爱小溪", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601001, name="爱小洛", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601002, name="爱小辰", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601003, name="爱小荷", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601004, name="爱小树", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601005, name="爱小静", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601006, name="爱小耀", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601007, name="爱小叶", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601008, name="爱小豪", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601009, name="爱小芊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601010, name="爱小娇", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601011, name="爱小川", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601012, name="爱小璟", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601013, name="爱小伊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
			
 
				+    TencentVoiceDesc(id=601014, name="爱小简", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
			
 
				+]
			
 
				+
			
 
				+class TencentCloudApiKey(BaseModel):
			
 
				+    secret_id: str
			
 
				+    secret_key: str
			
 
				+
			
 
				+def findVoice(name: str) -> Optional[TencentVoiceDesc]:
			
 
				+    for voice in VOICE_LIST:
			
 
				+        if voice.name == name:
			
 
				+            return voice
			
 
				+    return None
			
 
				+
			
 
				+def sign(key, msg: str):
			
 
				+    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
			
 
				+
			
 
				+@TTSEngines.register("Tencent-API")
			
 
				+class TencentApiTts(BaseTTSEngine): 
			
 
				+    def setup(self):
			
 
				+        self._url = "https://tts.tencentcloudapi.com"
			
 
				+    
			
 
				+    def _buildRequest(
			
 
				+        self, 
			
 
				+        input: TextMessage, 
			
 
				+        tencentApiKey: TencentCloudApiKey, 
			
 
				+        voice: str, 
			
 
				+        volume: float, 
			
 
				+        speed: float, 
			
 
				+        emotionCategory: str = TencentVoiceEmotion.NEUTRAL
			
 
				+    ) -> Tuple[Dict, str]:
			
 
				+        service = "tts"
			
 
				+        host = "tts.tencentcloudapi.com"
			
 
				+        version = "2019-08-23"
			
 
				+        action = "TextToVoice"
			
 
				+        algorithm = "TC3-HMAC-SHA256"
			
 
				+        timestamp = int(time.time())
			
 
				+        date = datetime.fromtimestamp(timestamp, timezone.utc).strftime("%Y-%m-%d")
			
 
				+        tencentVoice = findVoice(voice)
			
 
				+        if not tencentVoice:
			
 
				+            raise ValueError("voice not found")
			
 
				+        params = {
			
 
				+            "Text": input.data,
			
 
				+            "SessionId": str(uuid4()),
			
 
				+            "VoiceType": tencentVoice.id,
			
 
				+            # "Codec": "wav",
			
 
				+            "Codec": "mp3",
			
 
				+            "Volume": volume,
			
 
				+            "Speed": speed,
			
 
				+            "EmotionCategory": emotionCategory
			
 
				+        }
			
 
				+        payload = json.dumps(params)
			
 
				+        # ************* 步骤 1：拼接规范请求串 *************
			
 
				+        http_request_method = "POST"
			
 
				+        canonical_uri = "/"
			
 
				+        canonical_querystring = ""
			
 
				+        ct = "application/json; charset=utf-8"
			
 
				+        canonical_headers = "content-type:%s\nhost:%s\nx-tc-action:%s\n" % (ct, host, action.lower())
			
 
				+        signed_headers = "content-type;host;x-tc-action"
			
 
				+        hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
			
 
				+        canonical_request = (http_request_method + "\n" +
			
 
				+                            canonical_uri + "\n" +
			
 
				+                            canonical_querystring + "\n" +
			
 
				+                            canonical_headers + "\n" +
			
 
				+                            signed_headers + "\n" +
			
 
				+                            hashed_request_payload)
			
 
				+
			
 
				+        # ************* 步骤 2：拼接待签名字符串 *************
			
 
				+        credential_scope = date + "/" + service + "/" + "tc3_request"
			
 
				+        hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
			
 
				+        string_to_sign = (algorithm + "\n" +
			
 
				+                        str(timestamp) + "\n" +
			
 
				+                        credential_scope + "\n" +
			
 
				+                        hashed_canonical_request)
			
 
				+
			
 
				+        # ************* 步骤 3：计算签名 *************
			
 
				+        secret_date = sign(("TC3" + tencentApiKey.secret_key).encode("utf-8"), date)
			
 
				+        secret_service = sign(secret_date, service)
			
 
				+        secret_signing = sign(secret_service, "tc3_request")
			
 
				+        signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
			
 
				+
			
 
				+        # ************* 步骤 4：拼接 Authorization *************
			
 
				+        authorization = (algorithm + " " +
			
 
				+                        "Credential=" + tencentApiKey.secret_id + "/" + credential_scope + ", " +
			
 
				+                        "SignedHeaders=" + signed_headers + ", " +
			
 
				+                        "Signature=" + signature)
			
 
				+
			
 
				+        # ************* 步骤 5：构造并发起请求 *************
			
 
				+        headers = {
			
 
				+            "Authorization": authorization,
			
 
				+            "Content-Type": "application/json; charset=utf-8",
			
 
				+            "Host": host,
			
 
				+            "X-TC-Action": action,
			
 
				+            "X-TC-Timestamp": str(timestamp),
			
 
				+            "X-TC-Version": version
			
 
				+        }
			
 
				+
			
 
				+        return (headers, payload)
			
 
				+
			
 
				+    async def voices(self, **kwargs) -> List[VoiceDesc]:
			
 
				+        return [VoiceDesc(name=v.name, gender=v.gender) for v in VOICE_LIST]
			
 
				+    
			
 
				+    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
			
 
				+        # 参数校验
			
 
				+        paramters = self.checkParameter(**kwargs)
			
 
				+        voice = paramters["voice"]
			
 
				+        speed = paramters["speed"]
			
 
				+        volume = paramters["volume"]
			
 
				+        SECRECT_ID = paramters["secret_id"]
			
 
				+        SECRECT_KEY = paramters["secret_key"]
			
 
				+        tencentCloudApiKey = TencentCloudApiKey(secret_id=SECRECT_ID, secret_key=SECRECT_KEY)
			
 
				+        headers, payload = self._buildRequest(input, tencentCloudApiKey, voice, volume, speed) 
			
 
				+        logger.debug(f"[TTS] Engine input: {input.data}")
			
 
				+        response = await httpxAsyncClient.post(self._url, headers=headers, data=payload)
			
 
				+        if response.status_code != 200:
			
 
				+            raise RuntimeError(f"Builtin tts api error: {response.status_code}")
			
 
				+        audio = response.json()["Response"]["Audio"]
			
 
				+        message = AudioMessage(
			
 
				+            data=audio,
			
 
				+            sampleRate=16000,
			
 
				+            sampleWidth=2,
			
 
				+        )
			
 
				+        return message
			
--- a/digitalHuman/engine/tts/ttsFactory.py
+++ b/digitalHuman/engine/tts/ttsFactory.py
@@ -0,0 +1,25 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from ..builder import TTSEngines
			
 
				+from ..engineBase import BaseEngine
			
 
				+from typing import List
			
 
				+from yacs.config import CfgNode as CN
			
 
				+from digitalHuman.protocol import ENGINE_TYPE
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+__all__ = ["TTSFactory"]
			
 
				+
			
 
				+class TTSFactory():
			
 
				+    """
			
 
				+    Text to Speech Factory
			
 
				+    """
			
 
				+    @staticmethod
			
 
				+    def create(config: CN) -> BaseEngine:
			
 
				+        if config.NAME in TTSEngines.list():
			
 
				+            logger.info(f"[TTSFactory] Create engine: {config.NAME}")
			
 
				+            return TTSEngines.get(config.NAME)(config, ENGINE_TYPE.TTS)
			
 
				+        else:
			
 
				+            raise RuntimeError(f"[TTSFactory] Please check config, support TTS: {TTSEngines.list()}, but get {config.NAME}")
			
 
				+    @staticmethod
			
 
				+    def list() -> List:
			
 
				+        return TTSEngines.list()
			
--- a/digitalHuman/protocol.py
+++ b/digitalHuman/protocol.py
@@ -0,0 +1,266 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import struct
			
 
				+import asyncio
			
 
				+from enum import Enum
			
 
				+from uuid import uuid4
			
 
				+from typing import Optional, Union, List, Dict, Tuple
			
 
				+from datetime import datetime
			
 
				+from pydantic import BaseModel, Field
			
 
				+from fastapi import WebSocket
			
 
				+
			
 
				+# ======================= 枚举类型 =======================
			
 
				+class StrEnum(str, Enum):
			
 
				+    def __str__(self):
			
 
				+        return str(self.value)
			
 
				+
			
 
				+class IntEnum(int, Enum):
			
 
				+    def __str__(self):
			
 
				+        return str(self.value)
			
 
				+
			
 
class ENGINE_TYPE(StrEnum):
    """Kinds of engine known to the service; each member's value equals its name."""
    ASR = "ASR"
    TTS = "TTS"
    LLM = "LLM"
    AGENT = "AGENT"
			
 
				+
			
 
class GENDER_TYPE(StrEnum):
    """Gender label used by ``VoiceDesc.gender``."""
    MALE = 'MALE'
    FEMALE = 'FEMALE'
			
 
				+    
			
 
class EVENT_TYPE(StrEnum):
    """Event names written to the ``event:`` field by the ``eventStream*`` helpers."""
    CONVERSATION_ID = 'CONVERSATION_ID'
    MESSAGE_ID = 'MESSAGE_ID'
    TEXT = 'TEXT'
    THINK = 'THINK'
    TASK = 'TASK'
    DONE = 'DONE'
    ERROR = 'ERROR'
			
 
				+
			
 
class PARAM_TYPE(StrEnum):
    """Value-type tag of a parameter description (``ParamDesc.type``)."""
    STRING = 'string'
    INT = 'int'
    FLOAT = 'float'
    BOOL = 'bool'
    LIST = 'list'
			
 
				+
			
 
				+
			
 
class AUDIO_TYPE(StrEnum):
    """Audio formats (``AudioMessage.type``)."""
    MP3 = 'mp3'
    WAV = 'wav'
			
 
				+
			
 
class DATA_TYPE(StrEnum):
    """Kind of content held in a message's ``data`` field (``dataType``)."""
    TEXT = 'text'
    AUDIO_URL = 'audio_url'
    AUDIO_STREAM = 'audio_stream'
			
 
				+
			
 
class ROLE_TYPE(StrEnum):
    """Speaker role of a ``RoleMessage``."""
    SYSTEM = 'system'
    USER = 'user'
    ASSISTANT = 'assistant'
    TOOL = 'tool'
			
 
				+
			
 
class INFER_TYPE(StrEnum):
    """Inference mode advertised in ``EngineDesc.infer_type``."""
    NORMAL = 'normal'
    STREAM = 'stream'
			
 
				+
			
 
class RESPONSE_CODE(IntEnum):
    """Status code carried in ``BaseResponse.code``."""
    OK = 0
    ERROR = -1
			
 
				+
			
 
				+# ========================== Message =============================
			
 
				+class BaseMessage(BaseModel):
			
 
				+    """
			
 
				+    Base Protocol
			
 
				+    """
			
 
				+    # id: str = Field(default_factory=lambda: str(uuid4()))
			
 
				+    def __str__(self) -> str:
			
 
				+       return f'Message({self.model_dump()})'
			
 
				+
			
 
class AudioMessage(BaseMessage):
    """Audio message: the audio content plus its format description."""
    # Audio content, either text or raw bytes; None when absent.
    data: Optional[Union[str, bytes]] = None
    dataType: DATA_TYPE = DATA_TYPE.AUDIO_STREAM  # data kind: audio stream, audio URL, etc.
    type: AUDIO_TYPE = AUDIO_TYPE.WAV  # audio format: WAV, MP3, etc.
    sampleRate: int = 16000
    sampleWidth: int = 2
			
 
				+
			
 
class TextMessage(BaseMessage):
    """Text message; ``data`` holds the text (None when absent)."""
    data: Optional[str] = None
    dataType: DATA_TYPE = DATA_TYPE.TEXT  # data kind
			
 
				+
			
 
class RoleMessage(BaseMessage):
    """A piece of content attributed to a speaker role."""
    role: ROLE_TYPE
    content: str
			
 
				+
			
 
				+# ========================== server =============================
			
 
class BaseResponse(BaseModel):
    """Minimal server response envelope: a status code and a message string."""
    code: RESPONSE_CODE
    message: str
			
 
				+
			
 
				+# ========================== voice =============================
			
 
class VoiceDesc(BaseModel):
    """Description of a voice: its name and gender."""
    name: str
    gender: GENDER_TYPE
			
 
				+
			
 
				+
			
 
				+# ========================== param =============================
			
 
class ParamDesc(BaseModel):
    """Description of one configurable parameter."""
    name: str
    description: str
    type: PARAM_TYPE
    required: bool
    # NOTE(review): presumably the allowed bounds for numeric parameters — confirm the expected shape.
    range: List[Union[str, int, float]] = []
    # NOTE(review): presumably the allowed values for enumerated parameters — confirm.
    choices: List[Union[str, int, float]] = []
    # No default is declared, so callers must always supply this field.
    default: Union[str, int, float, bool, List]
			
 
				+
			
 
				+# ========================== engine =============================
			
 
class EngineDesc(BaseModel):
    """Description of an engine: name, kind, inference mode and optional metadata."""
    name: str
    type: ENGINE_TYPE
    infer_type: INFER_TYPE
    desc: str = ""
    meta: Dict = {}
			
 
				+
			
 
class EngineConfig(BaseModel):
    """Engine selection (name and kind) together with its configuration dict."""
    name: str
    type: ENGINE_TYPE
    config: Dict
			
 
				+
			
 
				+# ========================== user =============================
			
 
class UserDesc(BaseModel):
    """Per-request user information: user id, request id and cookie string."""
    user_id: str
    request_id: str
    cookie: str
			
 
				+    
			
 
				+# ========================== func =============================
			
 
				+def eventStreamResponse(event: EVENT_TYPE, data: str) -> str:
			
 
				+    message = "event: " + str(event) + "\ndata: " + data.replace("\n", "\\n") + "\n\n"
			
 
				+    return message
			
 
				+
			
 
def eventStreamText(data: str) -> str:
    """Build a ``TEXT`` event-stream frame carrying ``data``."""
    return eventStreamResponse(EVENT_TYPE.TEXT, data)
			
 
				+
			
 
def eventStreamTask(task_id: str) -> str:
    """Build a ``TASK`` event-stream frame carrying ``task_id``."""
    return eventStreamResponse(EVENT_TYPE.TASK, task_id)
			
 
				+
			
 
def eventStreamThink(data: str) -> str:
    """Build a ``THINK`` event-stream frame carrying ``data``."""
    return eventStreamResponse(EVENT_TYPE.THINK, data)
			
 
				+
			
 
def eventStreamConversationId(conversation_id: str) -> str:
    """Build a ``CONVERSATION_ID`` event-stream frame carrying ``conversation_id``."""
    return eventStreamResponse(EVENT_TYPE.CONVERSATION_ID, conversation_id)
			
 
				+
			
 
def eventStreamMessageId(message_id: str) -> str:
    """Build a ``MESSAGE_ID`` event-stream frame carrying ``message_id``."""
    return eventStreamResponse(EVENT_TYPE.MESSAGE_ID, message_id)
			
 
				+
			
 
def eventStreamDone() -> str:
    """Build the ``DONE`` event-stream frame; its data is the fixed text ``Done``."""
    return f"event: {EVENT_TYPE.DONE}\ndata: Done\n\n"
			
 
				+
			
 
def eventStreamError(error: str) -> str:
    """Build an ``ERROR`` event-stream frame carrying the error text."""
    return eventStreamResponse(EVENT_TYPE.ERROR, error)
			
 
				+
			
 
				+def isEventStreamResponse(message: str) -> bool:
			
 
				+    return message.startswith("event:")
			
 
				+
			
 
				+
			
 
				+# ========================== websocket =============================
			
 
# Protocol constants
ACTION_HEADER_SIZE = 18  # size of the action field (18 bytes)
# Frame layout: [Action (18 bytes)] + [Payload Size (4 bytes)] + [Payload (variable length)]
PROTOCOL_HEADER_FORMAT = ">18sI"  # big-endian: 18-byte action + 4-byte unsigned int payload_size
PROTOCOL_HEADER_SIZE = struct.calcsize(PROTOCOL_HEADER_FORMAT)  # 22 bytes
			
 
				+
			
 
class WS_RECV_ACTION_TYPE(StrEnum):
    """Action names sent by the client."""
    PING = "PING"  # heartbeat
    ENGINE_START = "ENGINE_START"  # start the engine
    ENGINE_PARTIAL_INPUT = "PARTIAL_INPUT"  # engine input (partial)
    ENGINE_FINAL_INPUT = "FINAL_INPUT"  # engine input (final)
    ENGINE_STOP = "ENGINE_STOP"  # stop the engine
			
 
				+
			
 
class WS_SEND_ACTION_TYPE(StrEnum):
    """Action names sent by the server."""
    PONG = "PONG"  # heartbeat reply
    ENGINE_INITIALZING = "ENGINE_INITIALZING"  # engine is initializing
    ENGINE_STARTED = "ENGINE_STARTED"  # engine is ready
    ENGINE_PARTIAL_OUTPUT = "PARTIAL_OUTPUT"  # engine output (partial)
    ENGINE_FINAL_OUTPUT = "FINAL_OUTPUT"  # engine output (final)
    ENGINE_STOPPED = "ENGINE_STOPPED"  # engine has been shut down
    ERROR = "ERROR"  # error reply
			
 
				+
			
 
				+def _format_action(action_name: str) -> bytes:
			
 
				+    """格式化action名称为18字节，右侧用空格填充"""
			
 
				+    if len(action_name) > ACTION_HEADER_SIZE:
			
 
				+        raise ValueError(
			
 
				+            f"Action name '{action_name}' exceeds {ACTION_HEADER_SIZE} bytes"
			
 
				+        )
			
 
				+    return action_name.ljust(ACTION_HEADER_SIZE).encode("utf-8")
			
 
				+
			
 
				+def struct_message(action: str, message: str | bytes) -> bytes:
			
 
				+    """构造发送消息"""
			
 
				+    if isinstance(message, str):
			
 
				+        message = message.encode("utf-8")
			
 
				+    action_bytes = _format_action(action)
			
 
				+    payload_size = len(message)
			
 
				+    # 打包协议头部: action(18字节) + payload_size(4字节)
			
 
				+    header = struct.pack(PROTOCOL_HEADER_FORMAT, action_bytes, payload_size)
			
 
				+    return header + message
			
 
				+
			
 
				+def parse_message(message: bytes) -> Tuple[str, bytes]:
			
 
				+    """解析接收到的消息"""
			
 
				+    if len(message) < PROTOCOL_HEADER_SIZE:
			
 
				+        raise ValueError(
			
 
				+            f"Message too short: {len(message)} bytes, expected at least {PROTOCOL_HEADER_SIZE}"
			
 
				+        )
			
 
				+    # 解析协议头部: action(18字节) + payload_size(4字节)
			
 
				+    action, payload_size = struct.unpack(
			
 
				+        PROTOCOL_HEADER_FORMAT, message[:PROTOCOL_HEADER_SIZE]
			
 
				+    )
			
 
				+
			
 
				+    expected_total_size = PROTOCOL_HEADER_SIZE + payload_size
			
 
				+    if len(message) != expected_total_size:
			
 
				+        raise ValueError(
			
 
				+            f"Message size mismatch: got {len(message)} bytes, expected {expected_total_size}"
			
 
				+        )
			
 
				+
			
 
				+    # 提取payload
			
 
				+    payload = message[PROTOCOL_HEADER_SIZE : PROTOCOL_HEADER_SIZE + payload_size] if payload_size > 0 else b""
			
 
				+
			
 
				+    return (action.decode("utf-8").strip(), payload)
			
 
				+
			
 
				+class WebSocketHandler(): 
			
 
				+    """
			
 
				+    websocket处理类(协议控制)
			
 
				+    """
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def connect(ws: WebSocket) -> None:
			
 
				+        """连接WebSocket"""
			
 
				+        await ws.accept()
			
 
				+        # logger.debug(f"WebSocket connected: {ws.client.host}")
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    async def disconnect(ws: WebSocket):
			
 
				+        """断开WebSocket连接"""
			
 
				+        try:
			
 
				+            await ws.close()
			
 
				+        except (RuntimeError, AttributeError, Exception):
			
 
				+            # 忽略关闭时的错误，避免在事件循环关闭后尝试关闭连接
			
 
				+            # 这是 Windows 上 ProactorEventLoop 的已知问题
			
 
				+            # 当事件循环关闭后，WebSocket 连接的析构函数会尝试关闭连接，但此时事件循环已经关闭
			
 
				+            pass
			
 
				+        # logger.debug(f"WebSocket disconnected: {ws.client.host}")
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def send_message(ws: WebSocket, action: str, message: str | bytes = b'') -> None:
			
 
				+        """发送WebSocket消息"""
			
 
				+        data = struct_message(action, message)
			
 
				+        await ws.send_bytes(data)
			
 
				+        # logger.debug(f"Sent action: {action}, payload size: {len(data) - PROTOCOL_HEADER_SIZE} bytes")
			
 
				+    
			
 
				+    @staticmethod
			
 
				+    async def recv_message(ws: WebSocket) -> Tuple[str, bytes]:
			
 
				+        """接收WebSocket消息"""
			
 
				+        message = await ws.receive_bytes()
			
 
				+        action, payload = parse_message(message)
			
 
				+        # logger.debug(f"Received action: {action.decode('utf-8').strip()}, payload size: {len(payload)} bytes")
			
 
				+        return action, payload
			
--- a/digitalHuman/server/__init__.py
+++ b/digitalHuman/server/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from .router import app
			
--- a/digitalHuman/server/api/__init__.py
+++ b/digitalHuman/server/api/__init__.py
--- a/digitalHuman/server/api/agent/__init__.py
+++ b/digitalHuman/server/api/agent/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/api/agent/agent_api_v0.py
+++ b/digitalHuman/server/api/agent/agent_api_v0.py
@@ -0,0 +1,84 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from fastapi import APIRouter
			
 
				+from fastapi.responses import JSONResponse, StreamingResponse
			
 
				+from digitalHuman.utils import config
			
 
				+from digitalHuman.agent import AgentPool
			
 
				+from digitalHuman.server.reponse import Response, streamInteralError
			
 
				+from digitalHuman.server.header import HeaderInfo
			
 
				+from digitalHuman.server.models import *
			
 
				+from digitalHuman.server.core.api_agent_v0_impl import *
			
 
				+
			
 
				+router = APIRouter(prefix="/agent/v0")
			
 
				+agentPool = AgentPool()
			
 
				+
			
 
				+
			
 
				+# ========================= 获取agent支持列表 ===========================
			
 
				+@router.get("/engine", response_model=EngineListResp, summary="Get Agent Engine List")
			
 
				+def api_get_agent_list():
			
 
				+    """
			
 
				+    获取agent支持引擎列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_agent_list()
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineListResp), status_code=200)
			
 
				+
			
 
				+# ========================= 获取agent默认引擎 ===========================
			
 
				+@router.get("/engine/default", response_model=EngineDefaultResp, summary="Get Default Agent Engine")
			
 
				+def api_get_agent_default():
			
 
				+    """
			
 
				+    获取默认agent引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_agent_default()
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineDefaultResp), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= 获取agent引擎参数列表 ===========================
			
 
				+@router.get("/engine/{engine}", response_model=EngineParam, summary="Get Agent Engine Param")
			
 
				+def api_get_agent_param(engine: str):
			
 
				+    """
			
 
				+    获取agent引擎配置参数列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_agent_param(engine)
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineParam), status_code=200)
			
 
				+
			
 
				+# ========================= 创建agent会话 ===========================
			
 
				+@router.post("/engine/{engine}", response_model=ConversationIdResp, summary="Create Agent Conversation")
			
 
				+async def api_create_agent_conversation(engine: str, item: ConversationInput):
			
 
				+    """
			
 
				+    创建agent会话
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try: 
			
 
				+        response.data = await create_agent_conversation(engine, item.data)
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(ConversationIdResp), status_code=200)
			
 
				+
			
 
				+# ========================= 执行agent引擎 ===========================
			
 
				+@router.post("/engine", summary="AI Agent Inference")
			
 
				+async def api_agent_infer(items: AgentEngineInput, header: HeaderInfo):
			
 
				+    if items.engine.lower() == "default":
			
 
				+        items.engine = config.SERVER.AGENTS.DEFAULT
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        streamContent = agent_infer_stream(header, items)
			
 
				+        return StreamingResponse(streamContent, media_type="text/event-stream")
			
 
				+    except Exception as e:
			
 
				+        response.error(str(e))
			
 
				+        return StreamingResponse(streamInteralError("Interal Error"), media_type="text/event-stream")
			
--- a/digitalHuman/server/api/asr/__init__.py
+++ b/digitalHuman/server/api/asr/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/api/asr/asr_api_v0.py
+++ b/digitalHuman/server/api/asr/asr_api_v0.py
@@ -0,0 +1,111 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import json
			
 
				+from fastapi import APIRouter, UploadFile, Form
			
 
				+from fastapi.responses import JSONResponse
			
 
				+from digitalHuman.server.reponse import Response
			
 
				+from digitalHuman.server.header import HeaderInfo
			
 
				+from digitalHuman.server.models import *
			
 
				+from digitalHuman.server.core.api_asr_v0_impl import *
			
 
				+
			
 
				+router = APIRouter(prefix="/asr/v0")
			
 
				+enginePool = EnginePool()
			
 
				+
			
 
				+# ========================= 获取asr支持列表 ===========================
			
 
				+@router.get("/engine", response_model=EngineListResp, summary="Get ASR Engine List")
			
 
				+def api_get_asr_list():
			
 
				+    """
			
 
				+    获取asr支持引擎列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_asr_list()
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineListResp), status_code=200)
			
 
				+
			
 
				+# ========================= 获取asr默认引擎 ===========================
			
 
				+@router.get("/engine/default", response_model=EngineDefaultResp, summary="Get Default ASR Engine")
			
 
				+def api_get_asr_default():
			
 
				+    """
			
 
				+    获取默认asr引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_asr_default()
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineDefaultResp), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= 获取asr引擎参数列表 ===========================
			
 
				+@router.get("/engine/{engine}", response_model=EngineParam, summary="Get ASR Engine param")
			
 
				+def api_get_asr_param(engine: str):
			
 
				+    """
			
 
				+    获取asr引擎配置参数列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_asr_param(engine)
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineParam), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= 执行asr引擎 ===========================
			
 
				+# wav 二进制
			
 
				+@router.post("/engine", response_model=ASREngineOutput, summary="Speech To Text Inference (wav binary)")
			
 
				+async def api_asr_infer(header: HeaderInfo, items: ASREngineInput):
			
 
				+    """
			
 
				+    执行asr引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        output: TextMessage = await asr_infer(header, items)
			
 
				+        response.data = output.data
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(ASREngineOutput), status_code=200)
			
 
				+
			
 
				+# mp3 文件
			
 
				+@router.post("/engine/file", response_model=ASREngineOutput, summary="Speech To Text Inference (mp3 file)")
			
 
				+async def api_asr_infer_file(
			
 
				+    header: HeaderInfo, 
			
 
				+    file: UploadFile, 
			
 
				+    engine: str = Form(...),
			
 
				+    type: AUDIO_TYPE = Form(...),
			
 
				+    config: str = Form(...),
			
 
				+    sampleRate: int = Form(...),
			
 
				+    sampleWidth: int = Form(...)
			
 
				+):
			
 
				+    """
			
 
				+    执行asr引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        fileData = await file.read()
			
 
				+        items = ASREngineInput(
			
 
				+            engine=engine,
			
 
				+            type=type,
			
 
				+            config=json.loads(config),
			
 
				+            sampleRate=sampleRate,
			
 
				+            sampleWidth=sampleWidth,
			
 
				+            data=fileData
			
 
				+        )
			
 
				+        output: TextMessage = await asr_infer(header, items)
			
 
				+        response.data = output.data
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(ASREngineOutput), status_code=200)
			
 
				+# 流式
			
 
@router.websocket("/engine/stream")
async def api_asr_infer_stream(header: HeaderInfo, websocket: WebSocket):
    """
    Streaming ASR endpoint: hands the WebSocket and header to ``asr_stream_infer``.
    """
    await asr_stream_infer(header, websocket)
			
--- a/digitalHuman/server/api/common/__init__.py
+++ b/digitalHuman/server/api/common/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/api/common/common_api_v0.py
+++ b/digitalHuman/server/api/common/common_api_v0.py
@@ -0,0 +1,26 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from fastapi import APIRouter, WebSocket
			
 
				+from digitalHuman.server.ws import WebsocketManager
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+
			
 
				+router = APIRouter(prefix="/common/v0")
			
 
				+wsManager = WebsocketManager()
			
 
				+
			
 
				+# ========================= 心跳包 ===========================
			
 
				+@router.websocket("/heartbeat")
			
 
				+async def websocket_heartbeat(websocket: WebSocket):
			
 
				+    try:
			
 
				+        await wsManager.connect(websocket)
			
 
				+        while True:
			
 
				+            data = await websocket.receive_text()
			
 
				+            if data == "ping":
			
 
				+                await wsManager.sendMessage("pong", websocket)
			
 
				+            else:
			
 
				+                # 暂不处理其它消息格式: 非探活则关闭接口
			
 
				+                await wsManager.sendMessage("Received unsupported message", websocket)
			
 
				+                wsManager.disconnect(websocket)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"[SERVER] websocket_heartbeat: {str(e)}")
			
 
				+        wsManager.disconnect(websocket)
			
--- a/digitalHuman/server/api/face_detection/__init__.py
+++ b/digitalHuman/server/api/face_detection/__init__.py
@@ -0,0 +1,3 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
--- a/digitalHuman/server/api/face_detection/face_detection_api_v0.py
+++ b/digitalHuman/server/api/face_detection/face_detection_api_v0.py
@@ -0,0 +1,59 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import base64
			
 
				+from fastapi import APIRouter, Form, UploadFile
			
 
				+from fastapi.responses import JSONResponse
			
 
				+from digitalHuman.server.reponse import Response
			
 
				+from digitalHuman.server.header import HeaderInfo
			
 
				+from digitalHuman.server.models import FaceDetectionOutput
			
 
				+
			
 
# Lazy import: avoid importing uniface at startup (it may fail to load its DLL).
# The import only happens when the API is actually called.
def _get_face_detection_infer():
    """Import ``face_detection_infer`` on first use and return it."""
    from digitalHuman.server.core.api_face_detection_v0_impl import face_detection_infer
    return face_detection_infer
			
 
				+
			
 
				+router = APIRouter(prefix="/face_detection/v0")
			
 
				+
			
 
				+# ========================= 人脸检测 ===========================
			
 
				+@router.post("/detect", summary="Face Detection")
			
 
				+async def api_face_detection(header: HeaderInfo, file: UploadFile):
			
 
				+    """
			
 
				+    执行人脸检测
			
 
				+    接收图片文件，返回检测到的人脸信息
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        file_data = await file.read()
			
 
				+        face_detection_infer = _get_face_detection_infer()
			
 
				+        result = await face_detection_infer(header, file_data)
			
 
				+        response.data = result
			
 
				+    except Exception as e:
			
 
				+        response.data = {"hasFace": False, "faceCount": 0, "faces": []}
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(FaceDetectionOutput), status_code=200)
			
 
				+
			
 
				+# ========================= 人脸检测 (Base64) ===========================
			
 
				+@router.post("/detect/base64", summary="Face Detection (Base64)")
			
 
				+async def api_face_detection_base64(header: HeaderInfo, image_data: str = Form(...)):
			
 
				+    """
			
 
				+    执行人脸检测 (Base64格式)
			
 
				+    接收Base64编码的图片数据，返回检测到的人脸信息
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        # 解码Base64图片
			
 
				+        if image_data.startswith('data:image'):
			
 
				+            # 处理 data:image/jpeg;base64,xxx 格式
			
 
				+            image_data = image_data.split(',')[1]
			
 
				+        file_data = base64.b64decode(image_data)
			
 
				+        face_detection_infer = _get_face_detection_infer()
			
 
				+        result = await face_detection_infer(header, file_data)
			
 
				+        response.data = result
			
 
				+    except Exception as e:
			
 
				+        response.data = {"hasFace": False, "faceCount": 0, "faces": []}
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(FaceDetectionOutput), status_code=200)
			
 
				+
			
 
				+
			
--- a/digitalHuman/server/api/llm/__init__.py
+++ b/digitalHuman/server/api/llm/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/api/llm/llm_api_v0.py
+++ b/digitalHuman/server/api/llm/llm_api_v0.py
@@ -0,0 +1,71 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from fastapi import APIRouter
			
 
				+from fastapi.responses import JSONResponse, StreamingResponse
			
 
				+from digitalHuman.protocol import TextMessage
			
 
				+from digitalHuman.engine import EnginePool
			
 
				+from digitalHuman.server.reponse import Response, streamInteralError
			
 
				+from digitalHuman.server.header import HeaderInfo
			
 
				+from digitalHuman.server.models import *
			
 
				+from digitalHuman.server.core.api_llm_v0_impl import *
			
 
				+
			
 
				+router = APIRouter(prefix="/llm/v0")
			
 
				+enginePool = EnginePool()
			
 
				+
			
 
				+# ========================= Get LLM engine list ===========================
			
 
				+@router.get("/engine", response_model=EngineListResp, summary="Get LLM Engine List")
			
 
				+def api_get_llm_list():
			
 
				+    """
			
 
				+    获取asr支持引擎列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_llm_list()
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineListResp), status_code=200)
			
 
				+
			
 
				+# ========================= Get default LLM engine ===========================
			
 
				+@router.get("/engine/default", response_model=EngineDefaultResp, summary="Get Default LLM Engine")
			
 
				+def api_get_asr_default():
			
 
				+    """
			
 
				+    获取默认asr引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_llm_default()
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineDefaultResp), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= Get LLM engine parameter list ===========================
			
 
				+@router.get("/engine/{engine}", response_model=EngineParam, summary="Get LLM Engine param")
			
 
				+def api_get_asr_param(engine: str):
			
 
				+    """
			
 
				+    获取asr引擎配置参数列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_llm_param(engine)
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineParam), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= Run LLM engine ===========================
			
 
				+@router.post("/engine", response_model=ASREngineOutput, summary="LLM Inference")
			
 
				+
			
 
				+async def api_agent_infer(item: LLMEngineInput, header: HeaderInfo):
			
 
				+    if item.engine.lower() == "default":
			
 
				+        item.engine = config.SERVER.LLM.DEFAULT
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        input = TextMessage(data=item.data)
			
 
				+        return StreamingResponse(enginePool.getEngine(ENGINE_TYPE.LLM, item.engine).run(input=input, user=header, **item.config), media_type="text/event-stream")
			
 
				+    except Exception as e:
			
 
				+        response.error(str(e))
			
 
				+        return StreamingResponse(streamInteralError("Interal Error"), media_type="text/event-stream")
			
--- a/digitalHuman/server/api/tts/__init__.py
+++ b/digitalHuman/server/api/tts/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/api/tts/tts_api_v0.py
+++ b/digitalHuman/server/api/tts/tts_api_v0.py
@@ -0,0 +1,93 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import json
			
 
				+from fastapi import APIRouter
			
 
				+from fastapi.responses import JSONResponse
			
 
				+from digitalHuman.utils import config, logger
			
 
				+from digitalHuman.protocol import AudioMessage
			
 
				+from digitalHuman.engine import EnginePool
			
 
				+from digitalHuman.server.reponse import Response
			
 
				+from digitalHuman.server.header import HeaderInfo
			
 
				+from digitalHuman.server.models import *
			
 
				+from digitalHuman.server.core.api_tts_v0_impl import *
			
 
				+
			
 
				+router = APIRouter(prefix="/tts/v0")
			
 
				+enginePool = EnginePool()
			
 
				+
			
 
				+# ========================= 获取tts支持列表 ===========================
			
 
				+@router.get("/engine", response_model=EngineListResp, summary="Get TTS Engine List")
			
 
				+def api_get_tts_list():
			
 
				+    """
			
 
				+    获取tts支持引擎列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_tts_list()
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineListResp), status_code=200)
			
 
				+
			
 
				+# ========================= 获取tts默认引擎 ===========================
			
 
				+@router.get("/engine/default", response_model=EngineDefaultResp, summary="Get Default TTS Engine")
			
 
				+def api_get_tts_default():
			
 
				+    """
			
 
				+    获取默认tts引擎
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_tts_default()
			
 
				+    except Exception as e:
			
 
				+        response.data = ""
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineDefaultResp), status_code=200)
			
 
				+
			
 
				+# ========================= 获取tts引擎声音列表 ===========================
			
 
				+@router.get("/engine/{engine}/voice", response_model=VoiceListResp, summary="Get TTS Engine Voice List")
			
 
				+async def api_get_tts_voice(engine: str, config: str = '{}'):
			
 
				+    """
			
 
				+    获取tts引擎配置参数列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    config = json.loads(config) if config else {}
			
 
				+    try:
			
 
				+        response.data = await get_tts_voice(engine, **config)
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(VoiceListResp), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= 获取tts引擎参数列表 ===========================
			
 
				+@router.get("/engine/{engine}", response_model=EngineParam, summary="Get TTS Engine Param")
			
 
				+def api_get_tts_param(engine: str):
			
 
				+    """
			
 
				+    获取tts引擎配置参数列表
			
 
				+    """
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        response.data = get_tts_param(engine)
			
 
				+    except Exception as e:
			
 
				+        response.data = []
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(EngineParam), status_code=200)
			
 
				+
			
 
				+
			
 
				+# ========================= 执行tts引擎 ===========================
			
 
				+@router.post("/engine", response_model=TTSEngineOutput, summary="Text To Speech Inference")
			
 
				+async def api_tts_infer(item: TTSEngineInput, header: HeaderInfo):
			
 
				+    """
			
 
				+    执行tts引擎
			
 
				+    """
			
 
				+    if item.engine.lower() == "default":
			
 
				+        item.engine = config.SERVER.ENGINES.TTS.DEFAULT
			
 
				+    response = Response()
			
 
				+    try:
			
 
				+        output: AudioMessage = await tts_infer(header, item)
			
 
				+        response.data = output.data
			
 
				+        response.sampleRate = output.sampleRate
			
 
				+        response.sampleWidth = output.sampleWidth
			
 
				+    except Exception as e:
			
 
				+        response.data = None
			
 
				+        response.error(str(e))
			
 
				+    return JSONResponse(content=response.validate(TTSEngineOutput), status_code=200)
			
--- a/digitalHuman/server/core/__init__.py
+++ b/digitalHuman/server/core/__init__.py
@@ -0,0 +1,2 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
--- a/digitalHuman/server/core/api_agent_v0_impl.py
+++ b/digitalHuman/server/core/api_agent_v0_impl.py
@@ -0,0 +1,43 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from typing import List, Dict
			
 
				+from digitalHuman.agent import AgentPool
			
 
				+from digitalHuman.utils import config
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.server.models import AgentEngineInput
			
 
				+
			
 
# Module-level agent pool through which the helpers below look agents up by name.
agentPool = AgentPool()
			
 
				+
			
 
				+def get_agent_list() -> List[EngineDesc]:
			
 
				+    agents = agentPool.list()
			
 
				+    return [agentPool.get(agent).desc() for agent in agents]
			
 
				+
			
 
				+def get_agent_default() -> EngineDesc:
			
 
				+    return agentPool.get(config.SERVER.AGENTS.DEFAULT).desc()
			
 
				+
			
 
				+def get_agent_param(name: str) -> List[ParamDesc]:
			
 
				+    engine = agentPool.get(name)
			
 
				+    return engine.parameters()
			
 
				+
			
 
				+async def create_agent_conversation(name: str, param: Dict) -> str:
			
 
				+    engine = agentPool.get(name)
			
 
				+    id = await engine.createConversation(**param)
			
 
				+    return id
			
 
				+
			
 
				+def agent_infer_stream(user: UserDesc, items: AgentEngineInput):
			
 
				+    # 检查是否是按钮触发的对话（包含 [BUTTON_TRIGGERED] 标记）
			
 
				+    # 如果是按钮触发，添加 persona 前缀；否则直接使用用户输入
			
 
				+    BUTTON_MARKER = "[BUTTON_TRIGGERED]"
			
 
				+    if items.data.startswith(BUTTON_MARKER):
			
 
				+        # 移除标记，添加 persona 前缀
			
 
				+        user_message = items.data[len(BUTTON_MARKER):]
			
 
				+        persona_prefix = "你现在是永天科技展厅的智能客服，请介绍永天科技的产品和解决方案：\n"
			
 
				+        user_input = persona_prefix + user_message
			
 
				+    else:
			
 
				+        # 普通对话，不添加 persona 前缀
			
 
				+        user_input = items.data
			
 
				+    
			
 
				+    input = TextMessage(data=user_input)
			
 
				+    streamContent = agentPool.get(items.engine).run(input=input, user=user, streaming=True, conversation_id=items.conversation_id, **items.config)
			
 
				+    return streamContent
			
--- a/digitalHuman/server/core/api_asr_v0_impl.py
+++ b/digitalHuman/server/core/api_asr_v0_impl.py
@@ -0,0 +1,54 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import json
			
 
				+from typing import List
			
 
				+from digitalHuman.engine import EnginePool
			
 
				+from digitalHuman.utils import config
			
 
				+from digitalHuman.protocol import *
			
 
				+from digitalHuman.server.models import *
			
 
				+from digitalHuman.server.ws import *
			
 
				+
			
 
# Module-level engine pool through which the ASR helpers below look engines up.
enginePool = EnginePool()
			
 
				+
			
 
				+def get_asr_list() -> List[EngineDesc]:
			
 
				+    engines = enginePool.listEngine(ENGINE_TYPE.ASR)
			
 
				+    return [enginePool.getEngine(ENGINE_TYPE.ASR, engine).desc() for engine in engines]
			
 
				+
			
 
				+def get_asr_default() -> EngineDesc:
			
 
				+    return enginePool.getEngine(ENGINE_TYPE.ASR, config.SERVER.ENGINES.ASR.DEFAULT).desc()
			
 
				+
			
 
				+def get_asr_param(name: str) -> List[ParamDesc]:
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.ASR, name)
			
 
				+    return engine.parameters()
			
 
				+
			
 
				+async def asr_infer(user: UserDesc, items: ASREngineInput) -> TextMessage:
			
 
				+    if items.engine.lower() == "default":
			
 
				+        items.engine = config.SERVER.ENGINES.ASR.DEFAULT
			
 
				+    input = AudioMessage(data=items.data, sampleRate=items.sampleRate, sampleWidth=items.sampleWidth, type=items.type)
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.ASR, items.engine)
			
 
				+    if engine.inferType != INFER_TYPE.NORMAL:
			
 
				+        raise Exception("ASR engine {} not support infer type {}".format(items.engine, engine.inferType))
			
 
				+    output: TextMessage = await engine.run(input=input, user=user, **items.config)
			
 
				+    return output
			
 
				+
			
 
				+async def asr_stream_infer(user: UserDesc, websocket: WebSocket):
			
 
				+    await websocket.accept()
			
 
				+    client_waitting = True
			
 
				+    while client_waitting:
			
 
				+        action, payload = await WebSocketHandler.recv_message(websocket)
			
 
				+        match action:
			
 
				+            case WS_RECV_ACTION_TYPE.PING:
			
 
				+                await WebSocketHandler.send_message(websocket, WS_SEND_ACTION_TYPE.PONG, b'')
			
 
				+            case WS_RECV_ACTION_TYPE.ENGINE_START:
			
 
				+                # 解析payload
			
 
				+                items = EngineInput.model_validate_json(payload)
			
 
				+                client_waitting = False
			
 
				+            case _:
			
 
				+                await WebSocketHandler.send_message(websocket, WS_SEND_ACTION_TYPE.ERROR, 'First action must be ENGINE_START | PING')
			
 
				+                return
			
 
				+    if items.engine.lower() == "default":
			
 
				+        items.engine = config.SERVER.ENGINES.ASR.DEFAULT
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.ASR, items.engine)
			
 
				+    if engine.inferType != INFER_TYPE.STREAM:
			
 
				+        raise Exception("ASR engine {} not support infer type {}".format(items.engine, engine.inferType))
			
 
				+    await engine.run(websocket=websocket, user=user, **items.config)
			
--- a/digitalHuman/server/core/api_face_detection_v0_impl.py
+++ b/digitalHuman/server/core/api_face_detection_v0_impl.py
@@ -0,0 +1,96 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+from typing import Dict
			
 
				+from digitalHuman.protocol import UserDesc
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+from digitalHuman.uniface.detection import RetinaFace
			
 
				+from digitalHuman.uniface.constants import RetinaFaceWeights
			
 
				+
			
 
				+# 全局检测器实例（单例模式，避免重复初始化）
			
 
				+_detector_instance = None
			
 
				+
			
 
				+def get_detector():
			
 
				+    """获取全局检测器实例"""
			
 
				+    global _detector_instance
			
 
				+    if _detector_instance is None:
			
 
				+        try:
			
 
				+            _detector_instance = RetinaFace(
			
 
				+                model_name=RetinaFaceWeights.MNET_V2,
			
 
				+                conf_thresh=0.5,
			
 
				+                nms_thresh=0.4
			
 
				+            )
			
 
				+            logger.info("UniFace RetinaFace 检测器初始化成功")
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"UniFace 检测器初始化失败: {str(e)}")
			
 
				+            raise
			
 
				+    return _detector_instance
			
 
				+
			
 
				+async def face_detection_infer(user: UserDesc, image_data: bytes) -> Dict:
			
 
				+    """
			
 
				+    执行人脸检测
			
 
				+    
			
 
				+    Args:
			
 
				+        user: 用户信息
			
 
				+        image_data: 图片二进制数据
			
 
				+        
			
 
				+    Returns:
			
 
				+        Dict: 包含 hasFace, faceCount, faces 的字典
			
 
				+    """
			
 
				+    try:
			
 
				+        nparr = np.frombuffer(image_data, np.uint8)
			
 
				+        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
			
 
				+        
			
 
				+        if image is None:
			
 
				+            raise ValueError("无法解码图片数据")
			
 
				+        
			
 
				+        detector = get_detector()
			
 
				+        faces = detector.detect(image)
			
 
				+        result = {
			
 
				+            "hasFace": len(faces) > 0,
			
 
				+            "faceCount": len(faces),
			
 
				+            "faces": []
			
 
				+        }
			
 
				+        
			
 
				+        for face in faces:
			
 
				+            bbox = face.get('bbox', [])
			
 
				+            confidence = face.get('confidence', 0.0)
			
 
				+            landmarks = face.get('landmarks', [])
			
 
				+            
			
 
				+            # 将 numpy 数组转换为列表以避免布尔判断警告
			
 
				+            if isinstance(bbox, np.ndarray):
			
 
				+                bbox = bbox.tolist()
			
 
				+            if isinstance(landmarks, np.ndarray):
			
 
				+                landmarks = landmarks.tolist()
			
 
				+            
			
 
				+            bbox_list = list(bbox) if bbox is not None else []
			
 
				+            
			
 
				+            # 处理 landmarks（可能是 (5, 2) 或 (106, 2) 形状的数组）
			
 
				+            landmarks_list = []
			
 
				+            if landmarks is not None:
			
 
				+                try:
			
 
				+                    if len(landmarks) > 0 and isinstance(landmarks[0], (list, tuple, np.ndarray)):
			
 
				+                        landmarks_list = [[float(p[0]), float(p[1])] for p in landmarks if len(p) >= 2]
			
 
				+                except (IndexError, TypeError):
			
 
				+                    landmarks_list = []
			
 
				+            
			
 
				+            face_info = {
			
 
				+                "bbox": {
			
 
				+                    "x1": float(bbox_list[0]) if len(bbox_list) > 0 else 0.0,
			
 
				+                    "y1": float(bbox_list[1]) if len(bbox_list) > 1 else 0.0,
			
 
				+                    "x2": float(bbox_list[2]) if len(bbox_list) > 2 else 0.0,
			
 
				+                    "y2": float(bbox_list[3]) if len(bbox_list) > 3 else 0.0,
			
 
				+                },
			
 
				+                "confidence": float(confidence),
			
 
				+                "landmarks": landmarks_list
			
 
				+            }
			
 
				+            result["faces"].append(face_info)
			
 
				+        
			
 
				+        return result
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"人脸检测失败: {str(e)}", exc_info=True)
			
 
				+        raise
			
 
				+
			
--- a/digitalHuman/server/core/api_llm_v0_impl.py
+++ b/digitalHuman/server/core/api_llm_v0_impl.py
@@ -0,0 +1,21 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from typing import List
			
 
				+from digitalHuman.engine import EnginePool
			
 
				+from digitalHuman.utils import config
			
 
				+from digitalHuman.protocol import ParamDesc, EngineDesc, ENGINE_TYPE, UserDesc, AudioMessage, TextMessage
			
 
				+from digitalHuman.server.models import LLMEngineInput
			
 
				+
			
 
# Module-level engine pool through which the LLM helpers below look engines up.
enginePool = EnginePool()
			
 
				+
			
 
				+def get_llm_list() -> List[EngineDesc]:
			
 
				+    engines = enginePool.listEngine(ENGINE_TYPE.LLM)
			
 
				+    return [enginePool.getEngine(ENGINE_TYPE.LLM, engine).desc() for engine in engines]
			
 
				+
			
 
				+def get_llm_default() -> EngineDesc:
			
 
				+    return enginePool.getEngine(ENGINE_TYPE.LLM, config.SERVER.ENGINES.LLM.DEFAULT).desc()
			
 
				+
			
 
				+def get_llm_param(name: str) -> List[ParamDesc]:
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.LLM, name)
			
 
				+    return engine.parameters()
			
--- a/digitalHuman/server/core/api_tts_v0_impl.py
+++ b/digitalHuman/server/core/api_tts_v0_impl.py
@@ -0,0 +1,34 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+
			
 
				+from typing import List, Dict
			
 
				+from digitalHuman.engine import EnginePool, BaseTTSEngine
			
 
				+from digitalHuman.utils import config
			
 
				+from digitalHuman.protocol import ParamDesc, EngineDesc, ENGINE_TYPE, UserDesc, AudioMessage, TextMessage, VoiceDesc
			
 
				+from digitalHuman.server.models import TTSEngineInput
			
 
				+
			
 
# Module-level engine pool through which the TTS helpers below look engines up.
enginePool = EnginePool()
			
 
				+
			
 
				+def get_tts_list() -> List[EngineDesc]:
			
 
				+    engines = enginePool.listEngine(ENGINE_TYPE.TTS)
			
 
				+    return [enginePool.getEngine(ENGINE_TYPE.TTS, engine).desc() for engine in engines]
			
 
				+
			
 
				+def get_tts_default() -> EngineDesc:
			
 
				+    return enginePool.getEngine(ENGINE_TYPE.TTS, config.SERVER.ENGINES.TTS.DEFAULT).desc()
			
 
				+
			
 
				+async def get_tts_voice(name: str, **kwargs) -> List[VoiceDesc]:
			
 
				+    engine: BaseTTSEngine = enginePool.getEngine(ENGINE_TYPE.TTS, name)
			
 
				+    voices = await engine.voices(**kwargs)
			
 
				+    return voices
			
 
				+
			
 
				+def get_tts_param(name: str) -> List[ParamDesc]:
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.TTS, name)
			
 
				+    return engine.parameters()
			
 
				+
			
 
				+async def tts_infer(user: UserDesc, item: TTSEngineInput) -> AudioMessage:
			
 
				+    if item.engine.lower() == "default":
			
 
				+        item.engine = config.SERVER.ENGINES.TTS.DEFAULT
			
 
				+    input = TextMessage(data=item.data)
			
 
				+    engine = enginePool.getEngine(ENGINE_TYPE.TTS, item.engine)
			
 
				+    output: AudioMessage = await engine.run(input=input, user=user, **item.config)
			
 
				+    return output
			
--- a/digitalHuman/server/header.py
+++ b/digitalHuman/server/header.py
@@ -0,0 +1,23 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from typing import Annotated
			
 
				+from fastapi import Header, Depends
			
 
				+from digitalHuman.protocol import UserDesc
			
 
				+
			
 
				+class _HeaderInfo(UserDesc):
			
 
				+    """请求头信息"""
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        user_id: str = Header("tester", alias="user-id", description="用户ID"),
			
 
				+        request_id: str = Header("", alias="request-id", description="请求ID"),
			
 
				+        cookie: str = Header("", alias="cookie", description="cookie")
			
 
				+    ):
			
 
				+        super().__init__(user_id=user_id, request_id=request_id, cookie=cookie)
			
 
				+    
			
 
				+    def __str__(self):
			
 
				+        return f"user-id: {self.user_id} request-id: {self.request_id} cookie: {self.cookie}"
			
 
				+    
			
 
				+    def __repr__(self):
			
 
				+        return self.__str__()
			
 
				+
			
 
# Annotated alias: parameters typed ``HeaderInfo`` are filled in by FastAPI
# through ``Depends(_HeaderInfo)``.
HeaderInfo = Annotated[_HeaderInfo, Depends(_HeaderInfo)]
			
--- a/digitalHuman/server/models.py
+++ b/digitalHuman/server/models.py
@@ -0,0 +1,65 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from typing import List, Dict, Union
			
 
				+from pydantic import BaseModel
			
 
				+from digitalHuman.server.reponse import BaseResponse
			
 
				+from digitalHuman.protocol import *
			
 
				+
			
 
class EngineListResp(BaseResponse):
    """Response whose ``data`` is a list of ``EngineDesc`` items (empty by default)."""
    data: List[EngineDesc] = []
			
 
				+
			
 
class EngineDefaultResp(BaseResponse):
    """Response whose ``data`` is a single, required ``EngineDesc``."""
    data: EngineDesc
			
 
				+
			
 
class EngineParam(BaseResponse):
    """Response whose ``data`` is a list of ``ParamDesc`` items (empty by default)."""
    data: List[ParamDesc] = []
			
 
				+
			
 
class EngineInput(BaseModel):
    """Base request model subclassed by the agent, ASR, TTS and LLM inputs."""
    # Engine name; falls back to the literal 'default' when omitted.
    engine: str = 'default'
    # Free-form engine options; empty when omitted.
    config: Dict = {}
    # Request payload, accepted as text or bytes.
    data: Union[str, bytes] = ""
			
 
				+
			
 
class AgentEngineInput(EngineInput):
    """Engine input extended with a conversation identifier (empty string by default)."""
    conversation_id: str = ""
			
 
				+
			
 
class ASREngineInput(EngineInput, AudioMessage):
    """ASR request: combines the fields of ``EngineInput`` and ``AudioMessage``."""
    pass
			
 
				+
			
 
class ASREngineOutput(BaseResponse):
    """ASR response whose ``data`` is a required string."""
    data: str
			
 
				+
			
 
class VoiceListResp(BaseResponse):
    """Response whose ``data`` is a list of ``VoiceDesc`` items (empty by default)."""
    data: List[VoiceDesc] = []
			
 
				+
			
 
class TTSEngineInput(EngineInput):
    """TTS request; adds no fields to ``EngineInput``."""
    pass
			
 
				+
			
 
class TTSEngineOutput(BaseResponse, AudioMessage):
    """TTS response: combines the fields of ``BaseResponse`` and ``AudioMessage``."""
    pass
			
 
				+
			
 
class LLMEngineInput(EngineInput):
    """LLM request; adds no fields to ``EngineInput``."""
    pass
			
 
				+
			
 
class ConversationInput(BaseModel):
    """Request body carrying a free-form ``data`` dict (empty by default)."""
    data: Dict = {}
			
 
				+
			
 
class ConversationIdResp(BaseResponse):
    """Response whose ``data`` is a required string."""
    data: str
			
 
				+
			
 
# ========================= Face detection models ===========================
class FaceBBox(BaseModel):
    """Face bounding box described by four float coordinates."""
    x1: float
    y1: float
    x2: float
    y2: float
			
 
				+
			
 
class FaceInfo(BaseModel):
    """Information about a single face."""
    bbox: FaceBBox
    confidence: float
    landmarks: List[List[float]] = []  # 5-point or 106-point landmarks
			
 
				+
			
 
class FaceDetectionOutput(BaseResponse):
    """Face detection output."""
    data: Dict  # {"hasFace": bool, "faceCount": int, "faces": List[FaceInfo]}
			
--- a/digitalHuman/server/reponse.py
+++ b/digitalHuman/server/reponse.py
@@ -0,0 +1,49 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from typing import Any
			
 
				+from pydantic import BaseModel
			
 
				+from digitalHuman.protocol import RESPONSE_CODE, BaseResponse, eventStreamError, eventStreamDone
			
 
				+from digitalHuman.utils import logger
			
 
				+
			
 
				+
			
 
				+class Response(object):
			
 
				+    def __init__(self):
			
 
				+        self._response_dict = {}
			
 
				+        self.code = RESPONSE_CODE.OK
			
 
				+        self.message = "SUCCESS"
			
 
				+
			
 
				+    def __setattr__(self, name: str, value: Any):
			
 
				+        if name.startswith('_'):
			
 
				+            self.__dict__[name] = value
			
 
				+        else:
			
 
				+            self._response_dict[name] = value
			
 
				+
			
 
				+    def __getattr__(self, name: str):
			
 
				+        if name.startswith('_'):
			
 
				+            return self.__dict__[name]
			
 
				+        else:
			
 
				+            return self._response_dict[name]
			
 
				+
			
 
				+    def _message_log_summary(self, message: str, isError: bool):
			
 
				+        self.message = message
			
 
				+        if isError:
			
 
				+            logger.error(message, exc_info=True)
			
 
				+        else:
			
 
				+            logger.debug(message)
			
 
				+
			
 
				+    def ok(self, message: str):
			
 
				+        self.code = RESPONSE_CODE.OK
			
 
				+        self._message_log_summary(message, False)
			
 
				+
			
 
				+    def error(self, message: str, code: RESPONSE_CODE = RESPONSE_CODE.ERROR):
			
 
				+        self.code = code
			
 
				+        self._message_log_summary(message, True)
			
 
				+
			
 
				+    def validate(self, outItem: BaseModel):
			
 
				+        resp_json = outItem.model_validate(self._response_dict)
			
 
				+        # return json
			
 
				+        return resp_json.model_dump()
			
 
				+
			
 
				+async def streamInteralError(error: str = "Interal Error"):
			
 
				+    yield eventStreamError(error)
			
 
				+    yield eventStreamDone()
			
--- a/digitalHuman/server/router.py
+++ b/digitalHuman/server/router.py
@@ -0,0 +1,37 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from fastapi import FastAPI
			
 
				+from fastapi.middleware.cors import CORSMiddleware
			
 
				+from digitalHuman.server.api.common.common_api_v0 import router as commonRouter
			
 
				+from digitalHuman.server.api.asr.asr_api_v0 import router as asrRouter
			
 
				+from digitalHuman.server.api.tts.tts_api_v0 import router as ttsRouter
			
 
				+from digitalHuman.server.api.llm.llm_api_v0 import router as llmRouter
			
 
				+from digitalHuman.server.api.agent.agent_api_v0 import router as agentRouter
			
 
				+from digitalHuman.server.api.face_detection.face_detection_api_v0 import router as faceDetectionRouter
			
 
				+from digitalHuman.utils import config
			
 
				+
			
 
				+
			
 
				+__all__ = ["app"]
			
 
				+
			
 
				+app = FastAPI(
			
 
				+    title=config.COMMON.NAME, 
			
 
				+    description=f"This is a cool set of apis for {config.COMMON.NAME}",
			
 
				+    version=config.COMMON.VERSION
			
 
				+)
			
 
				+
			
 
				+app.add_middleware(
			
 
				+    CORSMiddleware,
			
 
				+    allow_origins=["*"],
			
 
				+    allow_credentials=True,
			
 
				+    allow_methods=["*"],
			
 
				+    allow_headers=["*"],
			
 
				+)
			
 
				+
			
 
				+GLOABLE_PREFIX = "/adh"
			
 
				+# 路由
			
 
				+app.include_router(commonRouter, prefix=GLOABLE_PREFIX, tags=["COMMON"])
			
 
				+app.include_router(asrRouter, prefix=GLOABLE_PREFIX, tags=["ASR"])
			
 
				+app.include_router(ttsRouter, prefix=GLOABLE_PREFIX, tags=["TTS"])
			
 
				+app.include_router(llmRouter, prefix=GLOABLE_PREFIX, tags=["LLM"])
			
 
				+app.include_router(agentRouter, prefix=GLOABLE_PREFIX, tags=["AGENT"])
			
 
				+app.include_router(faceDetectionRouter, prefix=GLOABLE_PREFIX, tags=["FACE_DETECTION"])
			
--- a/digitalHuman/server/ws.py
+++ b/digitalHuman/server/ws.py
@@ -0,0 +1,30 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+
			
 
				+from typing import List
			
 
				+from fastapi import WebSocket
			
 
				+
			
 
				+class WebsocketManager:
			
 
				+    def __init__(self):
			
 
				+        # 存放激活的ws连接对象
			
 
				+        self._connections: List[WebSocket] = []
			
 
				+ 
			
 
				+    async def connect(self, ws: WebSocket) -> None:
			
 
				+        # 等待连接
			
 
				+        await ws.accept()
			
 
				+        # 存储ws连接对象
			
 
				+        self._connections.append(ws)
			
 
				+ 
			
 
				+    def disconnect(self, ws: WebSocket) -> None:
			
 
				+        # 关闭时 移除ws对象
			
 
				+        if ws in self._connections:
			
 
				+            self._connections.remove(ws)
			
 
				+ 
			
 
				+    @staticmethod
			
 
				+    async def sendMessage(message: str, ws: WebSocket) -> None:
			
 
				+        # 发消息
			
 
				+        await ws.send_text(message)
			
 
				+ 
			
 
				+    async def broadcast(self, message: str) -> None:
			
 
				+        # 广播消息
			
 
				+        for connection in self._connections:
			
 
				+            await connection.send_text(message)
			
--- a/digitalHuman/uniface/__init__.py
+++ b/digitalHuman/uniface/__init__.py
@@ -0,0 +1,72 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# 修复 uniface 内部导入路径
			
 
				+import sys
			
 
				+import os
			
 
				+
			
 
				+# 获取当前文件所在目录（digitalHuman/uniface）
			
 
				+_current_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+# 获取 digitalHuman 目录
			
 
				+_digital_human_dir = os.path.dirname(_current_dir)
			
 
				+# 将 digitalHuman 目录添加到 sys.path，这样 uniface 内部的导入可以正常工作
			
 
				+# 因为 uniface 内部使用 from uniface.xxx，需要让 uniface 作为顶级包
			
 
				+if _digital_human_dir not in sys.path:
			
 
				+    sys.path.insert(0, _digital_human_dir)
			
 
				+
			
 
				+# 现在导入 uniface 的公共接口（使用相对导入以避免路径问题）
			
 
				+from .face_utils import compute_similarity, face_alignment
			
 
				+from .log import Logger, enable_logging
			
 
				+from .model_store import verify_model_weights
			
 
				+from .visualization import draw_detections
			
 
				+
			
 
				+from .analyzer import FaceAnalyzer
			
 
				+from .attribute import AgeGender
			
 
				+from .face import Face
			
 
				+
			
 
				+try:
			
 
				+    from .attribute import Emotion
			
 
				+except ImportError:
			
 
				+    Emotion = None  # PyTorch not installed
			
 
				+
			
 
				+from .detection import (
			
 
				+    SCRFD,
			
 
				+    RetinaFace,
			
 
				+    create_detector,
			
 
				+    detect_faces,
			
 
				+    list_available_detectors,
			
 
				+)
			
 
				+from .landmark import Landmark106, create_landmarker
			
 
				+from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
			
 
				+
			
 
				+__all__ = [
			
 
				+    '__author__',
			
 
				+    '__license__',
			
 
				+    '__version__',
			
 
				+    # Core classes
			
 
				+    'Face',
			
 
				+    'FaceAnalyzer',
			
 
				+    # Factory functions
			
 
				+    'create_detector',
			
 
				+    'create_landmarker',
			
 
				+    'create_recognizer',
			
 
				+    'detect_faces',
			
 
				+    'list_available_detectors',
			
 
				+    # Detection models
			
 
				+    'RetinaFace',
			
 
				+    'SCRFD',
			
 
				+    # Recognition models
			
 
				+    'ArcFace',
			
 
				+    'MobileFace',
			
 
				+    'SphereFace',
			
 
				+    # Landmark models
			
 
				+    'Landmark106',
			
 
				+    # Attribute models
			
 
				+    'AgeGender',
			
 
				+    'Emotion',
			
 
				+    # Utilities
			
 
				+    'compute_similarity',
			
 
				+    'draw_detections',
			
 
				+    'face_alignment',
			
 
				+    'verify_model_weights',
			
 
				+    'Logger',
			
 
				+    'enable_logging',
			
 
				+]
			
--- a/digitalHuman/uniface/analyzer.py
+++ b/digitalHuman/uniface/analyzer.py
@@ -0,0 +1,84 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+from typing import List, Optional
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from uniface.attribute.age_gender import AgeGender
			
 
				+from uniface.detection.base import BaseDetector
			
 
				+from uniface.face import Face
			
 
				+from uniface.log import Logger
			
 
				+from uniface.recognition.base import BaseRecognizer
			
 
				+
			
 
				+__all__ = ['FaceAnalyzer']
			
 
				+
			
 
				+
			
 
				+class FaceAnalyzer:
			
 
				+    """Unified face analyzer combining detection, recognition, and attributes."""
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        detector: BaseDetector,
			
 
				+        recognizer: Optional[BaseRecognizer] = None,
			
 
				+        age_gender: Optional[AgeGender] = None,
			
 
				+    ) -> None:
			
 
				+        self.detector = detector
			
 
				+        self.recognizer = recognizer
			
 
				+        self.age_gender = age_gender
			
 
				+
			
 
				+        Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
			
 
				+        if recognizer:
			
 
				+            Logger.info(f'  - Recognition enabled: {recognizer.__class__.__name__}')
			
 
				+        if age_gender:
			
 
				+            Logger.info(f'  - Age/Gender enabled: {age_gender.__class__.__name__}')
			
 
				+
			
 
				+    def analyze(self, image: np.ndarray) -> List[Face]:
			
 
				+        """Analyze faces in an image."""
			
 
				+        detections = self.detector.detect(image)
			
 
				+        Logger.debug(f'Detected {len(detections)} face(s)')
			
 
				+
			
 
				+        faces = []
			
 
				+        for idx, detection in enumerate(detections):
			
 
				+            bbox = detection['bbox']
			
 
				+            confidence = detection['confidence']
			
 
				+            landmarks = detection['landmarks']
			
 
				+
			
 
				+            embedding = None
			
 
				+            if self.recognizer is not None:
			
 
				+                try:
			
 
				+                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
			
 
				+                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
			
 
				+                except Exception as e:
			
 
				+                    Logger.warning(f'  Face {idx + 1}: Failed to extract embedding: {e}')
			
 
				+
			
 
				+            age, gender_id = None, None
			
 
				+            if self.age_gender is not None:
			
 
				+                try:
			
 
				+                    gender_id, age = self.age_gender.predict(image, bbox)
			
 
				+                    gender_str = 'Female' if gender_id == 0 else 'Male'
			
 
				+                    Logger.debug(f'  Face {idx + 1}: Age={age}, Gender={gender_str}')
			
 
				+                except Exception as e:
			
 
				+                    Logger.warning(f'  Face {idx + 1}: Failed to predict age/gender: {e}')
			
 
				+
			
 
				+            face = Face(
			
 
				+                bbox=bbox,
			
 
				+                confidence=confidence,
			
 
				+                landmarks=landmarks,
			
 
				+                embedding=embedding,
			
 
				+                age=age,
			
 
				+                gender_id=gender_id,
			
 
				+            )
			
 
				+            faces.append(face)
			
 
				+
			
 
				+        Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
			
 
				+        return faces
			
 
				+
			
 
				+    def __repr__(self) -> str:
			
 
				+        parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
			
 
				+        if self.recognizer:
			
 
				+            parts.append(f'recognizer={self.recognizer.__class__.__name__}')
			
 
				+        if self.age_gender:
			
 
				+            parts.append(f'age_gender={self.age_gender.__class__.__name__}')
			
 
				+        return ', '.join(parts) + ')'
			
--- a/digitalHuman/uniface/attribute/__init__.py
+++ b/digitalHuman/uniface/attribute/__init__.py
@@ -0,0 +1,99 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+from typing import Any, Dict, List, Union
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+from uniface.attribute.age_gender import AgeGender
			
 
				+from uniface.attribute.base import Attribute
			
 
				+from uniface.constants import AgeGenderWeights, DDAMFNWeights
			
 
				+
			
 
				+# Emotion requires PyTorch - make it optional
			
 
				+try:
			
 
				+    from uniface.attribute.emotion import Emotion
			
 
				+
			
 
				+    _EMOTION_AVAILABLE = True
			
 
				+except ImportError:
			
 
				+    Emotion = None
			
 
				+    _EMOTION_AVAILABLE = False
			
 
				+
			
 
				+# Public API for the attribute module
			
 
				+__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
			
 
				+
			
 
				+# A mapping from model enums to their corresponding attribute classes
			
 
				+_ATTRIBUTE_MODELS = {
			
 
				+    **{model: AgeGender for model in AgeGenderWeights},
			
 
				+}
			
 
				+
			
 
				+# Add Emotion models only if PyTorch is available
			
 
				+if _EMOTION_AVAILABLE:
			
 
				+    _ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
			
 
				+
			
 
				+
			
 
				+def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
			
 
				+    """
			
 
				+    Factory function to create an attribute predictor instance.
			
 
				+
			
 
				+    This high-level API simplifies the creation of attribute models by
			
 
				+    dynamically selecting the correct class based on the provided model enum.
			
 
				+
			
 
				+    Args:
			
 
				+        model_name: The enum corresponding to the desired attribute model
			
 
				+                    (e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
			
 
				+        **kwargs: Additional keyword arguments to pass to the model's constructor.
			
 
				+
			
 
				+    Returns:
			
 
				+        An initialized instance of an Attribute predictor class (e.g., AgeGender).
			
 
				+
			
 
				+    Raises:
			
 
				+        ValueError: If the provided model_name is not a supported enum.
			
 
				+    """
			
 
				+    model_class = _ATTRIBUTE_MODELS.get(model_name)
			
 
				+
			
 
				+    if model_class is None:
			
 
				+        raise ValueError(
			
 
				+            f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
			
 
				+        )
			
 
				+
			
 
				+    # Pass model_name to the constructor, as some classes might need it
			
 
				+    return model_class(model_name=model_name, **kwargs)
			
 
				+
			
 
				+
			
 
				+def predict_attributes(
			
 
				+    image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
			
 
				+) -> List[Dict[str, Any]]:
			
 
				+    """
			
 
				+    High-level API to predict attributes for multiple detected faces.
			
 
				+
			
 
				+    This function iterates through a list of face detections, runs the
			
 
				+    specified attribute predictor on each one, and appends the results back
			
 
				+    into the detection dictionary.
			
 
				+
			
 
				+    Args:
			
 
				+        image (np.ndarray): The full input image in BGR format.
			
 
				+        detections (List[Dict]): A list of detection results, where each dict
			
 
				+                                 must contain a 'bbox' and optionally 'landmark'.
			
 
				+        predictor (Attribute): An initialized attribute predictor instance,
			
 
				+                               created by `create_attribute_predictor`.
			
 
				+
			
 
				+    Returns:
			
 
				+        The list of detections, where each dictionary is updated with a new
			
 
				+        'attributes' key containing the prediction result.
			
 
				+    """
			
 
				+    for face in detections:
			
 
				+        # Initialize attributes dict if it doesn't exist
			
 
				+        if 'attributes' not in face:
			
 
				+            face['attributes'] = {}
			
 
				+
			
 
				+        if isinstance(predictor, AgeGender):
			
 
				+            gender_id, age = predictor(image, face['bbox'])
			
 
				+            face['attributes']['gender_id'] = gender_id
			
 
				+            face['attributes']['age'] = age
			
 
				+        elif isinstance(predictor, Emotion):
			
 
				+            emotion, confidence = predictor(image, face['landmark'])
			
 
				+            face['attributes']['emotion'] = emotion
			
 
				+            face['attributes']['confidence'] = confidence
			
 
				+
			
 
				+    return detections
			
--- a/digitalHuman/uniface/attribute/age_gender.py
+++ b/digitalHuman/uniface/attribute/age_gender.py
@@ -0,0 +1,187 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+from typing import List, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+from uniface.attribute.base import Attribute
			
 
				+from uniface.constants import AgeGenderWeights
			
 
				+from uniface.face_utils import bbox_center_alignment
			
 
				+from uniface.log import Logger
			
 
				+from uniface.model_store import verify_model_weights
			
 
				+from uniface.onnx_utils import create_onnx_session
			
 
				+
			
 
				+__all__ = ['AgeGender']
			
 
				+
			
 
				+
			
 
				+class AgeGender(Attribute):
			
 
				+    """
			
 
				+    Age and gender prediction model using ONNX Runtime.
			
 
				+
			
 
				+    This class inherits from the base `Attribute` class and implements the
			
 
				+    functionality for predicting age (in years) and gender ID (0 for Female,
			
 
				+    1 for Male) from a face image. It requires a bounding box to locate the face.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
			
 
				+        """
			
 
				+        Initializes the AgeGender prediction model.
			
 
				+
			
 
				+        Args:
			
 
				+            model_name (AgeGenderWeights): The enum specifying the model weights
			
 
				+                                           to load.
			
 
				+        """
			
 
				+        Logger.info(f'Initializing AgeGender with model={model_name.name}')
			
 
				+        self.model_path = verify_model_weights(model_name)
			
 
				+        self._initialize_model()
			
 
				+
			
 
				+    def _initialize_model(self) -> None:
			
 
				+        """
			
 
				+        Initializes the ONNX model and creates an inference session.
			
 
				+        """
			
 
				+        try:
			
 
				+            self.session = create_onnx_session(self.model_path)
			
 
				+            # Get model input details from the loaded model
			
 
				+            input_meta = self.session.get_inputs()[0]
			
 
				+            self.input_name = input_meta.name
			
 
				+            self.input_size = tuple(input_meta.shape[2:4])  # (height, width)
			
 
				+            self.output_names = [output.name for output in self.session.get_outputs()]
			
 
				+            Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
			
 
				+        except Exception as e:
			
 
				+            Logger.error(
			
 
				+                f"Failed to load AgeGender model from '{self.model_path}'",
			
 
				+                exc_info=True,
			
 
				+            )
			
 
				+            raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
			
 
				+
			
 
				+    def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
			
 
				+        """
			
 
				+        Aligns the face based on the bounding box and preprocesses it for inference.
			
 
				+
			
 
				+        Args:
			
 
				+            image (np.ndarray): The full input image in BGR format.
			
 
				+            bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
			
 
				+
			
 
				+        Returns:
			
 
				+            np.ndarray: The preprocessed image blob ready for inference.
			
 
				+        """
			
 
				+        bbox = np.asarray(bbox)
			
 
				+
			
 
				+        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
			
 
				+        center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
			
 
				+        scale = self.input_size[1] / (max(width, height) * 1.5)
			
 
				+
			
 
				+        # **Rotation parameter restored here**
			
 
				+        rotation = 0.0
			
 
				+        aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
			
 
				+
			
 
				+        blob = cv2.dnn.blobFromImage(
			
 
				+            aligned_face,
			
 
				+            scalefactor=1.0,
			
 
				+            size=self.input_size[::-1],
			
 
				+            mean=(0.0, 0.0, 0.0),
			
 
				+            swapRB=True,
			
 
				+        )
			
 
				+        return blob
			
 
				+
			
 
				+    def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
			
 
				+        """
			
 
				+        Processes the raw model output to extract gender and age.
			
 
				+
			
 
				+        Args:
			
 
				+            prediction (np.ndarray): The raw output from the model inference.
			
 
				+
			
 
				+        Returns:
			
 
				+            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
			
 
				+                             and age (in years).
			
 
				+        """
			
 
				+        # First two values are gender logits
			
 
				+        gender_id = int(np.argmax(prediction[:2]))
			
 
				+        # Third value is normalized age, scaled by 100
			
 
				+        age = int(np.round(prediction[2] * 100))
			
 
				+        return gender_id, age
			
 
				+
			
 
				+    def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
			
 
				+        """
			
 
				+        Predicts age and gender for a single face specified by a bounding box.
			
 
				+
			
 
				+        Args:
			
 
				+            image (np.ndarray): The full input image in BGR format.
			
 
				+            bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
			
 
				+
			
 
				+        Returns:
			
 
				+            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
			
 
				+        """
			
 
				+        face_blob = self.preprocess(image, bbox)
			
 
				+        prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
			
 
				+        gender_id, age = self.postprocess(prediction)
			
 
				+        return gender_id, age
			
 
				+
			
 
				+
			
 
				+# TODO: below is only for testing, remove it later
			
 
				+if __name__ == '__main__':
			
 
				+    # To run this script, you need to have uniface.detection installed
			
 
				+    # or available in your path.
			
 
				+    from uniface.constants import RetinaFaceWeights
			
 
				+    from uniface.detection import create_detector
			
 
				+
			
 
				+    print('Initializing models for live inference...')
			
 
				+    # 1. Initialize the face detector
			
 
				+    # Using a smaller model for faster real-time performance
			
 
				+    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
			
 
				+
			
 
				+    # 2. Initialize the attribute predictor
			
 
				+    age_gender_predictor = AgeGender()
			
 
				+
			
 
				+    # 3. Start webcam capture
			
 
				+    cap = cv2.VideoCapture(0)
			
 
				+    if not cap.isOpened():
			
 
				+        print('Error: Could not open webcam.')
			
 
				+        exit()
			
 
				+
			
 
				+    print("Starting webcam feed. Press 'q' to quit.")
			
 
				+    while True:
			
 
				+        ret, frame = cap.read()
			
 
				+        if not ret:
			
 
				+            print('Error: Failed to capture frame.')
			
 
				+            break
			
 
				+
			
 
				+        # Detect faces in the current frame
			
 
				+        detections = detector.detect(frame)
			
 
				+
			
 
				+        # For each detected face, predict age and gender
			
 
				+        for detection in detections:
			
 
				+            box = detection['bbox']
			
 
				+            x1, y1, x2, y2 = map(int, box)
			
 
				+
			
 
				+            # Predict attributes
			
 
				+            gender_id, age = age_gender_predictor.predict(frame, box)
			
 
				+            gender_str = 'Female' if gender_id == 0 else 'Male'
			
 
				+
			
 
				+            # Prepare text and draw on the frame
			
 
				+            label = f'{gender_str}, {age}'
			
 
				+            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
			
 
				+            cv2.putText(
			
 
				+                frame,
			
 
				+                label,
			
 
				+                (x1, y1 - 10),
			
 
				+                cv2.FONT_HERSHEY_SIMPLEX,
			
 
				+                0.8,
			
 
				+                (0, 255, 0),
			
 
				+                2,
			
 
				+            )
			
 
				+
			
 
				+        # Display the resulting frame
			
 
				+        cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
			
 
				+
			
 
				+        # Break the loop if 'q' is pressed
			
 
				+        if cv2.waitKey(1) & 0xFF == ord('q'):
			
 
				+            break
			
 
				+
			
 
				+    # Release resources
			
 
				+    cap.release()
			
 
				+    cv2.destroyAllWindows()
			
 
				+    print('Inference stopped.')
			
--- a/digitalHuman/uniface/attribute/base.py
+++ b/digitalHuman/uniface/attribute/base.py
@@ -0,0 +1,92 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+from abc import ABC, abstractmethod
			
 
				+from typing import Any
			
 
				+
			
 
				+import numpy as np
			
 
				+
			
 
				+
			
 
				+class Attribute(ABC):
			
 
				+    """
			
 
				+    Abstract base class for face attribute models.
			
 
				+
			
 
				+    This class defines the common interface that all attribute models
			
 
				+    (e.g., age-gender, emotion) must implement. It ensures a consistent API
			
 
				+    across different attribute prediction modules in the library, making them
			
 
				+    interchangeable and easy to use.
			
 
				+    """
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def _initialize_model(self) -> None:
			
 
				+        """
			
 
				+        Initializes the underlying model for inference.
			
 
				+
			
 
				+        This method should handle loading model weights, creating the
			
 
				+        inference session (e.g., ONNX Runtime, PyTorch), and any necessary
			
 
				+        warm-up procedures to prepare the model for prediction.
			
 
				+        """
			
 
				+        raise NotImplementedError('Subclasses must implement the _initialize_model method.')
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def preprocess(self, image: np.ndarray, *args: Any) -> Any:
			
 
				+        """
			
 
				+        Preprocesses the input data for the model.
			
 
				+
			
 
				+        This method should take a raw image and any other necessary data
			
 
				+        (like bounding boxes or landmarks) and convert it into the format
			
 
				+        expected by the model's inference engine (e.g., a blob or tensor).
			
 
				+
			
 
				+        Args:
			
 
				+            image (np.ndarray): The input image containing the face, typically
			
 
				+                                in BGR format.
			
 
				+            *args: Additional arguments required for preprocessing, such as
			
 
				+                   bounding boxes or facial landmarks.
			
 
				+
			
 
				+        Returns:
			
 
				+            The preprocessed data ready for model inference.
			
 
				+        """
			
 
				+        raise NotImplementedError('Subclasses must implement the preprocess method.')
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def postprocess(self, prediction: Any) -> Any:
			
 
				+        """
			
 
				+        Postprocesses the raw model output into a human-readable format.
			
 
				+
			
 
				+        This method takes the raw output from the model's inference and
			
 
				+        converts it into a meaningful result, such as an age value, a gender
			
 
				+        label, or an emotion category.
			
 
				+
			
 
				+        Args:
			
 
				+            prediction (Any): The raw output from the model's inference.
			
 
				+
			
 
				+        Returns:
			
 
				+            The final, processed attributes.
			
 
				+        """
			
 
				+        raise NotImplementedError('Subclasses must implement the postprocess method.')
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def predict(self, image: np.ndarray, *args: Any) -> Any:
			
 
				+        """
			
 
				+        Performs end-to-end attribute prediction on a given image.
			
 
				+
			
 
				+        This method orchestrates the full pipeline: it calls the preprocess,
			
 
				+        inference, and postprocess steps to return the final, user-friendly
			
 
				+        attribute prediction.
			
 
				+
			
 
				+        Args:
			
 
				+            image (np.ndarray): The input image containing the face.
			
 
				+            *args: Additional data required for prediction, such as a bounding
			
 
				+                   box or landmarks.
			
 
				+
			
 
				+        Returns:
			
 
				+            The final predicted attributes.
			
 
				+        """
			
 
				+        raise NotImplementedError('Subclasses must implement the predict method.')
			
 
				+
			
 
				+    def __call__(self, *args, **kwargs) -> Any:
			
 
				+        """
			
 
				+        Provides a convenient, callable shortcut for the `predict` method.
			
 
				+        """
			
 
				+        return self.predict(*args, **kwargs)
			
--- a/digitalHuman/uniface/attribute/emotion.py
+++ b/digitalHuman/uniface/attribute/emotion.py
@@ -0,0 +1,194 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+from typing import List, Tuple, Union
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+import torch
			
 
				+
			
 
				+from uniface.attribute.base import Attribute
			
 
				+from uniface.constants import DDAMFNWeights
			
 
				+from uniface.face_utils import face_alignment
			
 
				+from uniface.log import Logger
			
 
				+from uniface.model_store import verify_model_weights
			
 
				+
			
 
				+__all__ = ['Emotion']
			
 
				+
			
 
				+
			
 
				+class Emotion(Attribute):
			
 
				+    """
			
 
				+    Emotion recognition model using a TorchScript model.
			
 
				+
			
 
				+    This class inherits from the base `Attribute` class and implements the
			
 
				+    functionality for predicting one of several emotion categories from a face
			
 
				+    image. It requires 5-point facial landmarks for alignment.
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
			
 
				+        input_size: Tuple[int, int] = (112, 112),
			
 
				+    ) -> None:
			
 
				+        """
			
 
				+        Initializes the emotion recognition model.
			
 
				+
			
 
				+        Args:
			
 
				+            model_weights (DDAMFNWeights): The enum for the model weights to load.
			
 
				+            input_size (Tuple[int, int]): The expected input size for the model.
			
 
				+        """
			
 
				+        Logger.info(f'Initializing Emotion with model={model_weights.name}')
			
 
				+
			
 
				+        if torch.backends.mps.is_available():
			
 
				+            self.device = torch.device('mps')
			
 
				+        elif torch.cuda.is_available():
			
 
				+            self.device = torch.device('cuda')
			
 
				+        else:
			
 
				+            self.device = torch.device('cpu')
			
 
				+
			
 
				+        self.input_size = input_size
			
 
				+        self.model_path = verify_model_weights(model_weights)
			
 
				+
			
 
				+        # Define emotion labels based on the selected model
			
 
				+        self.emotion_labels = [
			
 
				+            'Neutral',
			
 
				+            'Happy',
			
 
				+            'Sad',
			
 
				+            'Surprise',
			
 
				+            'Fear',
			
 
				+            'Disgust',
			
 
				+            'Angry',
			
 
				+        ]
			
 
				+        if model_weights == DDAMFNWeights.AFFECNET8:
			
 
				+            self.emotion_labels.append('Contempt')
			
 
				+
			
 
				+        self._initialize_model()
			
 
				+
			
 
				+    def _initialize_model(self) -> None:
			
 
				+        """
			
 
				+        Loads and initializes the TorchScript model for inference.
			
 
				+        """
			
 
				+        try:
			
 
				+            self.model = torch.jit.load(self.model_path, map_location=self.device)
			
 
				+            self.model.eval()
			
 
				+            # Warm-up with a dummy input for faster first inference
			
 
				+            dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
			
 
				+            with torch.no_grad():
			
 
				+                self.model(dummy_input)
			
 
				+            Logger.info(f'Successfully initialized Emotion model on {self.device}')
			
 
				+        except Exception as e:
			
 
				+            Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
			
 
				+            raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
			
 
				+
			
 
				+    def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
			
 
				+        """
			
 
				+        Aligns the face using landmarks and preprocesses it into a tensor.
			
 
				+
			
 
				+        Args:
			
 
				+            image (np.ndarray): The full input image in BGR format.
			
 
				+            landmark (Union[List, np.ndarray]): The 5-point facial landmarks.
			
 
				+
			
 
				+        Returns:
			
 
				+            torch.Tensor: The preprocessed image tensor ready for inference.
			
 
				+        """
			
 
				+        landmark = np.asarray(landmark)
			
 
				+
			
 
				+        aligned_image, _ = face_alignment(image, landmark)
			
 
				+
			
 
				+        # Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
			
 
				+        rgb_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)
			
 
				+        resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0
			
 
				+        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
			
 
				+        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
			
 
				+        normalized_image = (resized_image - mean) / std
			
 
				+        transposed_image = normalized_image.transpose((2, 0, 1))
			
 
				+
			
 
				+        return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
			
 
				+
			
 
				+    def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
			
 
				+        """
			
 
				+        Processes the raw model output to get the emotion label and confidence score.
			
 
				+        """
			
 
				+        probabilities = torch.nn.functional.softmax(prediction, dim=1).squeeze().cpu().numpy()
			
 
				+        pred_index = np.argmax(probabilities)
			
 
				+        emotion_label = self.emotion_labels[pred_index]
			
 
				+        confidence = float(probabilities[pred_index])
			
 
				+        return emotion_label, confidence
			
 
				+
			
 
				+    def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
			
 
				+        """
			
 
				+        Predicts the emotion from a single face specified by its landmarks.
			
 
				+        """
			
 
				+        input_tensor = self.preprocess(image, landmark)
			
 
				+        with torch.no_grad():
			
 
				+            output = self.model(input_tensor)
			
 
				+            if isinstance(output, tuple):
			
 
				+                output = output[0]
			
 
				+
			
 
				+        return self.postprocess(output)
			
 
				+
			
 
				+
			
 
				+# TODO: below is only for testing, remove it later
			
 
				+if __name__ == '__main__':
			
 
				+    from uniface.constants import RetinaFaceWeights
			
 
				+    from uniface.detection import create_detector
			
 
				+
			
 
				+    print('Initializing models for live inference...')
			
 
				+    # 1. Initialize the face detector
			
 
				+    # Using a smaller model for faster real-time performance
			
 
				+    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
			
 
				+
			
 
				+    # 2. Initialize the attribute predictor
			
 
				+    emotion_predictor = Emotion()
			
 
				+
			
 
				+    # 3. Start webcam capture
			
 
				+    cap = cv2.VideoCapture(0)
			
 
				+    if not cap.isOpened():
			
 
				+        print('Error: Could not open webcam.')
			
 
				+        exit()
			
 
				+
			
 
				+    print("Starting webcam feed. Press 'q' to quit.")
			
 
				+    while True:
			
 
				+        ret, frame = cap.read()
			
 
				+        if not ret:
			
 
				+            print('Error: Failed to capture frame.')
			
 
				+            break
			
 
				+
			
 
				+        # Detect faces in the current frame.
			
 
				+        # This method returns a list of dictionaries for each detected face.
			
 
				+        detections = detector.detect(frame)
			
 
				+
			
 
				+        # For each detected face, predict the emotion
			
 
				+        for detection in detections:
			
 
				+            box = detection['bbox']
			
 
				+            landmark = detection['landmarks']
			
 
				+            x1, y1, x2, y2 = map(int, box)
			
 
				+
			
 
				+            # Predict attributes using the landmark
			
 
				+            emotion, confidence = emotion_predictor.predict(frame, landmark)
			
 
				+
			
 
				+            # Prepare text and draw on the frame
			
 
				+            label = f'{emotion} ({confidence:.2f})'
			
 
				+            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
			
 
				+            cv2.putText(
			
 
				+                frame,
			
 
				+                label,
			
 
				+                (x1, y1 - 10),
			
 
				+                cv2.FONT_HERSHEY_SIMPLEX,
			
 
				+                0.8,
			
 
				+                (255, 0, 0),
			
 
				+                2,
			
 
				+            )
			
 
				+
			
 
				+        # Display the resulting frame
			
 
				+        cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
			
 
				+
			
 
				+        # Break the loop if 'q' is pressed
			
 
				+        if cv2.waitKey(1) & 0xFF == ord('q'):
			
 
				+            break
			
 
				+
			
 
				+    # Release resources
			
 
				+    cap.release()
			
 
				+    cv2.destroyAllWindows()
			
 
				+    print('Inference stopped.')
			
--- a/digitalHuman/uniface/common.py
+++ b/digitalHuman/uniface/common.py
@@ -0,0 +1,243 @@
 
				+# Copyright 2025 Yakhyokhuja Valikhujaev
			
 
				+# Author: Yakhyokhuja Valikhujaev
			
 
				+# GitHub: https://github.com/yakhyo
			
 
				+
			
 
				+import itertools
			
 
				+import math
			
 
				+from typing import List, Optional, Tuple
			
 
				+
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+__all__ = [
			
 
				+    'resize_image',
			
 
				+    'generate_anchors',
			
 
				+    'non_max_suppression',
			
 
				+    'decode_boxes',
			
 
				+    'decode_landmarks',
			
 
				+    'distance2bbox',
			
 
				+    'distance2kps',
			
 
				+]
			
 
				+
			
 
				+
			
 
				+def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
			
 
				+    """
			
 
				+    Resize an image to fit within a target shape while keeping its aspect ratio.
			
 
				+
			
 
				+    Args:
			
 
				+        frame (np.ndarray): Input image.
			
 
				+        target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
			
 
				+    """
			
 
				+    width, height = target_shape
			
 
				+
			
 
				+    # Aspect-ratio preserving resize
			
 
				+    im_ratio = float(frame.shape[0]) / frame.shape[1]
			
 
				+    model_ratio = height / width
			
 
				+    if im_ratio > model_ratio:
			
 
				+        new_height = height
			
 
				+        new_width = int(new_height / im_ratio)
			
 
				+    else:
			
 
				+        new_width = width
			
 
				+        new_height = int(new_width * im_ratio)
			
 
				+
			
 
				+    resize_factor = float(new_height) / frame.shape[0]
			
 
				+    resized_frame = cv2.resize(frame, (new_width, new_height))
			
 
				+
			
 
				+    # Create blank image and place resized image on it
			
 
				+    image = np.zeros((height, width, 3), dtype=np.uint8)
			
 
				+    image[:new_height, :new_width, :] = resized_frame
			
 
				+
			
 
				+    return image, resize_factor
			
 
				+
			
 
				+
			
 
				+def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
			
 
				+    """
			
 
				+    Generate anchor boxes for a given image size (RetinaFace specific).
			
 
				+
			
 
				+    Args:
			
 
				+        image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
			
 
				+
			
 
				+    Returns:
			
 
				+        np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
			
 
				+    """
			
 
				+    steps = [8, 16, 32]
			
 
				+    min_sizes = [[16, 32], [64, 128], [256, 512]]
			
 
				+
			
 
				+    anchors = []
			
 
				+    feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
			
 
				+
			
 
				+    for k, (map_height, map_width) in enumerate(feature_maps):
			
 
				+        step = steps[k]
			
 
				+        for i, j in itertools.product(range(map_height), range(map_width)):
			
 
				+            for min_size in min_sizes[k]:
			
 
				+                s_kx = min_size / image_size[1]
			
 
				+                s_ky = min_size / image_size[0]
			
 
				+
			
 
				+                dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
			
 
				+                dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
			
 
				+                for cy, cx in itertools.product(dense_cy, dense_cx):
			
 
				+                    anchors += [cx, cy, s_kx, s_ky]
			
 
				+
			
 
				+    output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
			
 
				+    return output
			
 
				+
			
 
				+
			
 
				+def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
			
 
				+    """
			
 
				+    Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
			
 
				+
			
 
				+    Args:
			
 
				+        dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
			
 
				+        threshold (float): IoU threshold for suppression.
			
 
				+
			
 
				+    Returns:
			
 
				+        List[int]: Indices of bounding boxes retained after suppression.
			
 
				+    """
			
 
				+    x1 = dets[:, 0]
			
 
				+    y1 = dets[:, 1]
			
 
				+    x2 = dets[:, 2]
			
 
				+    y2 = dets[:, 3]
			
 
				+    scores = dets[:, 4]
			
 
				+
			
 
				+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
			
 
				+    order = scores.argsort()[::-1]
			
 
				+
			
 
				+    keep = []
			
 
				+    while order.size > 0:
			
 
				+        i = order[0]
			
 
				+        keep.append(i)
			
 
				+        xx1 = np.maximum(x1[i], x1[order[1:]])
			
 
				+        yy1 = np.maximum(y1[i], y1[order[1:]])
			
 
				+        xx2 = np.minimum(x2[i], x2[order[1:]])
			
 
				+        yy2 = np.minimum(y2[i], y2[order[1:]])
			
 
				+
			
 
				+        w = np.maximum(0.0, xx2 - xx1 + 1)
			
 
				+        h = np.maximum(0.0, yy2 - yy1 + 1)
			
 
				+        inter = w * h
			
 
				+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
			
 
				+
			
 
				+        inds = np.where(ovr <= threshold)[0]
			
 
				+        order = order[inds + 1]
			
 
				+
			
 
				+    return keep
			
 
				+
			
 
				+
			
 
				+def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
			
 
				+    """
			
 
				+    Decode locations from predictions using priors to undo
			
 
				+    the encoding done for offset regression at train time (RetinaFace specific).
			
 
				+
			
 
				+    Args:
			
 
				+        loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
			
 
				+        priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
			
 
				+        variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
			
 
				+
			
 
				+    Returns:
			
 
				+        np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
			
 
				+    """
			
 
				+    if variances is None:
			
 
				+        variances = [0.1, 0.2]
			
 
				+    # Compute centers of predicted boxes
			
 
				+    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
			
 
				+
			
 
				+    # Compute widths and heights of predicted boxes
			
 
				+    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
			
 
				+
			
 
				+    # Convert center, size to corner coordinates
			
 
				+    boxes = np.zeros_like(loc)
			
 
				+    boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
			
 
				+    boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax
			
 
				+
			
 
				+    return boxes
			
 
				+
			
 
				+
			
 
				+def decode_landmarks(
			
 
				+    predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
			
 
				+) -> np.ndarray:
			
 
				+    """
			
 
				+    Decode landmark predictions using prior boxes (RetinaFace specific).
			
 
				+
			
 
				+    Args:
			
 
				+        predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
			
 
				+        priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
			
 
				+        variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
			
 
				+
			
 
				+    Returns:
			
 
				+        np.ndarray: Decoded landmarks, shape: [num_priors, 10]
			
 
				+    """
			
 
				+    if variances is None:
			
 
				+        variances = [0.1, 0.2]
			
 
				+
			
 
				+    # Reshape predictions to [num_priors, 5, 2] to process landmark points
			
 
				+    predictions = predictions.reshape(predictions.shape[0], 5, 2)
			
 
				+
			
 
				+    # Expand priors to match (num_priors, 5, 2)
			
 
				+    priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
			
 
				+    priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
			
 
				+
			
 
				+    # Compute absolute landmark positions
			
 
				+    landmarks = priors_xy + predictions * variances[0] * priors_wh
			
 
				+
			
 
				+    # Flatten back to [num_priors, 10]
			
 
				+    landmarks = landmarks.reshape(landmarks.shape[0], -1)
			
 
				+
			
 
				+    return landmarks
			
 
				+
			
 
				+
			
 
				+def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
			
 
				+    """
			
 
				+    Decode distance prediction to bounding box (SCRFD specific).
			
 
				+
			
 
				+    Args:
			
 
				+        points (np.ndarray): Anchor points with shape (n, 2), [x, y].
			
 
				+        distance (np.ndarray): Distance from the given point to 4
			
 
				+            boundaries (left, top, right, bottom) with shape (n, 4).
			
 
				+        max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
			
 
				+
			
 
				+    Returns:
			
 
				+        np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
			
 
				+    """
			
 
				+    x1 = points[:, 0] - distance[:, 0]
			
 
				+    y1 = points[:, 1] - distance[:, 1]
			
 
				+    x2 = points[:, 0] + distance[:, 2]
			
 
				+    y2 = points[:, 1] + distance[:, 3]
			
 
				+
			
 
				+    if max_shape is not None:
			
 
				+        x1 = np.clip(x1, 0, max_shape[1])
			
 
				+        y1 = np.clip(y1, 0, max_shape[0])
			
 
				+        x2 = np.clip(x2, 0, max_shape[1])
			
 
				+        y2 = np.clip(y2, 0, max_shape[0])
			
 
				+    else:
			
 
				+        x1 = np.maximum(x1, 0)
			
 
				+        y1 = np.maximum(y1, 0)
			
 
				+        x2 = np.maximum(x2, 0)
			
 
				+        y2 = np.maximum(y2, 0)
			
 
				+
			
 
				+    return np.stack([x1, y1, x2, y2], axis=-1)
			
 
				+
			
 
				+
			
 
				+def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
			
 
				+    """
			
 
				+    Decode distance prediction to keypoints (SCRFD specific).
			
 
				+
			
 
				+    Args:
			
 
				+        points (np.ndarray): Anchor points with shape (n, 2), [x, y].
			
 
				+        distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
			
 
				+        max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
			
 
				+
			
 
				+    Returns:
			
 
				+        np.ndarray: Decoded keypoints with shape (n, 2k).
			
 
				+    """
			
 
				+    preds = []
			
 
				+    for i in range(0, distance.shape[1], 2):
			
 
				+        px = points[:, i % 2] + distance[:, i]
			
 
				+        py = points[:, i % 2 + 1] + distance[:, i + 1]
			
 
				+        if max_shape is not None:
			
 
				+            px = np.clip(px, 0, max_shape[1])
			
 
				+            py = np.clip(py, 0, max_shape[0])
			
 
				+        preds.append(px)
			
 
				+        preds.append(py)
			
 
				+    return np.stack(preds, axis=-1)