# Source: zhouqi/usky-human
							# -*- coding: utf-8 -*-


from ..builder import TTSEngines
from ..engineBase import BaseTTSEngine
import base64
import httpx
from digitalHuman.protocol import *
from digitalHuman.utils import logger, mp3ToWav

__all__ = ["DifyApiTts"]


@TTSEngines.register("Dify")
class DifyApiTts(BaseTTSEngine):
    """TTS engine that posts text to a Dify ``/text-to-audio`` HTTP endpoint.

    A single pooled ``httpx.AsyncClient`` is shared by all requests so that
    connections are reused instead of being re-established on every call.
    """

    def _createClient(self) -> httpx.AsyncClient:
        """Build the pooled HTTP client used for every TTS request."""
        return httpx.AsyncClient(
            # Connect quickly or fail; allow a longer read window because the
            # server may need some time to synthesize the audio.
            timeout=httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0),
            limits=httpx.Limits(max_keepalive_connections=10, max_connections=20),
            # NOTE: HTTP/2 is intentionally left disabled; enabling it requires
            # the optional dependency (`pip install httpx[http2]`).
        )

    def setup(self) -> None:
        """Run the base-class setup and create the shared HTTP client."""
        super().setup()
        self._client = self._createClient()

    def release(self) -> None:
        """Run the base-class release and dispose of the HTTP client.

        ``release`` is synchronous while ``AsyncClient.aclose()`` is a
        coroutine. When an event loop is running in the current thread the
        close is scheduled on it; otherwise the reference is simply dropped
        (the previous behaviour), because driving the close from a foreign
        loop or thread would not be safe.
        """
        super().release()
        client = getattr(self, "_client", None)
        self._client = None
        if client is None:
            return

        import asyncio  # local import: only needed on this teardown path

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop in this thread: nothing we can safely await on.
            return
        # Keep a reference so the task is not garbage-collected before it runs.
        self._closeTask = loop.create_task(client.aclose())

    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
        """Synthesize speech for ``input`` through the Dify API.

        Args:
            input: Message whose ``data`` attribute holds the text to speak.
            **kwargs: Engine parameters forwarded to ``checkParameter``; the
                result must provide ``api_server``, ``api_key`` and
                ``username``.

        Returns:
            An ``AudioMessage`` whose ``data`` is the base64-encoded response
            body.

        Raises:
            RuntimeError: On a timeout, a transport-level request error, or a
                non-200 HTTP status.
        """
        parameters = self.checkParameter(**kwargs)
        apiServer = parameters["api_server"]
        apiKey = parameters["api_key"]
        apiUsername = parameters["username"]

        headers = {
            'Authorization': f'Bearer {apiKey}',
            'Content-Type': 'application/json',
            'Accept': 'audio/*',  # explicitly ask for an audio response
        }
        payload = {
            "text": input.data,
            "user": apiUsername,
        }

        # Log at most the first 50 characters of the text.
        text = input.data
        preview = f"{text[:50]}..." if len(text) > 50 else text
        logger.debug(f"[TTS] Engine input: {preview}")

        # Recreate the client if release() was called (or setup() was skipped)
        # so that a later run() does not fail on a missing/None client.
        client = getattr(self, "_client", None)
        if client is None:
            client = self._client = self._createClient()

        # Only the network call lives in the try block, so that the API-status
        # error raised below is not reported as an "unexpected" failure.
        try:
            response = await client.post(
                apiServer.rstrip('/') + "/text-to-audio",
                json=payload,
                headers=headers,
                follow_redirects=True,
            )
        except httpx.TimeoutException as e:
            logger.error(f"[TTS] Request timeout: {e}")
            raise RuntimeError(f"DifyAPI tts request timeout: {e}") from e
        except httpx.RequestError as e:
            logger.error(f"[TTS] Request error: {e}")
            raise RuntimeError(f"DifyAPI tts request error: {e}") from e
        except Exception as e:
            logger.error(f"[TTS] Unexpected error: {e}")
            raise

        if response.status_code != 200:
            error_msg = f"DifyAPI tts api error: {response.status_code}"
            if response.text:
                # Include a bounded excerpt of the body to aid debugging.
                error_msg += f", response: {response.text[:200]}"
            logger.error(f"[TTS] {error_msg}")
            raise RuntimeError(error_msg)

        # The response body is passed through without any conversion.
        # NOTE(review): the message is labelled 16 kHz / 16-bit, yet the bytes
        # are whatever the server returned and `mp3ToWav` is imported but not
        # used here -- confirm the endpoint's audio format matches.
        audio_content = response.content
        message = AudioMessage(
            data=base64.b64encode(audio_content).decode('utf-8'),
            sampleRate=16000,
            sampleWidth=2,
        )

        logger.debug(f"[TTS] Successfully generated audio, size: {len(audio_content)} bytes")
        return message