# zhouqi / usky-human
# -*- coding: utf-8 -*-


from ..builder import TTSEngines
from ..engineBase import BaseTTSEngine
import hashlib
import hmac
import time
import json
from uuid import uuid4
from datetime import datetime, timezone
from typing import Tuple, Dict
from digitalHuman.protocol import *
from digitalHuman.utils import logger, httpxAsyncClient
from pydantic import BaseModel
from typing import List, Optional
from decimal import Decimal


__all__ = ["TencentApiTts"]


# Presumably the maximum number of characters accepted per synthesis request.
# NOTE(review): not referenced anywhere in the visible part of this module, so
# nothing here enforces it — confirm whether a caller or base class uses it.
MAX_INPUT_LENGTH = 150

# Emotion category values: neutral, sad, happy, angry, fear,
# sajiao (coquettish), amaze (shocked), disgusted, peaceful (calm).
class TencentVoiceEmotion(StrEnum):
    """Emotion category strings placed in the request's ``EmotionCategory`` field.

    ``NEUTRAL`` is the default ``emotionCategory`` of
    ``TencentApiTts._buildRequest``.
    """
    NEUTRAL = "neutral"
    SAD = "sad"
    HAPPY = "happy"
    ANGRY = "angry"
    FEAR = "fear"
    SAJIAO = "sajiao"
    AMAZE = "amaze"
    DISGUSTED = "disgusted"
    PEACEFUL = "peaceful"

class TencentVoiceDesc(BaseModel):
    """Static description of one selectable Tencent TTS voice."""
    # Numeric voice identifier; sent as "VoiceType" in the request payload.
    id: int
    # Display name; this is the key findVoice() matches against.
    name: str
    # Speaker gender, exposed to callers through TencentApiTts.voices().
    gender: GENDER_TYPE
    # Human-readable language label (the catalogue uses "中文" / "英文").
    language: str
    # Presumably marks voices that accept an emotion category — TODO confirm
    # against the Tencent voice list.
    multi_emotional: bool

# Catalogue of the voices this engine offers. `id` is what gets sent to the API
# as "VoiceType"; `name` is the lookup key used by findVoice().
# 501xxx entries are single-emotion voices, 601xxx entries are flagged as
# multi-emotional.
VOICE_LIST = [
    TencentVoiceDesc(id=501000, name="智斌", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501001, name="智兰", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501002, name="智菊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501003, name="智宇", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501004, name="月华", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501005, name="飞镜", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501006, name="千嶂", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
    TencentVoiceDesc(id=501007, name="浅草", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=False),
    # WeJames / WeWinny are the two English-language voices.
    TencentVoiceDesc(id=501008, name="WeJames", gender=GENDER_TYPE.MALE, language="英文", multi_emotional=False),
    TencentVoiceDesc(id=501009, name="WeWinny", gender=GENDER_TYPE.FEMALE, language="英文", multi_emotional=False),
    TencentVoiceDesc(id=601000, name="爱小溪", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601001, name="爱小洛", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601002, name="爱小辰", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601003, name="爱小荷", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601004, name="爱小树", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601005, name="爱小静", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601006, name="爱小耀", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601007, name="爱小叶", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601008, name="爱小豪", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601009, name="爱小芊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601010, name="爱小娇", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601011, name="爱小川", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601012, name="爱小璟", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601013, name="爱小伊", gender=GENDER_TYPE.FEMALE, language="中文", multi_emotional=True),
    TencentVoiceDesc(id=601014, name="爱小简", gender=GENDER_TYPE.MALE, language="中文", multi_emotional=True),
]

class TencentCloudApiKey(BaseModel):
    """Tencent Cloud credential pair used to sign a TTS request."""
    # Public key identifier; embedded in the "Credential=" part of the
    # Authorization header.
    secret_id: str
    # Private key; prefixed with "TC3" to seed the HMAC signing-key derivation.
    secret_key: str

def findVoice(name: str) -> Optional[TencentVoiceDesc]:
    """Look up a voice in ``VOICE_LIST`` by its exact display name.

    Args:
        name: Voice name to match (case-sensitive equality).

    Returns:
        The first catalogue entry whose ``name`` equals *name*, or ``None``
        when no entry matches.
    """
    matches = (candidate for candidate in VOICE_LIST if candidate.name == name)
    return next(matches, None)

def sign(key, msg: str):
    """Return the raw HMAC-SHA256 digest of *msg* keyed with *key*.

    Args:
        key: Bytes-like HMAC key.
        msg: Text to authenticate; encoded as UTF-8 before hashing.

    Returns:
        The 32-byte binary digest (not hex-encoded), so it can be fed
        straight back in as the key of the next derivation step.
    """
    encoded = msg.encode("utf-8")
    mac = hmac.new(key, encoded, hashlib.sha256)
    return mac.digest()

@TTSEngines.register("Tencent-API")
class TencentApiTts(BaseTTSEngine):
    """TTS engine that calls the Tencent Cloud ``TextToVoice`` HTTP API.

    Each request is signed with the TC3-HMAC-SHA256 scheme, posted as JSON to
    ``self._url``, and the ``Audio`` field of the reply is wrapped in an
    ``AudioMessage``.
    """

    def setup(self):
        """Record the endpoint that ``run`` posts to."""
        self._url = "https://tts.tencentcloudapi.com"

    def _buildRequest(
        self,
        input: TextMessage,
        tencentApiKey: TencentCloudApiKey,
        voice: str,
        volume: float,
        speed: float,
        emotionCategory: str = TencentVoiceEmotion.NEUTRAL
    ) -> Tuple[Dict, str]:
        """Build the signed headers and JSON body for one ``TextToVoice`` call.

        Args:
            input: Message whose ``data`` is the text to synthesize.
            tencentApiKey: Credential pair used for the TC3 signature.
            voice: Display name of a voice in ``VOICE_LIST``.
            volume: Value sent as ``Volume``.
            speed: Value sent as ``Speed``.
            emotionCategory: Value sent as ``EmotionCategory``; only included
                when the selected voice is flagged ``multi_emotional``.

        Returns:
            ``(headers, payload)`` where *payload* is the exact JSON string
            that was hashed into the signature and must be sent unmodified.

        Raises:
            ValueError: If *voice* is not present in ``VOICE_LIST``.
        """
        service = "tts"
        host = "tts.tencentcloudapi.com"
        version = "2019-08-23"
        action = "TextToVoice"
        algorithm = "TC3-HMAC-SHA256"
        # Single source for the content type: it is both signed and sent, and
        # the two must match byte-for-byte or the signature is rejected.
        content_type = "application/json; charset=utf-8"
        timestamp = int(time.time())
        # The credential scope uses the UTC calendar date of the timestamp.
        date = datetime.fromtimestamp(timestamp, timezone.utc).strftime("%Y-%m-%d")

        tencentVoice = findVoice(voice)
        if not tencentVoice:
            raise ValueError("voice not found")

        params = {
            "Text": input.data,
            "SessionId": str(uuid4()),
            "VoiceType": tencentVoice.id,
            "Codec": "mp3",
            "Volume": volume,
            "Speed": speed,
        }
        # EmotionCategory is only meaningful for multi-emotion voices, so it is
        # left out of the request for every other voice.
        if tencentVoice.multi_emotional:
            params["EmotionCategory"] = emotionCategory
        payload = json.dumps(params)

        # ---- Step 1: build the canonical request ----
        signed_headers = "content-type;host;x-tc-action"
        canonical_headers = (
            f"content-type:{content_type}\nhost:{host}\nx-tc-action:{action.lower()}\n"
        )
        hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
        canonical_request = "\n".join([
            "POST",             # HTTP method
            "/",                # canonical URI
            "",                 # canonical query string (empty for POST)
            canonical_headers,
            signed_headers,
            hashed_request_payload,
        ])

        # ---- Step 2: build the string to sign ----
        credential_scope = f"{date}/{service}/tc3_request"
        hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
        string_to_sign = "\n".join([
            algorithm,
            str(timestamp),
            credential_scope,
            hashed_canonical_request,
        ])

        # ---- Step 3: derive the signing key and compute the signature ----
        # Each derivation step uses the previous raw digest as the HMAC key.
        secret_date = sign(("TC3" + tencentApiKey.secret_key).encode("utf-8"), date)
        secret_service = sign(secret_date, service)
        secret_signing = sign(secret_service, "tc3_request")
        signature = hmac.new(
            secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256
        ).hexdigest()

        # ---- Step 4: assemble the Authorization header ----
        authorization = (
            f"{algorithm} "
            f"Credential={tencentApiKey.secret_id}/{credential_scope}, "
            f"SignedHeaders={signed_headers}, "
            f"Signature={signature}"
        )

        # ---- Step 5: assemble the request headers ----
        headers = {
            "Authorization": authorization,
            "Content-Type": content_type,
            "Host": host,
            "X-TC-Action": action,
            "X-TC-Timestamp": str(timestamp),
            "X-TC-Version": version
        }

        return (headers, payload)

    async def voices(self, **kwargs) -> List[VoiceDesc]:
        """Return name and gender of every voice in ``VOICE_LIST``."""
        return [VoiceDesc(name=v.name, gender=v.gender) for v in VOICE_LIST]

    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
        """Synthesize ``input.data`` and return the resulting audio.

        Args:
            input: Message whose ``data`` is the text to synthesize.
            **kwargs: Engine parameters, validated by ``self.checkParameter``;
                the result must provide ``voice``, ``speed``, ``volume``,
                ``secret_id`` and ``secret_key``.

        Returns:
            An ``AudioMessage`` carrying the ``Audio`` field of the API reply.

        Raises:
            ValueError: If the requested voice is unknown.
            RuntimeError: If the HTTP status is not 200, the API reply carries
                an ``Error`` object, or the reply has no ``Audio`` field.
        """
        # Parameter validation.
        parameters = self.checkParameter(**kwargs)
        voice = parameters["voice"]
        speed = parameters["speed"]
        volume = parameters["volume"]
        apiKey = TencentCloudApiKey(
            secret_id=parameters["secret_id"],
            secret_key=parameters["secret_key"],
        )
        headers, payload = self._buildRequest(input, apiKey, voice, volume, speed)
        logger.debug(f"[TTS] Engine input: {input.data}")
        # NOTE(review): if httpxAsyncClient is an httpx.AsyncClient, passing a
        # str through data= is deprecated in favour of content= — confirm.
        response = await httpxAsyncClient.post(self._url, headers=headers, data=payload)
        if response.status_code != 200:
            raise RuntimeError(f"Builtin tts api error: {response.status_code}")

        # Business errors come back inside a 200 reply as Response.Error, so
        # they must be checked for explicitly instead of failing on a KeyError.
        result = response.json().get("Response", {})
        error = result.get("Error")
        if error:
            raise RuntimeError(
                f"Tencent tts api error: {error.get('Code')}: {error.get('Message')}"
            )
        if "Audio" not in result:
            raise RuntimeError("Tencent tts api error: response contains no audio")

        # NOTE(review): the request asks for mp3 while the message is tagged
        # 16000 Hz / 2-byte samples — confirm downstream handles this.
        message = AudioMessage(
            data=result["Audio"],
            sampleRate=16000,
            sampleWidth=2,
        )
        return message