| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- # -*- coding: utf-8 -*-
- from ..builder import TTSEngines
- from ..engineBase import BaseTTSEngine
- import edge_tts
- import base64
- from typing import List
- from digitalHuman.protocol import *
- from digitalHuman.utils import logger, mp3ToWav
__all__ = ["EdgeApiTts"]

# Static catalogue of the Chinese (zh-*) and English (en-*) voices offered by
# this engine. Each (short name, gender) pair below becomes one VoiceDesc;
# the order of the table is the order of the resulting list.
VOICE_LIST = [
    VoiceDesc(name=voice_name, gender=voice_gender)
    for voice_name, voice_gender in (
        # Chinese voices
        ("zh-HK-HiuGaaiNeural", GENDER_TYPE.FEMALE),
        ("zh-HK-HiuMaanNeural", GENDER_TYPE.FEMALE),
        ("zh-HK-WanLungNeural", GENDER_TYPE.MALE),
        ("zh-CN-XiaoxiaoNeural", GENDER_TYPE.FEMALE),
        ("zh-CN-XiaoyiNeural", GENDER_TYPE.FEMALE),
        ("zh-CN-YunjianNeural", GENDER_TYPE.MALE),
        ("zh-CN-YunxiNeural", GENDER_TYPE.MALE),
        ("zh-CN-YunxiaNeural", GENDER_TYPE.MALE),
        ("zh-CN-YunyangNeural", GENDER_TYPE.MALE),
        ("zh-CN-liaoning-XiaobeiNeural", GENDER_TYPE.FEMALE),
        ("zh-TW-HsiaoChenNeural", GENDER_TYPE.FEMALE),
        ("zh-TW-YunJheNeural", GENDER_TYPE.MALE),
        ("zh-TW-HsiaoYuNeural", GENDER_TYPE.FEMALE),
        ("zh-CN-shaanxi-XiaoniNeural", GENDER_TYPE.FEMALE),
        # English voices
        ("en-AU-NatashaNeural", GENDER_TYPE.FEMALE),
        ("en-AU-WilliamNeural", GENDER_TYPE.MALE),
        ("en-CA-ClaraNeural", GENDER_TYPE.FEMALE),
        ("en-CA-LiamNeural", GENDER_TYPE.MALE),
        ("en-HK-YanNeural", GENDER_TYPE.FEMALE),
        ("en-HK-SamNeural", GENDER_TYPE.MALE),
        ("en-IN-NeerjaExpressiveNeural", GENDER_TYPE.FEMALE),
        ("en-IN-NeerjaNeural", GENDER_TYPE.FEMALE),
        ("en-IN-PrabhatNeural", GENDER_TYPE.MALE),
        ("en-IE-ConnorNeural", GENDER_TYPE.MALE),
        ("en-IE-EmilyNeural", GENDER_TYPE.FEMALE),
        ("en-KE-AsiliaNeural", GENDER_TYPE.FEMALE),
        ("en-KE-ChilembaNeural", GENDER_TYPE.MALE),
        ("en-NZ-MitchellNeural", GENDER_TYPE.MALE),
        ("en-NZ-MollyNeural", GENDER_TYPE.FEMALE),
        ("en-NG-AbeoNeural", GENDER_TYPE.MALE),
        ("en-NG-EzinneNeural", GENDER_TYPE.FEMALE),
        ("en-PH-JamesNeural", GENDER_TYPE.MALE),
        ("en-PH-RosaNeural", GENDER_TYPE.FEMALE),
        ("en-US-AvaNeural", GENDER_TYPE.FEMALE),
        ("en-US-AndrewNeural", GENDER_TYPE.MALE),
        ("en-US-EmmaNeural", GENDER_TYPE.FEMALE),
        ("en-US-BrianNeural", GENDER_TYPE.MALE),
        ("en-SG-LunaNeural", GENDER_TYPE.FEMALE),
        ("en-SG-WayneNeural", GENDER_TYPE.MALE),
        ("en-ZA-LeahNeural", GENDER_TYPE.FEMALE),
        ("en-ZA-LukeNeural", GENDER_TYPE.MALE),
        ("en-TZ-ElimuNeural", GENDER_TYPE.MALE),
        ("en-TZ-ImaniNeural", GENDER_TYPE.FEMALE),
        ("en-GB-LibbyNeural", GENDER_TYPE.FEMALE),
        ("en-GB-MaisieNeural", GENDER_TYPE.FEMALE),
        ("en-GB-RyanNeural", GENDER_TYPE.MALE),
        ("en-GB-SoniaNeural", GENDER_TYPE.FEMALE),
        ("en-GB-ThomasNeural", GENDER_TYPE.MALE),
        ("en-US-AnaNeural", GENDER_TYPE.FEMALE),
        ("en-US-AndrewMultilingualNeural", GENDER_TYPE.MALE),
        ("en-US-AriaNeural", GENDER_TYPE.FEMALE),
        ("en-US-AvaMultilingualNeural", GENDER_TYPE.FEMALE),
        ("en-US-BrianMultilingualNeural", GENDER_TYPE.MALE),
        ("en-US-ChristopherNeural", GENDER_TYPE.MALE),
        ("en-US-EmmaMultilingualNeural", GENDER_TYPE.FEMALE),
        ("en-US-EricNeural", GENDER_TYPE.MALE),
        ("en-US-GuyNeural", GENDER_TYPE.MALE),
        ("en-US-JennyNeural", GENDER_TYPE.FEMALE),
        ("en-US-MichelleNeural", GENDER_TYPE.FEMALE),
        ("en-US-RogerNeural", GENDER_TYPE.MALE),
        ("en-US-SteffanNeural", GENDER_TYPE.MALE),
    )
]
@TTSEngines.register("EdgeTTS")
class EdgeApiTts(BaseTTSEngine):
    """Text-to-speech engine backed by the ``edge_tts`` package.

    Registered with ``TTSEngines`` under the name ``"EdgeTTS"``.
    """

    async def voices(self, **kwargs) -> List[VoiceDesc]:
        """Return the voices this engine offers.

        Returns:
            The module-level ``VOICE_LIST`` (the same list object on every call).
        """
        # VOICE_LIST is a hard-coded snapshot. It was generated by filtering
        # the result of `await edge_tts.list_voices()` to entries whose
        # 'ShortName' starts with "zh" or "en" and mapping their 'Gender'
        # field ('Female' -> FEMALE, anything else -> MALE).
        return VOICE_LIST

    @staticmethod
    def _signed(value, unit: str) -> str:
        """Format ``value`` with an explicit sign and ``unit`` suffix.

        edge_tts expects prosody strings such as "+10%", "-5%" or "+0Hz":
        non-negative values need a leading "+", negative values already carry
        their "-" when converted to text.
        """
        sign = "+" if value >= 0 else ""
        return f"{sign}{value}{unit}"

    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
        """Synthesize ``input.data`` to speech and return it base64-encoded.

        Args:
            input: Message whose ``data`` attribute holds the text to speak.
            **kwargs: Optional overrides for the engine parameters ``voice``,
                ``rate``, ``volume`` and ``pitch``. An override is honoured
                only for parameters declared by ``self.parameters()``; the
                declared default is used otherwise.

        Returns:
            An ``AudioMessage`` whose ``data`` is the base64 text of all audio
            chunks streamed by edge_tts, concatenated in arrival order.

        Raises:
            KeyError: If no voice is configured or supplied.
        """
        # Fallbacks used when the engine does not declare a parameter at all.
        # Zero offsets format to "+0%" / "+0Hz", i.e. no prosody change, and a
        # missing voice is reported by the KeyError below instead of an
        # UnboundLocalError.
        settings = {"voice": None, "rate": 0, "volume": 0, "pitch": 0}
        for parameter in self.parameters():
            if parameter.name in settings:
                settings[parameter.name] = kwargs.get(parameter.name, parameter.default)

        voice = settings["voice"]
        if not voice:
            raise KeyError("LitAPI tts voice is required")
        logger.debug(f"[TTS] Engine input[{voice}]: {input.data}")

        communicate = edge_tts.Communicate(
            text=input.data,
            voice=voice,
            rate=self._signed(settings["rate"], "%"),
            volume=self._signed(settings["volume"], "%"),
            # The unit must be spelled "Hz" for negative values too; the
            # previous "HZ" spelling was rejected by edge_tts.
            pitch=self._signed(settings["pitch"], "Hz"),
        )

        # The stream yields several message types; only "audio" ones carry
        # payload bytes. A bytearray avoids re-copying the buffer per chunk.
        audio = bytearray()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio.extend(chunk["data"])

        # NOTE(review): the mp3 -> wav conversion (mp3ToWav) is intentionally
        # not applied here, so the payload is whatever edge_tts streamed,
        # while sampleRate/sampleWidth below are fixed values. Confirm that
        # consumers do not rely on them matching the payload.
        return AudioMessage(
            data=base64.b64encode(bytes(audio)).decode('utf-8'),
            sampleRate=16000,
            sampleWidth=2,
        )
|