edgeTTS.py

# -*- coding: utf-8 -*-
from ..builder import TTSEngines
from ..engineBase import BaseTTSEngine
import edge_tts
import base64
from typing import List
from digitalHuman.protocol import *
from digitalHuman.utils import logger, mp3ToWav

__all__ = ["EdgeApiTts"]
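
# Edge TTS engine: wraps the edge_tts package behind the BaseTTSEngine
# interface, exposing a fixed zh/en voice catalogue (VOICE_LIST) and a run()
# method that returns the synthesized speech as base64-encoded audio.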
VOICE_LIST = [
    VoiceDesc(name="zh-HK-HiuGaaiNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-HK-HiuMaanNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-HK-WanLungNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-CN-XiaoxiaoNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-CN-XiaoyiNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-CN-YunjianNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-CN-YunxiNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-CN-YunxiaNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-CN-YunyangNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-CN-liaoning-XiaobeiNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-TW-HsiaoChenNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-TW-YunJheNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="zh-TW-HsiaoYuNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="zh-CN-shaanxi-XiaoniNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-AU-NatashaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-AU-WilliamNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-CA-ClaraNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-CA-LiamNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-HK-YanNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-HK-SamNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-IN-NeerjaExpressiveNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-IN-NeerjaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-IN-PrabhatNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-IE-ConnorNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-IE-EmilyNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-KE-AsiliaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-KE-ChilembaNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-NZ-MitchellNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-NZ-MollyNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-NG-AbeoNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-NG-EzinneNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-PH-JamesNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-PH-RosaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-AvaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-AndrewNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-EmmaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-BrianNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-SG-LunaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-SG-WayneNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-ZA-LeahNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-ZA-LukeNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-TZ-ElimuNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-TZ-ImaniNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-GB-LibbyNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-GB-MaisieNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-GB-RyanNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-GB-SoniaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-GB-ThomasNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-AnaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-AndrewMultilingualNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-AriaNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-AvaMultilingualNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-BrianMultilingualNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-ChristopherNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-EmmaMultilingualNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-EricNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-GuyNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-JennyNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-MichelleNeural", gender=GENDER_TYPE.FEMALE),
    VoiceDesc(name="en-US-RogerNeural", gender=GENDER_TYPE.MALE),
    VoiceDesc(name="en-US-SteffanNeural", gender=GENDER_TYPE.MALE)
]
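
# Small convenience helper, illustrative only and not part of the project API:
# filter the hard-coded catalogue by gender, e.g. to populate a UI voice picker.
def _voices_by_gender(gender) -> List[VoiceDesc]:
    return [voice for voice in VOICE_LIST if voice.gender == gender]

# Example: _voices_by_gender(GENDER_TYPE.FEMALE) returns all female VoiceDesc entries.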


@TTSEngines.register("EdgeTTS")
class EdgeApiTts(BaseTTSEngine):
    async def voices(self, **kwargs) -> List[VoiceDesc]:
        return VOICE_LIST
        # Everything below is unreachable: it builds VOICE_LIST-style entries
        # from the live edge_tts catalogue and is kept here for reference.
        """
        Structure of an edge_tts voice entry:
        [{
            'Name': 'Microsoft Server Speech Text to Speech Voice (af-ZA, AdriNeural)',
            'ShortName': 'af-ZA-AdriNeural',
            'Gender': 'Female',
            'Locale': 'af-ZA',
            'SuggestedCodec': 'audio-24khz-48kbitrate-mono-mp3',
            'FriendlyName': 'Microsoft Adri Online (Natural) - Afrikaans (South Africa)',
            'Status': 'GA',
            'VoiceTag': {'ContentCategories': ['General'], 'VoicePersonalities': ['Friendly', 'Positive']}
        }, ...]
        """
        voices = await edge_tts.list_voices()
        # Keep only Chinese (zh) and English (en) voices
        voices = [voice for voice in voices if voice['ShortName'].startswith('zh') or voice['ShortName'].startswith('en')]
        test = [VoiceDesc(name=voice['ShortName'], gender=GENDER_TYPE.FEMALE if voice['Gender'] == 'Female' else GENDER_TYPE.MALE) for voice in voices]
        for t in test:
            print(f'VoiceDesc(name="{t.name}", gender={"GENDER_TYPE.FEMALE" if t.gender == GENDER_TYPE.FEMALE else "GENDER_TYPE.MALE"}),')
        return [VoiceDesc(name=voice['ShortName'], gender=GENDER_TYPE.FEMALE if voice['Gender'] == 'Female' else GENDER_TYPE.MALE) for voice in voices]
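
    # run(): resolve voice / rate / volume / pitch from kwargs (falling back to
    # the engine parameter defaults), convert them to the signed "+N%" / "+NHz"
    # strings edge_tts expects, stream the synthesized audio and return it
    # base64-encoded as an AudioMessage.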
    async def run(self, input: TextMessage, **kwargs) -> AudioMessage:
        # Fill parameters from kwargs, falling back to the engine defaults
        for parameter in self.parameters():
            if parameter.name == "voice":
                voice = parameter.default if parameter.name not in kwargs else kwargs[parameter.name]
            if parameter.name == "rate":
                rate = parameter.default if parameter.name not in kwargs else kwargs[parameter.name]
            if parameter.name == "volume":
                volume = parameter.default if parameter.name not in kwargs else kwargs[parameter.name]
            if parameter.name == "pitch":
                pitch = parameter.default if parameter.name not in kwargs else kwargs[parameter.name]
        if not voice:
            raise KeyError("EdgeTTS tts voice is required")
        logger.debug(f"[TTS] Engine input[{voice}]: {input.data}")
        # edge_tts expects signed strings: rate/volume as "+N%" / "-N%", pitch as "+NHz" / "-NHz"
        rate = f"+{rate}%" if rate >= 0 else f"{rate}%"
        volume = f"+{volume}%" if volume >= 0 else f"{volume}%"
        pitch = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
        communicate = edge_tts.Communicate(
            text=input.data,
            voice=voice,
            rate=rate,
            volume=volume,
            pitch=pitch
        )
        data = b''
        async for message in communicate.stream():
            if message["type"] == "audio":
                data += message["data"]
        # mp3 -> wav
        # data = mp3ToWav(data)
        message = AudioMessage(
            data=base64.b64encode(data).decode('utf-8'),
            sampleRate=16000,
            sampleWidth=2,
        )
        return message
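

# Minimal standalone sketch of the edge_tts calls run() relies on: build a
# Communicate with the signed "+N%" rate/volume and "+NHz" pitch strings,
# stream the MP3 chunks, and save them for a manual listen. Because of the
# relative imports above this module is normally loaded through the engine
# registry, so treat this as a copy-out snippet; the example text, output
# filename and chosen voice are illustrative only.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        communicate = edge_tts.Communicate(
            text="Hello from the EdgeTTS engine.",
            voice="en-US-AriaNeural",
            rate="+0%",
            volume="+0%",
            pitch="+0Hz",
        )
        audio = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio += chunk["data"]
        with open("edge_tts_demo.mp3", "wb") as f:
            f.write(audio)

    asyncio.run(_demo())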