dashscopeASR.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import base64
  4. import asyncio
  5. import tempfile
  6. from http import HTTPStatus
  7. from dashscope.audio.asr import Recognition
  8. from digitalHuman.utils import logger
  9. from digitalHuman.engine.builder import ASREngines
  10. from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE, DATA_TYPE
  11. from digitalHuman.engine.engineBase import BaseASREngine
  12. __all__ = ["DashscopeASR"]
  @ASREngines.register("dashscopeASR")
  class DashscopeASR(BaseASREngine):
      """ASR engine that sends audio to DashScope's ``Recognition`` API.

      The audio payload is written to a temporary file, recognized through a
      synchronous SDK call running in the default executor, and the recognized
      text is returned wrapped in a ``TextMessage``.
      """

      # Models for which ``language_hints`` is forwarded to ``Recognition``;
      # other models (e.g. fun-asr-realtime) are called without it.
      _LANGUAGE_HINT_MODELS = ('paraformer-realtime-v2', 'paraformer-v2')

      def setup(self):
          """Configure the DashScope API key.

          The key is read from the ``api_key`` entry of ``self.custom()`` and,
          when that is empty, from the ``DASHSCOPE_API_KEY`` environment
          variable. A warning is logged when neither source provides a key.

          Raises:
              ImportError: If the ``dashscope`` package is not installed.
              Exception: Any other error raised during setup is logged and
                  re-raised unchanged.
          """
          try:
              import dashscope

              # Explicit configuration takes precedence over the environment.
              custom_config = self.custom()
              api_key = custom_config.get('api_key') or os.getenv('DASHSCOPE_API_KEY')
              if api_key:
                  dashscope.api_key = api_key
                  logger.info("[DashscopeASR] API Key configured successfully")
              else:
                  logger.warning("[DashscopeASR] No API Key found, please set DASHSCOPE_API_KEY environment variable or configure in yaml")
          except ImportError:
              logger.error("[DashscopeASR] Please install dashscope: pip install dashscope")
              raise
          except Exception as e:
              logger.error(f"[DashscopeASR] Setup error: {e}")
              raise

      @staticmethod
      def _write_temp_audio(audio_data, suffix):
          """Write ``audio_data`` to a new temporary file and return its path."""
          tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
          try:
              with tmp_file:
                  tmp_file.write(audio_data)
          except Exception:
              # The file was created with delete=False, so a failed write
              # would otherwise leave it behind.
              DashscopeASR._remove_temp_audio(tmp_file.name)
              raise
          return tmp_file.name

      @staticmethod
      def _remove_temp_audio(audio_path):
          """Delete the temporary audio file; a failure is logged, not raised."""
          try:
              os.remove(audio_path)
          except Exception as e:
              logger.warning(f"[DashscopeASR] Failed to remove temp file: {e}")

      @staticmethod
      def _extract_text(sentence):
          """Return the recognized text held in a ``get_sentence()`` result.

          Handles a dict (its ``text`` field), a list (the text of every
          entry, concatenated in order), a plain string, and falls back to
          ``str()`` for any other truthy value.
          """
          if isinstance(sentence, dict):
              return sentence.get('text', '')
          if isinstance(sentence, str):
              return sentence
          if isinstance(sentence, list):
              # NOTE(review): a file recognition appears to yield one entry per
              # recognized sentence, so all entries are joined rather than
              # keeping only the first one - confirm against the SDK docs.
              return "".join(
                  str(item.get('text') or '') if isinstance(item, dict) else str(item)
                  for item in sentence
              )
          return str(sentence) if sentence else ''

      async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
          """Recognize speech in ``input`` and return the text.

          Args:
              input: Audio message; ``input.data`` holds the audio either as
                  raw bytes or as a base64-encoded string.
              **kwargs: Engine parameters, validated by ``checkParameter``.
                  Recognized keys: ``model`` (default ``"fun-asr-realtime"``),
                  ``sample_rate`` (default ``16000``), ``format`` (default
                  ``"wav"``) and ``language_hints`` (default ``["zh", "en"]``).

          Returns:
              A ``TextMessage`` whose ``data`` is the recognized text.

          Raises:
              RuntimeError: If the service responds with a non-OK status.
              Exception: Any other error is logged and re-raised unchanged.
          """
          parameters = self.checkParameter(**kwargs)
          model = parameters.get("model", "fun-asr-realtime")
          sample_rate = parameters.get("sample_rate", 16000)
          format_type = parameters.get("format", "wav")
          language_hints = parameters.get("language_hints", ["zh", "en"])

          try:
              audio_data = input.data
              if isinstance(audio_data, str):
                  # A string payload is treated as base64-encoded audio.
                  audio_data = base64.b64decode(audio_data)

              # recognition.call is given a file path, so persist the audio first.
              file_suffix = f'.{input.type}' if hasattr(input, 'type') else '.wav'
              audio_path = self._write_temp_audio(audio_data, file_suffix)

              try:
                  logger.debug(f"[DashscopeASR] Using model: {model}, format: {format_type}, sample_rate: {sample_rate}")

                  recognition_kwargs = {
                      "model": model,
                      "format": format_type,
                      "sample_rate": sample_rate,
                      "callback": None,
                  }
                  if model in self._LANGUAGE_HINT_MODELS:
                      recognition_kwargs["language_hints"] = language_hints
                  recognition = Recognition(**recognition_kwargs)

                  # The SDK call is synchronous; run it in the default executor
                  # so the event loop is not blocked while it runs.
                  logger.debug(f"[DashscopeASR] Starting recognition for audio file: {audio_path}")
                  result = await asyncio.get_running_loop().run_in_executor(
                      None, recognition.call, audio_path
                  )
              finally:
                  # Always remove the temp file, including when recognition fails.
                  self._remove_temp_audio(audio_path)

              if result.status_code != HTTPStatus.OK:
                  error_msg = f"Recognition failed: {result.message}"
                  logger.error(f"[DashscopeASR] {error_msg}")
                  raise RuntimeError(error_msg)

              sentence = result.get_sentence()
              logger.debug(f"[DashscopeASR] Sentence type: {type(sentence)}, content: {sentence}")
              text = self._extract_text(sentence)

              logger.info(f"[DashscopeASR] Recognition result: {text}")
              logger.debug(
                  f"[Metric] requestId: {recognition.get_last_request_id()}, "
                  f"first package delay ms: {recognition.get_first_package_delay()}, "
                  f"last package delay ms: {recognition.get_last_package_delay()}"
              )
              return TextMessage(data=text)
          except Exception as e:
              logger.error(f"[DashscopeASR] Error during recognition: {e}")
              raise