| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- # -*- coding: utf-8 -*-
- import os
- import base64
- import asyncio
- import tempfile
- from http import HTTPStatus
- from dashscope.audio.asr import Recognition
- from digitalHuman.utils import logger
- from digitalHuman.engine.builder import ASREngines
- from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE, DATA_TYPE
- from digitalHuman.engine.engineBase import BaseASREngine
- __all__ = ["DashscopeASR"]
@ASREngines.register("dashscopeASR")
class DashscopeASR(BaseASREngine):
    """ASR engine that transcribes audio through DashScope's ``Recognition`` API.

    Registered with ``ASREngines`` under the name ``"dashscopeASR"``.
    """

    # Models that are given ``language_hints``. Per the original author's note,
    # other models (e.g. ``fun-asr-realtime``) do not support that argument.
    _LANGUAGE_HINT_MODELS = ("paraformer-realtime-v2", "paraformer-v2")

    def setup(self):
        """Configure the DashScope API key.

        The key is read from the ``api_key`` entry of ``self.custom()`` and
        falls back to the ``DASHSCOPE_API_KEY`` environment variable. A missing
        key only produces a warning; it does not raise.

        Raises:
            ImportError: If the ``dashscope`` package cannot be imported.
            Exception: Any other error during configuration is logged and
                re-raised unchanged.
        """
        try:
            import dashscope

            # Explicit configuration wins over the environment variable.
            custom_config = self.custom()
            api_key = custom_config.get('api_key') or os.getenv('DASHSCOPE_API_KEY')
            if api_key:
                dashscope.api_key = api_key
                logger.info("[DashscopeASR] API Key configured successfully")
            else:
                logger.warning("[DashscopeASR] No API Key found, please set DASHSCOPE_API_KEY environment variable or configure in yaml")
        except ImportError:
            logger.error("[DashscopeASR] Please install dashscope: pip install dashscope")
            raise
        except Exception as e:
            logger.error(f"[DashscopeASR] Setup error: {e}")
            raise

    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
        """Run speech recognition on one audio message.

        The audio payload is written to a temporary file, handed to the
        blocking ``Recognition.call`` in the default executor, and the
        temporary file is removed afterwards whether or not recognition
        succeeded.

        Args:
            input: Audio message. ``input.data`` is either raw bytes or a
                base64-encoded string; ``input.type`` (when present) is used
                as the temporary file's suffix.
            **kwargs: Engine parameters, validated by ``self.checkParameter``.
                Recognized keys: ``model`` (default ``"fun-asr-realtime"``),
                ``sample_rate`` (default ``16000``), ``format`` (default
                ``"wav"``) and ``language_hints`` (default ``["zh", "en"]``).

        Returns:
            A ``TextMessage`` whose ``data`` is the recognized text.

        Raises:
            RuntimeError: If the service responds with a non-OK status code.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        parameters = self.checkParameter(**kwargs)
        model = parameters.get("model", "fun-asr-realtime")
        sample_rate = parameters.get("sample_rate", 16000)
        format_type = parameters.get("format", "wav")
        language_hints = parameters.get("language_hints", ["zh", "en"])

        # Path of the temp file once it exists, so ``finally`` knows what to delete.
        audio_path = None
        try:
            audio_data = input.data
            if isinstance(audio_data, str):
                # String payloads are treated as base64-encoded audio.
                audio_data = base64.b64decode(audio_data)

            # ``Recognition.call`` is given a file path, so persist the audio first.
            file_suffix = f'.{input.type}' if hasattr(input, 'type') else '.wav'
            with tempfile.NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp_file:
                # Record the path before writing so a failed write is still cleaned up.
                audio_path = tmp_file.name
                tmp_file.write(audio_data)

            logger.debug(f"[DashscopeASR] Using model: {model}, format: {format_type}, sample_rate: {sample_rate}")

            recognition_kwargs = {
                "model": model,
                "format": format_type,
                "sample_rate": sample_rate,
                "callback": None,
            }
            if model in self._LANGUAGE_HINT_MODELS:
                recognition_kwargs["language_hints"] = language_hints
            recognition = Recognition(**recognition_kwargs)

            # The SDK call is synchronous; run it in the default executor so the
            # event loop is not blocked while waiting for the service.
            logger.debug(f"[DashscopeASR] Starting recognition for audio file: {audio_path}")
            result = await asyncio.get_running_loop().run_in_executor(
                None, recognition.call, audio_path
            )

            if result.status_code != HTTPStatus.OK:
                error_msg = f"Recognition failed: {result.message}"
                logger.error(f"[DashscopeASR] {error_msg}")
                raise RuntimeError(error_msg)

            sentence = result.get_sentence()
            logger.debug(f"[DashscopeASR] Sentence type: {type(sentence)}, content: {sentence}")
            text = self._extract_text(sentence)

            logger.info(f"[DashscopeASR] Recognition result: {text}")
            logger.debug(
                f"[Metric] requestId: {recognition.get_last_request_id()}, "
                f"first package delay ms: {recognition.get_first_package_delay()}, "
                f"last package delay ms: {recognition.get_last_package_delay()}"
            )
            return TextMessage(data=text)
        except Exception as e:
            logger.error(f"[DashscopeASR] Error during recognition: {e}")
            raise
        finally:
            # Always remove the temp file, including when recognition raised.
            if audio_path is not None:
                try:
                    os.remove(audio_path)
                except OSError as e:
                    logger.warning(f"[DashscopeASR] Failed to remove temp file: {e}")

    @staticmethod
    def _extract_text(sentence) -> str:
        """Return the recognized text held by a ``get_sentence()`` result.

        Handles a single sentence dict, a list of sentences, a plain string,
        or anything else (stringified when truthy, otherwise ``''``).
        """
        if isinstance(sentence, dict):
            return sentence.get('text', '')
        if isinstance(sentence, list):
            # Concatenate every sentence so multi-sentence audio is not cut off
            # after the first one. An empty list yields ''.
            return ''.join(
                str(item.get('text') or '') if isinstance(item, dict) else str(item)
                for item in sentence
            )
        if isinstance(sentence, str):
            return sentence
        return str(sentence) if sentence else ''
|