| 12345678910111213141516171819202122232425262728293031323334353637383940414243 |
- # -*- coding: utf-8 -*-
- from ..builder import ASREngines
- from ..engineBase import BaseASREngine
- import io, base64
- from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE
- from digitalHuman.utils import logger, httpxAsyncClient, wavToMp3
- __all__ = ["DifyApiAsr"]
@ASREngines.register("Dify")
class DifyApiAsr(BaseASREngine):
    """ASR engine that sends audio to a Dify server's ``/audio-to-text`` endpoint."""

    async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
        """Transcribe an audio message through the Dify API.

        Args:
            input: Audio to transcribe. ``input.data`` may be raw bytes or a
                base64-encoded string. When ``input.type`` is
                ``AUDIO_TYPE.WAV`` the payload is passed through ``wavToMp3``
                before upload. The message itself is not modified.
            **kwargs: Engine parameters forwarded to ``checkParameter``; the
                validated result must provide ``api_server``, ``api_key`` and
                ``username``.

        Returns:
            A ``TextMessage`` whose ``data`` is the ``text`` field of the
            JSON response.

        Raises:
            RuntimeError: If the server answers with a non-200 status code.
        """
        # Validate parameters
        parameters = self.checkParameter(**kwargs)
        api_server = parameters["api_server"]
        api_key = parameters["api_key"]
        api_username = parameters["username"]

        headers = {
            'Authorization': f'Bearer {api_key}'
        }
        payload = {
            'user': api_username
        }

        # Work on a local copy of the payload so the caller's message keeps
        # its original data and type after this call.
        audio = input.data
        if isinstance(audio, str):
            audio = base64.b64decode(audio)
        if input.type == AUDIO_TYPE.WAV:
            audio = wavToMp3(audio)

        files = {'file': ('file', io.BytesIO(audio), 'audio/mp3')}
        # Tolerate a trailing slash in the configured server URL so the
        # request path never contains a double slash.
        url = api_server.rstrip("/") + "/audio-to-text"
        response = await httpxAsyncClient.post(url, headers=headers, files=files, data=payload)
        if response.status_code != 200:
            raise RuntimeError(f"Dify asr api error: {response.status_code}")

        result = response.json()["text"]
        logger.debug(f"[ASR] Engine response: {result}")
        return TextMessage(data=result)
-
|