difyASR.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. # -*- coding: utf-8 -*-
  2. from ..builder import ASREngines
  3. from ..engineBase import BaseASREngine
  4. import io, base64
  5. from digitalHuman.protocol import AudioMessage, TextMessage, AUDIO_TYPE
  6. from digitalHuman.utils import logger, httpxAsyncClient, wavToMp3
  7. __all__ = ["DifyApiAsr"]
  8. @ASREngines.register("Dify")
  9. class DifyApiAsr(BaseASREngine):
  10. async def run(self, input: AudioMessage, **kwargs) -> TextMessage:
  11. # 参数校验
  12. paramters = self.checkParameter(**kwargs)
  13. API_SERVER = paramters["api_server"]
  14. API_KEY = paramters["api_key"]
  15. API_USERNAME = paramters["username"]
  16. headers = {
  17. 'Authorization': f'Bearer {API_KEY}'
  18. }
  19. payload = {
  20. 'user': API_USERNAME
  21. }
  22. if isinstance(input.data, str):
  23. input.data = base64.b64decode(input.data)
  24. if input.type == AUDIO_TYPE.WAV:
  25. input.data = wavToMp3(input.data)
  26. input.type = AUDIO_TYPE.MP3
  27. files = {'file': ('file', io.BytesIO(input.data), 'audio/mp3')}
  28. response = await httpxAsyncClient.post(API_SERVER + "/audio-to-text", headers=headers, files=files, data=payload)
  29. if response.status_code != 200:
  30. raise RuntimeError(f"Dify asr api error: {response.status_code}")
  31. result = response.json()["text"]
  32. logger.debug(f"[ASR] Engine response: {result}")
  33. message = TextMessage(data=result)
  34. return message