new_wukong/src/dashscope/sambert.py
# https://help.aliyun.com/zh/dashscope/developer-reference/quick-start-13?spm=a2c4g.11186623.0.0.26772e5cs8Vl59
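"""Thin wrapper around DashScope's Sambert speech synthesis (TTS) API.

Buffers synthesized audio frames through a `ResultCallback` and re-exposes
them as an async generator / aiostream source.
"""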
import asyncio
import sys
from logging import getLogger
from typing import Any, Generator, Iterable, Iterator

import dashscope
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
from aiostream import Stream, stream

logger = getLogger(__name__)

# import requests
# r = requests.get(
#     'https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav'
# )
# with open('asr_example.wav', 'wb') as f:
#     f.write(r.content)


class TTSSambert:
    def __init__(self, api_key: str,
                 _model: str = "sambert-zhichu-v1",
                 _is_stream: bool = False,
                 *args, **kwargs) -> None:
        # A `model` keyword argument, if supplied, overrides the default Sambert voice.
        dashscope.api_key = api_key
        self.model = kwargs.get("model", _model)

    class Callback(ResultCallback):
        """Buffers synthesized audio frames and timestamps as they arrive."""

        def __init__(self) -> None:
            self.res = []
            super().__init__()

        def on_open(self):
            logger.debug('Speech synthesizer is opened.')

        def on_complete(self):
            logger.debug('Speech synthesizer is completed.')

        def on_error(self, response: SpeechSynthesisResponse):
            logger.error('Speech synthesizer failed, response is %s', response)

        def on_close(self):
            logger.debug('Speech synthesizer is closed.')

        def on_event(self, result: SpeechSynthesisResult):
            if result.get_audio_frame() is not None:
                logger.debug('audio result length: %s', sys.getsizeof(result.get_audio_frame()))
                self.res.append(result.get_audio_frame())
            if result.get_timestamp() is not None:
                logger.debug('timestamp result: %s', result.get_timestamp())

    async def generate_events(self, callback: Callback):
        # Re-expose the buffered audio frames as an async generator.
        for event in callback.res:
            yield event

    async def process_events(self, callback: Callback) -> Stream[bytes]:
        # Wrap the async generator in an aiostream source so it can be
        # composed with other streams.
        source = self.generate_events(callback)
        return stream.iterate(source)
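
    # The Stream returned by `process_events` is intended to be consumed via
    # aiostream's streamer context, e.g. (a hypothetical sketch; `tts` and `cb`
    # are placeholder names):
    #     async with (await tts.process_events(cb)).stream() as frames:
    #         async for frame in frames:
    #             ...  # handle each audio chunk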

    # def execute_nonstream(self, data) -> bytes:
    #     result = SpeechSynthesizer.call(model=self.model,
    #                                     text=data,
    #                                     sample_rate=48000)
    #     return result.get_audio_data()

    def execute_stream(self, data, *args):
        # Synthesize `data` with word/phoneme timestamps enabled; audio frames
        # are collected by the callback and yielded back as an async generator.
        callback = self.Callback()
        SpeechSynthesizer.call(model=self.model,
                               text=data,
                               sample_rate=48000,
                               callback=callback,
                               word_timestamp_enabled=True,
                               phoneme_timestamp_enabled=True)
        return self.generate_events(callback)
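

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of driving TTSSambert end to end, assuming that
# SpeechSynthesizer.call blocks until synthesis finishes when a callback is
# supplied, so the buffered frames are complete by the time we iterate them.
# The API key, text, and output filename below are placeholders.
#
# async def _demo() -> None:
#     tts = TTSSambert(api_key="sk-...")        # hypothetical key
#     frames = tts.execute_stream("你好，世界")  # async generator of audio frames
#     with open("out.pcm", "wb") as f:
#         async for frame in frames:
#             f.write(frame)
#
# if __name__ == "__main__":
#     asyncio.run(_demo())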