# https://help.aliyun.com/zh/dashscope/developer-reference/quick-start-13?spm=a2c4g.11186623.0.0.26772e5cs8Vl59
import asyncio
import sys
from typing import Any, Generator, Iterable, Iterator

import dashscope
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
from aiostream import Stream, stream

from logging import getLogger

logger = getLogger(__name__)

# import requests
# r = requests.get(
#     'https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav'
# )
# with open('asr_example.wav', 'wb') as f:
#     f.write(r.content)


class TTSSambert:
    """Speech-synthesis client built on DashScope's ``SpeechSynthesizer``.

    Audio frames reported by the synthesizer are collected by a
    :class:`TTSSambert.Callback` instance and exposed to callers as an
    async generator.
    """

    def __init__(self, api_key: str, _model: str = "sambert-zhichu-v1",
                 _is_stream: bool = False, *args, **kwargs) -> None:
        """Configure the DashScope API key and select the model.

        Args:
            api_key: Assigned to the module-level ``dashscope.api_key``, so it
                applies to every user of ``dashscope`` in this process.
            _model: Model name used when no ``model`` keyword is given.
            _is_stream: Accepted for interface compatibility; not read here.
            *args: Ignored.
            **kwargs: Only ``model`` is read; it takes precedence over
                ``_model``.
        """
        dashscope.api_key = api_key
        self.model = kwargs.get("model", _model)

    class Callback(ResultCallback):
        """Result callback that accumulates synthesized audio frames."""

        def __init__(self) -> None:
            # Audio frames, in the order on_event received them.
            self.res = []
            super().__init__()

        def on_open(self):
            """Log that the synthesizer session was opened."""
            logger.debug('Speech synthesizer is opened.')

        def on_complete(self):
            """Log that synthesis completed."""
            logger.debug('Speech synthesizer is completed.')

        def on_error(self, response: SpeechSynthesisResponse):
            """Log the failure response; nothing is raised here."""
            logger.error('Speech synthesizer failed, response is %s', response)

        def on_close(self):
            """Log that the synthesizer session was closed."""
            logger.debug('Speech synthesizer is closed.')

        def on_event(self, result: SpeechSynthesisResult):
            """Store the audio frame carried by ``result`` and log its details.

            Args:
                result: Event payload; its audio frame and timestamp are each
                    optional and are handled independently.
            """
            frame = result.get_audio_frame()
            if frame is not None:
                # sys.getsizeof reports the size of the Python object
                # (shallow, including its header), not the payload length.
                logger.debug('audio result length: %s', sys.getsizeof(frame))
                self.res.append(frame)

            timestamp = result.get_timestamp()
            if timestamp is not None:
                logger.debug('timestamp result: %s', timestamp)

    async def generate_events(self, callback: Callback):
        """Asynchronously yield every frame stored in ``callback.res``.

        The list is read when the generator is iterated, not when it is
        created.
        """
        for event in callback.res:
            yield event

    async def process_events(self, callback: Callback) -> Stream[bytes]:
        """Wrap the frames collected by ``callback`` in an aiostream stream."""
        source = self.generate_events(callback)
        return stream.iterate(source)

    # def execute_nonstream(self, data) -> bytes:
    #     result = SpeechSynthesizer.call(model=self.model,
    #                                     text=data,
    #                                     sample_rate=48000)
    #     return result.get_audio_data()

    def execute_stream(self, data, *args):
        """Synthesize ``data`` and return an async generator of audio frames.

        Args:
            data: Text passed to the synthesizer as ``text``.
            *args: Ignored.

        Returns:
            The async generator produced by :meth:`generate_events` for the
            callback used in this call.
        """
        callback = self.Callback()
        # NOTE(review): the returned generator only sees frames already in
        # callback.res when it is iterated; this presumes the call below has
        # delivered them by then - confirm against the DashScope SDK.
        SpeechSynthesizer.call(
            model=self.model,
            text=data,
            sample_rate=48000,
            callback=callback,
            word_timestamp_enabled=True,
            phoneme_timestamp_enabled=True,
        )
        return self.generate_events(callback)