77 lines
2.8 KiB
Python
77 lines
2.8 KiB
Python
# https://help.aliyun.com/zh/dashscope/developer-reference/quick-start-13?spm=a2c4g.11186623.0.0.26772e5cs8Vl59
|
|
|
|
import asyncio
|
|
import sys
|
|
from typing import Any, Generator, Iterable, Iterator
|
|
|
|
import dashscope
|
|
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
|
|
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
|
|
from aiostream import Stream, stream
|
|
|
|
from logging import getLogger
|
|
|
|
logger = getLogger(__name__)
|
|
|
|
# import requests
|
|
# r = requests.get(
|
|
# 'https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav'
|
|
# )
|
|
# with open('asr_example.wav', 'wb') as f:
|
|
# f.write(r.content)
|
|
|
|
class TTSSambert():
    """Thin wrapper around DashScope's ``SpeechSynthesizer`` (Sambert TTS).

    The synthesizer is invoked with a callback that collects every audio
    frame it is handed; the collected frames are then exposed through an
    async generator.
    """

    def __init__(self, api_key : str,
                 _model : str = "sambert-zhichu-v1",
                 _is_stream : bool = False,
                 *args, **kwargs) -> None:
        """Configure the DashScope API key and select the model.

        Args:
            api_key: Key assigned to the module-global ``dashscope.api_key``.
            _model: Default model name, used when no ``model`` keyword is given.
            _is_stream: Accepted for interface compatibility; not read here.
            *args: Ignored.
            **kwargs: An optional ``model`` entry overrides ``_model``.
        """
        # NOTE: this sets process-wide state on the dashscope module, so the
        # most recently constructed instance decides the key for everyone.
        dashscope.api_key = api_key
        self.model = kwargs.get("model", _model)

    class Callback(ResultCallback):
        """Result callback that accumulates audio frames in ``self.res``."""

        def __init__(self)-> None:
            # Frames are appended in the order ``on_event`` receives them.
            self.res = []
            super().__init__()

        def on_open(self):
            """Log that the synthesizer session was opened."""
            logger.debug('Speech synthesizer is opened.')

        def on_complete(self):
            """Log that synthesis completed."""
            logger.debug('Speech synthesizer is completed.')

        def on_error(self, response: SpeechSynthesisResponse):
            """Log the failing response; the error is not re-raised here."""
            # Lazy logging args: the response is only stringified if emitted.
            logger.error('Speech synthesizer failed, response is %s', response)

        def on_close(self):
            """Log that the synthesizer session was closed."""
            logger.debug('Speech synthesizer is closed.')

        def on_event(self, result: SpeechSynthesisResult):
            """Store the event's audio frame (if any) and log its timestamp."""
            frame = result.get_audio_frame()
            if frame is not None:
                # The message needs a %s placeholder: passing extra args to a
                # placeholder-free message makes logging report a formatting
                # error instead of emitting the record.
                logger.debug('audio result length: %s', sys.getsizeof(frame))
                self.res.append(frame)

            timestamp = result.get_timestamp()
            if timestamp is not None:
                logger.debug('timestamp result: %s', str(timestamp))

    async def generate_events(self, callback: Callback):
        """Asynchronously yield each frame collected in ``callback.res``.

        Frames are presumably ``bytes`` (per the ``Stream[bytes]`` annotation
        on ``process_events``) - confirm against the DashScope SDK.
        """
        for event in callback.res:
            yield event

    async def process_events(self, callback: Callback) -> Stream[bytes]:
        """Wrap the collected frames of ``callback`` in an aiostream stream.

        This is a coroutine, so callers must ``await`` it to obtain the stream.
        """
        source = self.generate_events(callback)
        return stream.iterate(source)

    # def execute_nonstream(self, data) -> bytes:
    #     result = SpeechSynthesizer.call(model=self.model,
    #                                     text=data,
    #                                     sample_rate=48000)
    #     return result.get_audio_data()

    def execute_stream(self, data, *args):
        """Synthesize ``data`` and return an async generator of audio frames.

        Args:
            data: Text passed to the synthesizer as ``text``.
            *args: Ignored.

        Returns:
            The async generator produced by ``generate_events`` over the
            frames the callback had collected when it is iterated.
        """
        callback = self.Callback()
        SpeechSynthesizer.call(model=self.model,
                               text=data,
                               sample_rate=48000,
                               callback=callback,
                               word_timestamp_enabled=True,
                               phoneme_timestamp_enabled=True)
        return self.generate_events(callback)