Source code for pylips.speech.polly_tts

import boto3
import json
import os
from contextlib import closing
import pickle

class PollyTTS:
    '''
    A text-to-speech backend that uses Amazon Polly.

    This class is used to generate audio files from text using Amazon Polly.
    It can also generate visemes that correspond to the audio files that it
    generates. Generated phrases are stored in the ``pylips_phrases``
    directory (relative to the current working directory) as an ``.mp3``
    file plus a ``.pkl`` file holding the ``(times, visemes)`` tuple.

    args:
        None
    '''

    # Directory, relative to the current working directory, where audio and
    # viseme files are written and looked up.
    _PHRASE_DIR = 'pylips_phrases'

    def __init__(self):
        self.tts = boto3.client('polly')

        # all current amazon polly voices
        # NOTE: 'Aditi' and 'Kajal' appear twice in this list. The duplicates
        # are kept so that the indices printed by ``list_voices`` (and any
        # caller indexing into ``self.voices``) stay stable.
        self.voices = [
            'Zeina', 'Hala', 'Zayd', 'Lisa', 'Arlet', 'Hiujin', 'Zhiyu', 'Naja', 'Mads',
            'Sofie', 'Laura', 'Lotte', 'Ruben', 'Nicole', 'Olivia', 'Russell', 'Amy', 'Emma',
            'Brian', 'Arthur', 'Aditi', 'Raveena', 'Kajal', 'Niamh', 'Aria', 'Ayanda', 'Danielle',
            'Gregory', 'Ivy', 'Joanna', 'Kendra', 'Kimberly', 'Salli', 'Joey', 'Justin', 'Kevin',
            'Matthew', 'Ruth', 'Stephen', 'Geraint', 'Suvi', 'Celine', 'Léa', 'Mathieu', 'Rémi',
            'Isabelle', 'Chantal', 'Gabrielle', 'Liam', 'Marlene', 'Vicki', 'Hans', 'Daniel',
            'Hannah', 'Aditi', 'Kajal', 'Dora', 'Karl', 'Carla', 'Bianca', 'Giorgio', 'Adriano',
            'Mizuki', 'Takumi', 'Kazuha', 'Tomoko', 'Seoyeon', 'Liv', 'Ida', 'Ewa', 'Maja', 'Jacek',
            'Jan', 'Ola', 'Camila', 'Vitoria', 'Ricardo', 'Thiago', 'Ines', 'Cristiano', 'Carmen',
            'Tatyana', 'Maxim', 'Conchita', 'Lucia', 'Enrique', 'Sergio', 'Mia', 'Andrés', 'Lupe',
            'Penelope', 'Miguel', 'Pedro', 'Astrid', 'Elin', 'Filiz', 'Burcu', 'Gwyneth',
        ]

    def list_voices(self):
        '''
        Lists all the voices that are available in the Amazon Polly TTS backend.

        For a more in-depth look at the voices, see the
        `Amazon Polly documentation <https://docs.aws.amazon.com/polly/latest/dg/voicelist.html>`_.

        args:
            None
        '''
        for i, voice in enumerate(self.voices):
            print(f'{i}: {voice}')

    @staticmethod
    def _viseme_path(audio_path):
        '''Returns the path of the ``.pkl`` viseme file stored next to ``audio_path``.'''
        return os.path.splitext(audio_path)[0] + '.pkl'

    def _load_visemes(self, audio_path):
        '''Loads the ``(times, visemes)`` tuple saved alongside ``audio_path``.'''
        # SECURITY: pickle can execute arbitrary code while loading. Only files
        # written by this class into the phrase directory should be loaded
        # here; never point this at untrusted data.
        with open(self._viseme_path(audio_path), 'rb') as pkl_file:
            times, visemes = pickle.load(pkl_file)
        return times, visemes

    def gen_audio_and_visemes(self, text, voice_id=None, fname=None):
        '''
        Generates an audio file and visemes from a string of text using Amazon Polly.

        args:
            text (str): the text that the robot should speak. It is wrapped
                in ``<speak>`` tags and sent as SSML.
            voice_id (str): the voice that the robot should speak in
                (defaults to ``'Justin'``)
            fname (str): the name of the file that the audio should be saved
                to (without directory or extension). When given and both the
                audio and viseme files already exist, the saved result is
                returned without contacting Polly.

        returns:
            (tuple): a tuple containing ``fname``, ``times``, and ``visemes``.
            fname is the path to the audio file, times is a list of times
            that correspond to the initiation of the visemes, and visemes is
            a list of visemes that correspond to the words in the audio

        raises:
            RuntimeError: if the viseme response contains no ``AudioStream``
        '''
        if voice_id is None:
            voice_id = 'Justin'

        if fname is None:
            # Unnamed phrases always regenerate and overwrite the per-voice file.
            fname = f"{self._PHRASE_DIR}/{voice_id}_output.mp3"
        else:
            # if it was already cached, return it, otherwise, generate it and return it
            fname = f"{self._PHRASE_DIR}/{fname}.mp3"
            # Require BOTH files: an mp3 without its pkl (e.g. left behind by
            # an interrupted run) is regenerated instead of crashing.
            if os.path.exists(fname) and os.path.exists(self._viseme_path(fname)):
                times, visemes = self._load_visemes(fname)
                return fname, times, visemes

        ssml = f"<speak>{text}</speak>"

        # Synthesize speech
        response = self.tts.synthesize_speech(
            TextType='ssml',
            Text=ssml,
            OutputFormat='mp3',
            VoiceId=voice_id
        )
        with closing(response["AudioStream"]) as stream:
            audio_bytes = stream.read()

        # Synthesize speech with viseme output
        response = self.tts.synthesize_speech(
            TextType='ssml',
            Text=ssml,
            OutputFormat='json',
            SpeechMarkTypes=['viseme'],
            VoiceId=voice_id
        )
        if 'AudioStream' not in response:
            raise RuntimeError('Polly viseme response did not contain an AudioStream')
        with closing(response["AudioStream"]) as stream:
            data = stream.read().decode('utf-8')

        # The speech-mark payload is one JSON object per line.
        marks = [json.loads(line) for line in data.split('\n') if line.strip()]
        times = [mark['time'] / 1000. for mark in marks]  # divide the 'time' field by 1000
        visemes = [VIS2IPA[mark['value']] for mark in marks]

        # Write to disk only after both requests succeeded, so a failed
        # request does not leave a half-populated cache entry behind.
        os.makedirs(self._PHRASE_DIR, exist_ok=True)
        with open(fname, "wb") as audio_file:
            audio_file.write(audio_bytes)
        with open(self._viseme_path(fname), 'wb') as pkl_file:
            pickle.dump((times, visemes), pkl_file)

        return fname, times, visemes

    def get_audio_and_visemes(self, fname):
        '''
        Loads presaved audio and visemes from a file.

        args:
            fname (str): the name of the file that the audio and visemes were
                saved to (without directory or extension)

        returns:
            (tuple): a tuple containing ``fname``, ``times``, and ``visemes``.
            fname is the path to the audio file, times is a list of times
            that correspond to the initiation of the visemes, and visemes is
            a list of visemes that correspond to the words in the audio

        raises:
            FileNotFoundError: if the audio file or its viseme file does not
                exist (``FileNotFoundError`` is a subclass of ``Exception``)
        '''
        # if it was already cached, return it, otherwise, raise an error
        fname = f"{self._PHRASE_DIR}/{fname}.mp3"
        if not os.path.exists(fname):
            raise FileNotFoundError(f'phrase {fname} does not exist')

        times, visemes = self._load_visemes(fname)
        return fname, times, visemes
# Viseme symbols grouped by the label they map to. Group order and the order
# of symbols within each group determine the key order of VIS2IPA.
_VISEME_GROUPS = (
    ("BILABIAL", ("p", "B")),
    ("LABIODENTAL", ("f",)),
    ("INTERDENTAL", ("T",)),
    ("DENTAL_ALVEOLAR", ("s", "t")),
    ("POSTALVEOLAR", ("S", "r")),
    ("VELAR_GLOTTAL", ("J", "k")),
    ("CLOSE_FRONT_VOWEL", ("i",)),
    ("CLOSE_BACK_VOWEL", ("u",)),
    ("MID_CENTRAL_VOWEL", ("@",)),
    ("OPEN_FRONT_VOWEL", ("a", "e", "E")),
    ("OPEN_BACK_VOWEL", ("o", "O")),
    ("IDLE", ("sil",)),
)

# Lookup table from a viseme symbol (the ``value`` field of a viseme speech
# mark) to the label name returned to callers.
VIS2IPA = {
    symbol: label
    for label, symbols in _VISEME_GROUPS
    for symbol in symbols
}