## MS Speech
# Install libraries (the %pip lines below are IPython/Jupyter magics)
%pip install openai requests python-dotenv langdetect
%pip install azure-cognitiveservices-speech
import os
import azure.cognitiveservices.speech as speechsdk
import glob # Import glob for file handling
from datetime import datetime
from dotenv import load_dotenv
# Load environment variables from the .env file
load_dotenv()

# Retrieve API keys and endpoint information from the environment.
# NOTE: os.getenv returns None when MS_SPEECH_API_KEY1 is not set; nothing
# below validates the key before it is handed to the Speech SDK.
speech_key = os.getenv("MS_SPEECH_API_KEY1")  # Azure Speech API Key
service_region = "australiaeast"  # Set your Azure region

# Folders for input and output
input_folder = r'eleven_input_folder'
output_folder = r'speech_ms_output_folder'

# Ensure the output folder exists. exist_ok=True replaces the separate
# "exists?" check, so there is no window between checking and creating.
os.makedirs(output_folder, exist_ok=True)
# Language name -> Azure neural voice ID.
# Keys are the full language names used in input file names (including
# "Brazilian Portuguese"). Insertion order matters only in that "English"
# is the fallback entry looked up by name elsewhere in this file.
voices = {
    "Spanish": "es-MX-DaliaNeural",  # Spanish (Mexico) - Dalia
    "Portuguese": "pt-BR-ElzaNeural",  # Portuguese (Brazil) - Elza
    "Brazilian Portuguese": "pt-BR-ElzaNeural",  # Alias: same pt-BR voice as "Portuguese"
    "Hindi": "hi-IN-AnanyaNeural",  # Hindi (India) - Ananya
    "Arabic": "ar-EG-SalmaNeural",  # Arabic (Egypt) - Salma
    "Japanese": "ja-JP-MayuNeural",  # Japanese - Mayu
    "Russian": "ru-RU-SvetlanaNeural",  # Russian - Svetlana
    "Indonesian": "id-ID-GadisNeural",  # Indonesian - Gadis
    "English": "en-GB-SoniaNeural",  # English (UK) - Sonia (default)
}
# Build a timestamped output path for one synthesized audio file
def generate_output_file_name(language, output_folder):
    """Return the path of an MP3 file for ``language`` inside ``output_folder``.

    The file name has the form
    ``output_speech_<language>_<YYYYmmdd_HHMMSS>.mp3``, where the timestamp
    is the local time at the moment of the call (one-second resolution).
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    return os.path.join(output_folder, f"output_speech_{language}_{stamp}.mp3")
# Load the full text of an input file, echoing a short preview for debugging
def read_file_content(file_path):
    """Return the entire contents of ``file_path`` decoded as UTF-8.

    Prints the path before reading and the first 100 characters of the
    text afterwards.
    """
    print(f"Reading file content from: {file_path}")
    with open(file_path, mode='r', encoding='utf-8') as source:
        text = source.read()
    preview = text[:100]  # Keep the debug output short
    print(f"Content read from file: {preview}...")
    return text
# Function to map language name from the filename
def get_language_from_filename(file_path):
    """Extract the language tag from a file named like ``MS-<Language>-...``.

    The tag is the text between the first ``MS-`` marker in the file name
    and the next hyphen (or the end of the name, with the extension
    ignored). Anchoring on the marker, rather than on the second
    hyphen-separated field, keeps the result correct when a hyphen appears
    before ``MS-``.

    Args:
        file_path: Path (or bare name) of the input file; only the final
            path component is inspected.

    Returns:
        The tag exactly as written in the file name, or ``"English"`` when
        the name has no ``MS-`` marker or the tag is empty. The tag is not
        checked against the ``voices`` table here.
    """
    # Drop the directory and the extension so "MS-Hindi.txt" yields "Hindi".
    stem = os.path.splitext(os.path.basename(file_path))[0]
    _, marker, remainder = stem.partition("MS-")
    language_name = remainder.split("-", 1)[0]
    if not marker or not language_name:
        return "English"  # Default to English if no match
    print(f"Detected language from filename: {language_name}")
    return language_name
# Function to find the most recent file for each language
def get_latest_files(directory):
    """Pick the newest ``.txt`` file per language found in ``directory``.

    A file belongs to a language when its file name contains
    ``-<Language>-`` for one of the keys of the module-level ``voices``
    mapping. Files matching no language are treated as general (English)
    input.

    Args:
        directory: Folder to scan (non-recursive) for ``*.txt`` files.

    Returns:
        Dict mapping language name to the path with the greatest
        ``os.path.getctime``. Languages without a file are omitted. The
        newest untagged file, if any, is stored under ``"English"`` and
        replaces a ``-English-`` tagged file selected earlier.

    Raises:
        ValueError: If ``directory`` contains no ``.txt`` files.
    """
    files = glob.glob(os.path.join(directory, '*.txt'))
    if not files:
        raise ValueError(f"No .txt files found in the directory: {directory}")

    # Match markers against the file name only: testing the full path would
    # let a hyphenated directory name (e.g. "run-English-1/") tag every file.
    names = {path: os.path.basename(path) for path in files}

    latest_files = {}
    tagged = set()  # Paths claimed by at least one language marker
    # Iterate over language-specific voices and find the latest file for each
    for lang_code in voices:
        marker = f"-{lang_code}-"
        language_files = [path for path in files if marker in names[path]]
        if language_files:
            tagged.update(language_files)
            # Get the most recent file for the language
            latest_files[lang_code] = max(language_files, key=os.path.getctime)
            print(f"Latest file for {lang_code}: {latest_files[lang_code]}")

    # Find the most recent non-language-specific file (English/general) if available
    non_language_files = [path for path in files if path not in tagged]
    if non_language_files:
        latest_files["English"] = max(non_language_files, key=os.path.getctime)
        print(f"Latest general file: {latest_files['English']}")
    return latest_files
# Convert text to an MP3 file with the Azure Speech SDK
def synthesize_speech(text, language, output_path):
    """Synthesize ``text`` into an MP3 file at ``output_path``.

    The voice is looked up in the module-level ``voices`` table by
    ``language``, falling back to the "English" entry. The outcome is
    reported with ``print``; the function returns ``None`` in every case.
    """
    print(f"Synthesizing speech for language: {language}")
    print(f"Using output file: {output_path}")

    # Speech configuration: credentials, voice and MP3 output format
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    voice_name = voices.get(language, voices["English"])  # Default to English if no voice found
    config.speech_synthesis_voice_name = voice_name
    print(f"Using voice: {voice_name}")
    config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # Synthesizer that writes its audio straight to the output file
    file_sink = speechsdk.audio.AudioOutputConfig(filename=output_path)
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=file_sink)

    # Block until the asynchronous synthesis call has finished
    outcome = synthesizer.speak_text_async(text).get()

    # Report the result
    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"Speech synthesized successfully and saved to {output_path}")
        return
    if outcome.reason != speechsdk.ResultReason.Canceled:
        return  # Any other reason is left unreported

    details = outcome.cancellation_details
    print(f"Speech synthesis canceled: {details.reason}")
    if details.reason == speechsdk.CancellationReason.Error:
        print(f"Error details: {details.error_details}")
# Main function to process text files and generate speech outputs
def process_files_for_ms_tts():
    """Synthesize speech for the newest input text file of each language.

    Selects files with ``get_latest_files(input_folder)``, then for each one
    reads the text, derives the language from the file name (falling back
    to "English" when it is not a key of ``voices``), and writes an MP3 to
    ``output_folder``.

    Errors are reported with ``print`` and never propagate. A failure while
    selecting files ends the run; a failure on one file is reported and the
    remaining files are still processed.
    """
    try:
        # Get the latest text files for each language
        latest_files = get_latest_files(input_folder)
    except Exception as e:
        print(f"Error processing files: {e}")
        return

    # Process each file dynamically based on language
    for file_path in latest_files.values():
        try:
            text = read_file_content(file_path)
            # Detect language from filename
            language_name = get_language_from_filename(file_path)
            # Check if the detected language is supported
            if language_name not in voices:
                print(f"Warning: Language '{language_name}' not found in the voices mapping. Defaulting to English.")
                language_name = "English"
            # Generate the output file path
            output_file_path = generate_output_file_name(language_name, output_folder)
            # Synthesize speech for the current text file
            synthesize_speech(text, language_name, output_file_path)
        except Exception as e:
            # Top-level boundary: report and continue so one bad file does
            # not stop the other languages from being synthesized.
            print(f"Error processing file {file_path}: {e}")


# Run the process
if __name__ == "__main__":
    process_files_for_ms_tts()