Spaces:
Running
Running
import os
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Optional, Tuple

import gradio as gr
import torch
import torchaudio
# Install dependencies
def install_dependencies():
    """Clone the model repository and pip-install the runtime packages.

    Clones ``AhmedEladl/saudi-tts`` into ``./saudi-tts`` unless that path
    already exists, then installs Coqui TTS (from GitHub),
    ``transformers==4.55.4`` and ``deepspeed`` with one ``pip install``
    invocation per requirement.

    Returns:
        True when every command exits with status 0; False when a command
        fails or cannot be started (the error is printed).
    """
    model_dir = 'saudi-tts'
    model_repo_url = 'https://huggingface.co/AhmedEladl/saudi-tts'
    pip_requirements = (
        'git+https://github.com/coqui-ai/TTS',
        'transformers==4.55.4',
        'deepspeed',
    )

    try:
        # Clone the model repository (skipped when the directory is present)
        if not os.path.exists(model_dir):
            subprocess.run(['git', 'clone', model_repo_url], check=True)

        # Install TTS and other dependencies. Using sys.executable keeps pip
        # bound to the interpreter that is running this app.
        for requirement in pip_requirements:
            subprocess.run(
                [sys.executable, '-m', 'pip', 'install', requirement],
                check=True,
            )
    except (subprocess.SubprocessError, OSError) as e:
        # SubprocessError covers a non-zero exit status (check=True);
        # OSError covers a missing or non-executable git / interpreter.
        print(f"Error installing dependencies: {e}")
        return False
    return True
# Global variables for model.
# All three are assigned by load_model() and read by generate_speech();
# they stay None until loading has succeeded.
model = None
gpt_cond_latent = None
speaker_embedding = None
def load_model():
    """Load the XTTS model and compute the reference-speaker latents.

    Reads the configuration, checkpoint, vocabulary and reference speaker
    audio from the ``saudi-tts/`` directory, moves the model to the GPU when
    CUDA is available, and stores the results in the module-level ``model``,
    ``gpt_cond_latent`` and ``speaker_embedding``.

    The globals are assigned only after every step has succeeded, so a
    failure part-way through never leaves a half-initialized model behind
    for ``generate_speech`` to pick up.

    Returns:
        True when the model and latents were loaded; False on any error
        (the error is printed).
    """
    global model, gpt_cond_latent, speaker_embedding

    # Define paths
    config_file_path = 'saudi-tts/config.json'
    vocab_file_path = 'saudi-tts/vocab.json'
    model_path = 'saudi-tts/'
    speaker_audio_path = 'saudi-tts/speaker.wav'

    try:
        # Imported lazily: the TTS package is pip-installed at start-up by
        # install_dependencies(), after this module has been imported.
        from TTS.tts.configs.xtts_config import XttsConfig
        from TTS.tts.models.xtts import Xtts

        print("Loading model configuration...")
        config = XttsConfig()
        config.load_json(config_file_path)

        print("Initializing model...")
        xtts = Xtts.init_from_config(config)

        print("Loading model checkpoint...")
        xtts.load_checkpoint(
            config,
            checkpoint_dir=model_path,
            use_deepspeed=False,  # Set to False for CPU or change based on your setup
            vocab_path=vocab_file_path,
        )

        # Move to GPU if available
        if torch.cuda.is_available():
            xtts.cuda()
            print("Model moved to GPU")
        else:
            print("Using CPU")

        print("Computing speaker latents...")
        cond_latent, spk_embedding = xtts.get_conditioning_latents(
            audio_path=[speaker_audio_path]
        )
    except Exception as e:
        # Start-up boundary: report any failure and let the caller decide.
        print(f"Error loading model: {e}")
        return False

    # Publish the fully initialized state in one step.
    model = xtts
    gpt_cond_latent = cond_latent
    speaker_embedding = spk_embedding
    print("Model loaded successfully!")
    return True
def generate_speech(text: Optional[str], temperature: float = 0.75) -> Tuple[Optional[str], str]:
    """Generate speech from Arabic text.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only values
            are rejected with a status message instead of raising.
        temperature: Passed through to ``model.inference`` as ``temperature``.

    Returns:
        A ``(wav_path, status_message)`` pair. ``wav_path`` is the path of a
        newly written ``.wav`` file on success and ``None`` otherwise.
    """
    # All three globals are needed for inference; refuse to run on a missing
    # or partially loaded model.
    if model is None or gpt_cond_latent is None or speaker_embedding is None:
        return None, "Model not loaded. Please wait for initialization."
    if not text or not text.strip():
        return None, "Please enter some text to convert to speech."

    sample_rate = 24000  # presumably the model's output rate; confirm against its config
    temp_path = None
    try:
        print(f"Generating speech for: {text}")

        # Generate speech
        out = model.inference(
            text,
            "ar",  # Arabic language code
            gpt_cond_latent,
            speaker_embedding,
            temperature=temperature,
        )

        # Reserve a file name. delete=False because the returned path must
        # outlive this function; the handle is closed before the audio is
        # written to that path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            temp_path = tmp_file.name

        # Convert output to tensor and save; unsqueeze(0) adds a leading
        # dimension in front of the sample data.
        audio_tensor = torch.tensor(out["wav"]).unsqueeze(0)
        torchaudio.save(temp_path, audio_tensor, sample_rate)
        return temp_path, "Speech generated successfully!"
    except Exception as e:
        # Do not leave an empty or partial temp file behind on failure.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        error_msg = f"Error generating speech: {str(e)}"
        print(error_msg)
        return None, error_msg
# Initialize the app
def initialize_app():
    """Install the dependencies, then load the model.

    Model loading is skipped when dependency installation fails.

    Returns:
        True when both steps succeed, False as soon as one of them fails.
    """
    print("Installing dependencies...")
    if not install_dependencies():
        return False

    print("Loading model...")
    if not load_model():
        return False

    return True
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for the Saudi Arabic text-to-speech demo.

    The layout is an Arabic text box, a temperature slider and a generate
    button on the left, the audio player and a status box on the right, and
    a list of clickable example sentences. Clicking the button calls
    ``generate_speech(text, temperature)`` and routes its two return values
    to the audio player and the status box.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    # NOTE(review): the Arabic strings and emoji below were repaired from
    # mis-decoded text by reading whole words; have a native speaker confirm
    # them against the original file.
    with gr.Blocks(title="Saudi Arabic TTS", theme=gr.themes.Soft()) as interface:
        gr.Markdown(
            """
            # 🎙️ Saudi Arabic Text-to-Speech

            Convert Arabic text to speech using the Saudi TTS model. This model is specifically trained for Saudi Arabic dialect.

            **Usage Instructions:**

            1. Enter your Arabic text in the text box below
            2. Adjust the temperature (0.1-1.0) to control speech variation
            3. Click "Generate Speech" to create audio
            4. Play or download the generated audio file
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Arabic Text",
                    placeholder="أدخل النص العربي هنا...",
                    lines=5,
                    rtl=True,  # Right-to-left for Arabic
                )
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.75,
                    step=0.05,
                    label="Temperature (Speech Variation)",
                    info="Higher values = more variation, Lower values = more consistent",
                )
                generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

            with gr.Column(scale=1):
                audio_output = gr.Audio(
                    label="Generated Speech",
                    type="filepath",
                    interactive=False,
                )
                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    lines=2,
                )

        # Examples
        # The heading originally carried an emoji that could not be recovered
        # from the damaged text, so it is omitted here.
        gr.Markdown("### Example Texts:")
        examples = [
            ["السلام عليكم ودي أوريكم أول نموذج أصطناعي مفتوح المصدر يتكلم باللهجة السعودية."],
            ["أهلاً وسهلاً بكم في المملكة العربية السعودية"],
            ["كيف حالكم؟ إن شاء الله تمام"],
            ["شكراً لكم على استخدام هذا النموذج"],
        ]
        gr.Examples(
            examples=examples,
            inputs=[text_input],
            label="Click on any example to try it:",
        )

        # Event handlers
        generate_btn.click(
            fn=generate_speech,
            inputs=[text_input, temperature_slider],
            outputs=[audio_output, status_output],
            show_progress=True,
        )

    return interface
# Main execution
def main():
    """Initialize the app and serve the Gradio UI.

    Exits the process with status 1 when initialization fails, so the
    failure is visible to whatever started the script instead of looking
    like a clean shutdown.
    """
    print("Initializing Saudi Arabic TTS App...")

    # Initialize app (install dependencies and load model)
    if not initialize_app():
        print("❌ Failed to initialize app. Please check the logs above.")
        sys.exit(1)

    print("✅ App initialized successfully!")

    # Create and launch interface
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",  # Allow external access
        server_port=7860,  # Default Gradio port
        share=False,  # Set to True if you want a public link
        debug=True,
        show_error=True,
    )


if __name__ == "__main__":
    main()