diff --git a/bot.py b/bot.py index 0ac4138..2c1c458 100644 --- a/bot.py +++ b/bot.py @@ -21,7 +21,6 @@ import hashlib from datetime import datetime, timedelta import concurrent.futures from gtts import gTTS -import speech_recognition as sr import shutil from bs4 import BeautifulSoup from dotenv import load_dotenv @@ -2359,21 +2358,9 @@ async def process_ai_queue(): await channel.send(embed=embed) if ctx.voice_client: # If bot is in a voice channel - try: - import shutil - # Check if FFmpeg is available - if not shutil.which("ffmpeg"): - await channel.send("āš ļø FFmpeg is not available. Audio playback disabled. (This should not happen in Docker deployment)") - else: - tts = gTTS(assistant_message, lang="en") - tts.save("response.mp3") - - # Check if bot is still connected before playing - if ctx.voice_client and ctx.voice_client.is_connected(): - ctx.voice_client.play(discord.FFmpegPCMAudio("response.mp3")) - await channel.send("šŸ”Š Playing TTS audio...") - except Exception as e: - await channel.send(f"āš ļø TTS audio playback failed: {str(e)}") + tts = gTTS(assistant_message, lang="en") + tts.save("response.mp3") + ctx.voice_client.play(discord.FFmpegPCMAudio("response.mp3")) user_history.append({"role": "assistant", "content": assistant_message}) @@ -2519,312 +2506,6 @@ async def leave(ctx): else: await ctx.send("I am not in a voice channel.") -@client.hybrid_command() -async def speech_to_text(ctx, *, audio_attachment: discord.Attachment = None): - """Converts audio file to text using speech recognition. - - Usage: - /speech_to_text [attach audio file] - - Supported formats: .wav, .mp3, .m4a, .ogg, .flac - """ - # Check if it's a slash command and defer if needed - is_slash_command = hasattr(ctx, 'interaction') and ctx.interaction - if is_slash_command: - await ctx.defer() - - # Helper function for sending responses - async def send_response(content=None, embed=None, ephemeral=False): - try: - if is_slash_command and hasattr(ctx, 'followup'): - if embed: - await ctx.followup.send(embed=embed, ephemeral=ephemeral) - else: - await ctx.followup.send(content, ephemeral=ephemeral) - else: - if embed: - await ctx.send(embed=embed) - else: - await ctx.send(content) - except Exception as e: - logger.error(f"Error sending response in speech_to_text: {e}") - # Fallback to regular send if followup fails - try: - if embed: - await ctx.send(embed=embed) - else: - await ctx.send(content) - except Exception as fallback_error: - logger.error(f"Fallback send also failed: {fallback_error}") - - try: - # Get attachment (either from parameter or message) - attachment = audio_attachment - if not attachment and hasattr(ctx, 'message') and ctx.message and ctx.message.attachments: - attachment = ctx.message.attachments[0] - - if not attachment: - embed = discord.Embed( - title="āŒ No Audio File", - description="Please attach an audio file to convert to text.\n\nSupported formats: `.wav`, `.mp3`, `.m4a`, `.ogg`, `.flac`", - color=0xff0000 - ) - await send_response(embed=embed, ephemeral=True) - return - - # Check file type - supported_formats = ['.wav', '.mp3', '.m4a', '.ogg', '.flac'] - file_extension = os.path.splitext(attachment.filename)[1].lower() - - if file_extension not in supported_formats: - embed = discord.Embed( - title="āŒ Unsupported Format", - description=f"File format `{file_extension}` is not supported.\n\nSupported formats: {', '.join(supported_formats)}", - color=0xff0000 - ) - await send_response(embed=embed, ephemeral=True) - return - - # Check file size (max 25MB for Discord, but we'll be more conservative) - max_size = 10 * 1024 * 1024 # 10MB - if attachment.size > max_size: - embed = discord.Embed( - title="āŒ File Too Large", - description=f"File size ({attachment.size / 1024 / 1024:.1f}MB) exceeds maximum allowed size (10MB).", - color=0xff0000 - ) - await send_response(embed=embed, ephemeral=True) - return - - # Download and process the audio file - embed = discord.Embed( - title="šŸŽ¤ Processing Audio", - description="Downloading and converting audio to text...", - color=0x3498db - ) - await send_response(embed=embed) - - # Create temp directory if it doesn't exist - temp_dir = "temp_audio" - if not os.path.exists(temp_dir): - os.makedirs(temp_dir) - - # Download file - temp_filename = f"{uuid.uuid4()}_{attachment.filename}" - temp_filepath = os.path.join(temp_dir, temp_filename) - - audio_data = await attachment.read() - with open(temp_filepath, 'wb') as f: - f.write(audio_data) - - # Initialize speech recognizer - recognizer = sr.Recognizer() - - try: - # Convert audio to WAV if needed (speech_recognition works best with WAV) - wav_filepath = temp_filepath - if file_extension != '.wav': - from pydub import AudioSegment - wav_filepath = os.path.splitext(temp_filepath)[0] + '.wav' - audio = AudioSegment.from_file(temp_filepath) - audio.export(wav_filepath, format="wav") - - # Process audio file - with sr.AudioFile(wav_filepath) as source: - # Adjust for ambient noise - recognizer.adjust_for_ambient_noise(source, duration=0.5) - audio = recognizer.record(source) - - # Convert speech to text using Google Speech Recognition - try: - text = recognizer.recognize_google(audio, language='en-US') - - if not text.strip(): - embed = discord.Embed( - title="āš ļø No Speech Detected", - description="No speech was detected in the audio file. Please ensure the audio contains clear speech.", - color=0xffa500 - ) - await send_response(embed=embed) - return - - # Create success embed - embed = discord.Embed( - title="šŸŽ¤ Speech to Text Result", - color=0x00ff00, - timestamp=datetime.now() - ) - - # Truncate text if too long for embed - if len(text) > 1000: - embed.add_field( - name="šŸ“ Transcribed Text", - value=text[:1000] + "...", - inline=False - ) - embed.add_field( - name="ā„¹ļø Note", - value=f"Text was truncated. Full length: {len(text)} characters.", - inline=False - ) - else: - embed.add_field( - name="šŸ“ Transcribed Text", - value=text, - inline=False - ) - - embed.add_field( - name="šŸ“Š File Info", - value=f"**Filename:** {attachment.filename}\n**Size:** {attachment.size / 1024:.1f} KB\n**Format:** {file_extension.upper()}", - inline=True - ) - - embed.set_footer(text=f"Processed by {ctx.author.display_name}") - - await send_response(embed=embed) - - # If text is very long, also send as text file - if len(text) > 1500: - text_filename = f"transcription_{ctx.author.id}_{int(time.time())}.txt" - text_filepath = os.path.join(temp_dir, text_filename) - - with open(text_filepath, 'w', encoding='utf-8') as f: - f.write(f"Speech-to-Text Transcription\n") - f.write(f"Original file: {attachment.filename}\n") - f.write(f"Processed by: {ctx.author.display_name}\n") - f.write(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") - f.write(f"\n{'='*50}\n\n") - f.write(text) - - with open(text_filepath, 'rb') as f: - await ctx.send( - "šŸ“„ **Full transcription** (text too long for embed):", - file=discord.File(f, text_filename) - ) - - # Clean up text file - try: - os.remove(text_filepath) - except: - pass - - except sr.UnknownValueError: - embed = discord.Embed( - title="āŒ Speech Not Recognized", - description="Could not understand the speech in the audio file. Please ensure:\n• Audio is clear and not too noisy\n• Speech is in English\n• Audio quality is good", - color=0xff0000 - ) - await send_response(embed=embed) - - except sr.RequestError as e: - embed = discord.Embed( - title="āŒ Recognition Service Error", - description=f"Could not request results from speech recognition service: {e}", - color=0xff0000 - ) - await send_response(embed=embed) - - finally: - # Clean up temporary files - try: - if os.path.exists(temp_filepath): - os.remove(temp_filepath) - if wav_filepath != temp_filepath and os.path.exists(wav_filepath): - os.remove(wav_filepath) - except Exception as e: - logger.warning(f"Could not clean up temp files: {e}") - - except Exception as e: - logger.error(f"Error in speech_to_text command: {e}") - embed = discord.Embed( - title="āŒ Error", - description="An error occurred while processing the audio file. Please try again.", - color=0xff0000 - ) - await send_response(embed=embed) - -@client.hybrid_command() -async def live_speech(ctx, duration: int = 10): - """Records audio from your microphone and converts it to text. - - Usage: - /live_speech [duration in seconds (default: 10, max: 30)] - - Note: You need to be in a voice channel with the bot. - """ - # Check if it's a slash command and defer if needed - is_slash_command = hasattr(ctx, 'interaction') and ctx.interaction - if is_slash_command: - await ctx.defer() - - # Helper function for sending responses - async def send_response(content=None, embed=None, ephemeral=False): - try: - if is_slash_command and hasattr(ctx, 'followup'): - if embed: - await ctx.followup.send(embed=embed, ephemeral=ephemeral) - else: - await ctx.followup.send(content, ephemeral=ephemeral) - else: - if embed: - await ctx.send(embed=embed) - else: - await ctx.send(content) - except Exception as e: - logger.error(f"Error sending response in live_speech: {e}") - # Fallback to regular send if followup fails - try: - if embed: - await ctx.send(embed=embed) - else: - await ctx.send(content) - except Exception as fallback_error: - logger.error(f"Fallback send also failed: {fallback_error}") - - try: - # Validate duration - if duration < 1: - duration = 1 - elif duration > 30: - duration = 30 - - # Check if user is in voice channel - if not ctx.author.voice: - embed = discord.Embed( - title="āŒ Not in Voice Channel", - description="You need to be in a voice channel to use live speech recognition.", - color=0xff0000 - ) - await send_response(embed=embed, ephemeral=True) - return - - # Start recording notification - embed = discord.Embed( - title="šŸŽ¤ Live Speech Recognition", - description=f"šŸ”“ **Recording for {duration} seconds...**\n\nSpeak clearly into your microphone!", - color=0xff0000 - ) - await send_response(embed=embed) - - # Note: This is a simplified version as Discord bots can't directly access user microphones - # This would require a client-side application or different implementation - embed = discord.Embed( - title="šŸŽ¤ Live Speech Recognition", - description="āš ļø **Feature Note**\n\nLive microphone recording requires additional setup. Use `/speech_to_text` with audio files instead.", - color=0xffa500 - ) - await send_response(embed=embed) - - except Exception as e: - logger.error(f"Error in live_speech command: {e}") - embed = discord.Embed( - title="šŸŽ¤ Live Speech Recognition", - description="āŒ **Error occurred**\n\nCould not process live speech recognition.", - color=0xff0000 - ) - await send_response(embed=embed) - @client.hybrid_command() async def toggle_feature(ctx, feature: str, state: str): """Allows admin to enable or disable bot features.""" @@ -5952,157 +5633,6 @@ async def contact_status(ctx): await ctx.send(f"Error getting contact status: {e}") logger.error(f"Error in contact_status: {e}") -@client.hybrid_command() -async def join_voice(ctx): - """Join the voice channel that the user is currently in.""" - # Check if it's a slash command and defer if needed - is_slash_command = hasattr(ctx, 'interaction') and ctx.interaction - if is_slash_command: - await ctx.defer() - - # Helper function for sending responses - async def send_response(content=None, embed=None, ephemeral=False): - try: - if is_slash_command and hasattr(ctx, 'followup'): - if embed: - return await ctx.followup.send(embed=embed, ephemeral=ephemeral) - else: - return await ctx.followup.send(content, ephemeral=ephemeral) - else: - # Fallback for regular commands or if followup isn't available - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - except Exception as e: - # Final fallback - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - - if ctx.author.voice is None: - await send_response("āŒ You need to be in a voice channel for me to join!") - return - - voice_channel = ctx.author.voice.channel - - if ctx.voice_client is not None: - if ctx.voice_client.channel == voice_channel: - await send_response("šŸ”Š I'm already in your voice channel!") - return - else: - await ctx.voice_client.move_to(voice_channel) - await send_response(f"šŸ”Š Moved to **{voice_channel.name}**!") - return - - try: - await voice_channel.connect() - await send_response(f"āœ… Joined **{voice_channel.name}**! You can now use TTS features.") - except Exception as e: - await send_response(f"āŒ Failed to join voice channel: {str(e)}") - -@client.hybrid_command() -async def leave_voice(ctx): - """Leave the current voice channel.""" - # Check if it's a slash command and defer if needed - is_slash_command = hasattr(ctx, 'interaction') and ctx.interaction - if is_slash_command: - await ctx.defer() - - # Helper function for sending responses - async def send_response(content=None, embed=None, ephemeral=False): - try: - if is_slash_command and hasattr(ctx, 'followup'): - if embed: - return await ctx.followup.send(embed=embed, ephemeral=ephemeral) - else: - return await ctx.followup.send(content, ephemeral=ephemeral) - else: - # Fallback for regular commands or if followup isn't available - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - except Exception as e: - # Final fallback - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - - if ctx.voice_client is None: - await send_response("āŒ I'm not in a voice channel!") - return - - try: - await ctx.voice_client.disconnect() - await send_response("šŸ‘‹ Left the voice channel!") - except Exception as e: - await send_response(f"āŒ Failed to leave voice channel: {str(e)}") - -@client.hybrid_command() -async def test_tts(ctx, *, text: str = "Hello! This is a TTS test."): - """Test TTS functionality with custom text.""" - # Check if it's a slash command and defer if needed - is_slash_command = hasattr(ctx, 'interaction') and ctx.interaction - if is_slash_command: - await ctx.defer() - - # Helper function for sending responses - async def send_response(content=None, embed=None, ephemeral=False): - try: - if is_slash_command and hasattr(ctx, 'followup'): - if embed: - return await ctx.followup.send(embed=embed, ephemeral=ephemeral) - else: - return await ctx.followup.send(content, ephemeral=ephemeral) - else: - # Fallback for regular commands or if followup isn't available - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - except Exception as e: - # Final fallback - if embed: - return await ctx.send(embed=embed) - else: - return await ctx.send(content) - - if ctx.voice_client is None: - await send_response("āŒ I need to be in a voice channel first! Use `/join_voice` to get me connected.") - return - - if len(text) > 500: - await send_response("āŒ Text is too long! Please keep it under 500 characters.") - return - - try: - import shutil - from gtts import gTTS - - # Check if FFmpeg is available - if not shutil.which("ffmpeg"): - await send_response("āš ļø FFmpeg is not available. (This should not happen in Docker deployment)") - return - - # Generate TTS audio - await send_response("šŸ”„ Generating TTS audio...") - - tts = gTTS(text, lang="en") - tts.save("tts_test.mp3") - - # Check if bot is still connected before playing - if ctx.voice_client and ctx.voice_client.is_connected(): - ctx.voice_client.play(discord.FFmpegPCMAudio("tts_test.mp3")) - await send_response(f"šŸ”Š Playing TTS: \"{text[:100]}{'...' if len(text) > 100 else ''}\"") - else: - await send_response("āŒ Lost connection to voice channel!") - - except Exception as e: - await send_response(f"āŒ TTS test failed: {str(e)}") - try: # Initialize database tables create_warnings_table()