# File-viewer metadata from the original listing: Python, 47 lines, 1.7 KiB
import os
import re

import speech_recognition as sr
from pydub import AudioSegment, silence

# Split an MP3 recording into one WAV file per spoken word.
#
# The recording is cut at pauses, every resulting segment is transcribed
# (Google Web Speech first, PocketSphinx as fallback) and the segment is stored
# under a file name derived from the recognized text.

INPUT_FILE = "input.mp3"
LANGUAGE = "de-DE"
TARGET_FRAME_RATE = 16000      # Hz; mono at 16 kHz for better recognition
MIN_SILENCE_MS = 200           # pauses must be longer than this to split words
SILENCE_THRESH_DBFS = -45      # anything quieter than this counts as silence

# Runs of characters that are neither word characters nor "-" are replaced in
# file names (covers spaces, path separators, punctuation, ...).
_UNSAFE_CHARS = re.compile(r"[^\w-]+")


def split_at_silences(silent_ranges, total_length: int):
    """Return the non-empty ``(start, end)`` spans lying between silences.

    Args:
        silent_ranges: Iterable of ``(start, end)`` pairs in ascending order,
            in the same unit as ``total_length``.
        total_length: Length of the whole recording.

    Returns:
        A list of ``(start, end)`` tuples. Zero-length spans (recording starts
        or ends with silence, or two silences touch) are left out so that no
        empty audio is exported or sent to a recognizer.
    """
    segments = []
    cursor = 0
    for silence_start, silence_end in silent_ranges:
        if silence_start > cursor:
            segments.append((cursor, silence_start))
        cursor = max(cursor, silence_end)
    if total_length > cursor:
        segments.append((cursor, total_length))  # trailing word
    return segments


def safe_stem(text: str, fallback: str) -> str:
    """Turn recognized text into a file-name stem.

    Every run of characters other than word characters and ``-`` becomes a
    single underscore; leading/trailing underscores are removed. ``fallback``
    is returned when nothing usable is left.
    """
    cleaned = _UNSAFE_CHARS.sub("_", text).strip("_")
    return cleaned or fallback


def unique_name(stem: str, used: set, suffix: str = ".wav") -> str:
    """Return a file name for ``stem`` that was not handed out before.

    ``used`` holds the case-folded names already assigned during this run and
    is updated in place. Repeated stems get ``_2``, ``_3``, ... appended so a
    word spoken twice does not overwrite its earlier segment. Comparison is
    case-insensitive because some file systems treat ``Haus.wav`` and
    ``haus.wav`` as the same file.
    """
    candidate = f"{stem}{suffix}"
    counter = 2
    while candidate.casefold() in used:
        candidate = f"{stem}_{counter}{suffix}"
        counter += 1
    used.add(candidate.casefold())
    return candidate


def transcribe(recognizer, audio_data, fallback: str) -> str:
    """Return the recognized text for ``audio_data`` or ``fallback``.

    Google is tried first, then Sphinx. ``sr.UnknownValueError`` (speech not
    understood) silently moves on to the next engine. ``sr.RequestError``
    (engine unreachable or not installed) is reported and then also moves on,
    so one failing engine does not abort the whole run.
    """
    engines = (
        ("Google", recognizer.recognize_google),
        ("Sphinx", recognizer.recognize_sphinx),
    )
    for engine_name, recognize in engines:
        try:
            return recognize(audio_data, language=LANGUAGE)
        except sr.UnknownValueError:
            continue
        except sr.RequestError as error:
            print(f"Warnung: {engine_name} nicht verfügbar ({error})")
    return fallback


def main() -> None:
    """Cut ``INPUT_FILE`` into words, transcribe them and save one WAV each."""
    # Load the MP3 and normalize it to mono / 16 kHz.
    audio = AudioSegment.from_mp3(INPUT_FILE)
    audio = audio.set_channels(1).set_frame_rate(TARGET_FRAME_RATE)

    # Detect pauses and drop the ones that are not longer than the minimum.
    silent_ranges = silence.detect_silence(
        audio,
        min_silence_len=MIN_SILENCE_MS,
        silence_thresh=SILENCE_THRESH_DBFS,
    )
    silent_ranges = [
        (begin, finish)
        for begin, finish in silent_ranges
        if finish - begin > MIN_SILENCE_MS
    ]
    word_segments = split_at_silences(silent_ranges, len(audio))

    recognizer = sr.Recognizer()
    used_names = set()

    for number, (start, end) in enumerate(word_segments, start=1):
        temp_file = f"temp_word_{number}.wav"
        audio[start:end].export(temp_file, format="wav")
        try:
            with sr.AudioFile(temp_file) as source:
                audio_data = recognizer.record(source)

            fallback = f"unbekannt_{number}"
            word_text = safe_stem(
                transcribe(recognizer, audio_data, fallback), fallback
            )
            final_file = unique_name(word_text, used_names)
            # os.replace overwrites output of an earlier run on every platform.
            os.replace(temp_file, final_file)
        finally:
            # Only still present when something above failed.
            if os.path.exists(temp_file):
                os.remove(temp_file)

        print(f"Wort {number}: {word_text} -> Gespeichert als {final_file}")

    print("Fertig! Alle Worte wurden gespeichert.")


if __name__ == "__main__":
    main()