Files
goetheverlag/mp3/Trennen2.py
2025-06-29 15:55:23 +02:00

70 lines
2.1 KiB
Python

import os
import re
import tempfile

import certifi
import whisper
from pyannote.audio import Pipeline
from pydub import AudioSegment
# Point SSL_CERT_FILE at certifi's CA bundle before anything is downloaded
# (presumably so the model downloads below can verify HTTPS certificates).
os.environ['SSL_CERT_FILE'] = certifi.where()

# Input recording, output folder for the exported clips, and the language
# code handed to Whisper when transcribing.
AUDIO_PATH = "SVSV011.MP3"
OUTPUT_DIR = "output_phrases"
LANGUAGE = "sv"

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the audio
audio = AudioSegment.from_file(AUDIO_PATH)

# 1) Load the diarization pipeline (requires a HuggingFace token)
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
if pipeline is None:
    # pyannote reports an inaccessible (gated) model by returning None
    # rather than raising; fail here with an actionable message instead of
    # crashing on the call below with "'NoneType' object is not callable".
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization'. Accept the model's "
        "user conditions on huggingface.co and log in with a valid "
        "HuggingFace access token (e.g. `huggingface-cli login`)."
    )
print("🔍 Diarization starten...")
diarization = pipeline(AUDIO_PATH)

# 2) Load the Whisper model
model = whisper.load_model("medium")
# 3) + 4) Turn every diarized speaker turn into a small record holding its
# start time, end time and speaker label, ordered by start time.
segments = sorted(
    (
        {"start": turn.start, "end": turn.end, "speaker": speaker}
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ),
    key=lambda seg: seg["start"],
)
# 5) Build male -> female phrase pairs and cut them out of the recording.
# Adjust these two labels once you know which diarization label belongs to
# the male and which to the female voice (or detect it dynamically).
# pyannote names speakers with a zero-padded two-digit index
# ("SPEAKER_00", "SPEAKER_01", ...); un-padded labels such as "SPEAKER_0"
# never match any segment, so nothing would be exported.
male_speaker = "SPEAKER_00"
female_speaker = "SPEAKER_01"
# File names of the phrases written to OUTPUT_DIR, in export order.
phrases = []


def _phrase_filename(text, index, taken):
    """Return a unique ``.wav`` file name derived from a transcription.

    ``text`` is lower-cased, stripped of everything except word characters
    and spaces, spaces become underscores, and the result is cut to 60
    characters. If nothing usable remains, ``phrase_<index>`` is used
    instead. If the stem is already in ``taken``, a numeric suffix
    (``_2``, ``_3``, ...) is appended. The chosen stem is added to
    ``taken`` before returning.
    """
    stem = re.sub(r"[^\wäöåÄÖÅ ]+", "", text.lower()).strip().replace(" ", "_")[:60]
    if not stem:
        # Empty or punctuation-only transcription: avoid a bare ".wav".
        stem = f"phrase_{index:03d}"
    candidate = stem
    suffix = 2
    while candidate in taken:
        # Same sentence heard before: do not overwrite the earlier clip.
        candidate = f"{stem}_{suffix}"
        suffix += 1
    taken.add(candidate)
    return candidate + ".wav"


used_stems = set()
# The transcription input lives in a scratch directory that is removed when
# the loop finishes, instead of leaving temp_female.wav in the working dir.
with tempfile.TemporaryDirectory() as scratch_dir:
    female_wav = os.path.join(scratch_dir, "temp_female.wav")
    i = 0
    while i < len(segments) - 1:
        first, second = segments[i], segments[i + 1]
        if first['speaker'] != male_speaker or second['speaker'] != female_speaker:
            # Not a male turn directly followed by a female turn.
            i += 1
            continue

        start_ms = int(first['start'] * 1000)
        reply_start_ms = int(second['start'] * 1000)
        end_ms = int(second['end'] * 1000)
        clip = audio[start_ms:end_ms]

        # Transcribe only the female part; its text names the output file.
        # export() hands back the open file object, so close it explicitly
        # before Whisper reads the file.
        audio[reply_start_ms:end_ms].export(female_wav, format="wav").close()
        result = model.transcribe(female_wav, language=LANGUAGE)
        female_text = result['text'].strip()

        filename = _phrase_filename(female_text, len(phrases) + 1, used_stems)
        clip.export(os.path.join(OUTPUT_DIR, filename), format="wav").close()
        phrases.append(filename)
        print(f"✅ Phrase gespeichert: {filename}")
        # Both segments of the pair are consumed.
        i += 2
print("🏁 Alles fertig!")