Files
txt2apkg/GoetheVerlag/mp3/Trennen.py
Sven Riwoldt 853e9bab4d Bis 017
2025-05-31 07:42:45 +02:00

56 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import whisper
from pydub import AudioSegment
import os
import re
from difflib import SequenceMatcher
# Split a recording in which every phrase is spoken twice in a row into one
# WAV clip per phrase. Whisper provides the segment texts and timestamps;
# two consecutive segments whose texts are similar enough are treated as one
# phrase pair and exported together as a single clip.

# ---------- CONFIG ----------
AUDIO_PATH = "015/SVSV015.MP3"
OUTPUT_DIR = "015/phrases"
MODEL_SIZE = "base"
LANG = "sv"
# Minimum SequenceMatcher ratio for two neighbouring segments to count as the
# same phrase.
SIMILARITY_THRESHOLD = 0.85
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ----------------------------

# Compiled once instead of on every loop iteration. Each run of characters
# outside this class is collapsed into a single "_" when building file names.
_UNSAFE_NAME_CHARS = re.compile(r"[^\wäöåÄÖÅ]+")


def _unique_clip_name(text, fallback, used_names):
    """Return a file-name-safe base name for *text* that is not in *used_names*.

    Args:
        text: Transcribed phrase the clip is named after.
        fallback: Name to use when nothing usable remains after sanitising
            (otherwise the clip would be written as the hidden file ".wav").
        used_names: Set of base names already handed out in this run; the
            returned name is added to it.

    Returns:
        The sanitised name, with "_2", "_3", ... appended when the same phrase
        has already been exported, so an earlier clip is never overwritten.
    """
    base = _UNSAFE_NAME_CHARS.sub("_", text).strip("_") or fallback
    candidate = base
    counter = 2
    while candidate in used_names:
        candidate = f"{base}_{counter}"
        counter += 1
    used_names.add(candidate)
    return candidate


# Load model & audio
model = whisper.load_model(MODEL_SIZE)
result = model.transcribe(AUDIO_PATH, language=LANG, word_timestamps=False)
audio = AudioSegment.from_file(AUDIO_PATH)
segments = result["segments"]

_used_names = set()
i = 0
# A manual index is needed because the step varies: a matched pair consumes
# two segments, a mismatch advances by only one so the second segment gets a
# chance to pair with its successor.
while i < len(segments) - 1:
    first = segments[i]
    second = segments[i + 1]
    text1 = first["text"].strip().lower()
    text2 = second["text"].strip().lower()
    print(text1)
    print(text2)

    ratio = SequenceMatcher(None, text1, text2).ratio()
    if ratio < SIMILARITY_THRESHOLD:
        print(f"⛔ Unterschiedliche Texte übersprungen:\n Mann: {text1}\n Frau: {text2}")
        i += 1
        continue

    # The second segment (labelled as the female voice in the messages above)
    # supplies the file name.
    clean_name = _unique_clip_name(text2, f"phrase_{i:03d}", _used_names)
    filename = f"{clean_name}.wav"
    filepath = os.path.join(OUTPUT_DIR, filename)

    # The clip runs from the start of the first segment to the end of the
    # second one, but never past the start of the following segment.
    start = first["start"]
    if i + 2 < len(segments):
        end = min(second["end"], segments[i + 2]["start"])
    else:
        end = second["end"]

    # Segment times are scaled by 1000 for slicing (seconds -> milliseconds).
    clip = audio[int(start * 1000):int(end * 1000)]
    clip.export(filepath, format="wav")
    print(f"💾 Gespeichert: {filename} ({start:.2f}s {end:.2f}s)")
    i += 2