# Original listing: Python, 56 lines, 1.6 KiB
import os
import re
from difflib import SequenceMatcher

import whisper
from pydub import AudioSegment

# ---------- CONFIG ----------
AUDIO_PATH = "015/SVSV015.MP3"
OUTPUT_DIR = "015/phrases"
MODEL_SIZE = "base"
LANG = "sv"
# Minimum SequenceMatcher ratio for two consecutive segments to be treated as
# the same phrase spoken twice.
SIMILARITY_THRESHOLD = 0.85
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ----------------------------

# Load the model, transcribe the recording and load the audio for slicing.
model = whisper.load_model(MODEL_SIZE)
result = model.transcribe(AUDIO_PATH, language=LANG, word_timestamps=False)
audio = AudioSegment.from_file(AUDIO_PATH)

segments = result["segments"]

# Runs of characters matched by this pattern are replaced by "_" when a
# phrase is turned into a file name. Compiled once, outside the loop.
unsafe_chars = re.compile(r"[^\wäöåÄÖÅ]+")
# File names written during this run, so that two pairs with identical text
# do not silently overwrite each other. Files left over from an earlier run
# are still overwritten, which keeps re-runs idempotent.
used_filenames = set()

# Walk the segments pairwise: a pair (i, i + 1) with near-identical text is
# exported as one clip. The index is advanced manually because a match
# consumes two segments while a mismatch consumes only one.
i = 0
while i < len(segments) - 1:
    first = segments[i]
    second = segments[i + 1]

    text1 = first["text"].strip().lower()
    text2 = second["text"].strip().lower()
    print(text1)
    print(text2)

    ratio = SequenceMatcher(None, text1, text2).ratio()
    if ratio < SIMILARITY_THRESHOLD:
        print(f"⛔ Unterschiedliche Texte übersprungen:\n Mann: {text1}\n Frau: {text2}")
        # Step by one only: the second segment may still pair with the
        # segment that follows it.
        i += 1
        continue

    # Use the female (second) phrase as the file name.
    phrase_text = text2
    clean_name = unsafe_chars.sub("_", phrase_text).strip("_")
    if not clean_name:
        # Text without any word characters (e.g. empty or punctuation only)
        # would otherwise produce the hidden file ".wav".
        clean_name = f"phrase_{i:03d}"

    filename = f"{clean_name}.wav"
    duplicate = 2
    while filename in used_filenames:
        filename = f"{clean_name}_{duplicate}.wav"
        duplicate += 1
    used_filenames.add(filename)
    filepath = os.path.join(OUTPUT_DIR, filename)

    # Clip boundaries: from the start of the first (male) phrase to the end
    # of the second one, but never past the start of the next segment.
    start = first["start"]
    if i + 2 < len(segments):
        end = min(second["end"], segments[i + 2]["start"])
    else:
        end = second["end"]

    # Segment times are in seconds; pydub slices by milliseconds.
    clip = audio[int(start * 1000):int(end * 1000)]
    clip.export(filepath, format="wav")
    print(f"💾 Gespeichert: {filename} ({start:.2f}s – {end:.2f}s)")

    i += 2