Initial commit
This commit is contained in:
70
mp3/Trennen2.py
Normal file
70
mp3/Trennen2.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Cut male/female phrase pairs out of a language-course MP3.

Diarizes the recording with pyannote, then for every consecutive
(male, female) speaker pair exports the combined clip to OUTPUT_DIR,
named after the Whisper transcription of the female part.
"""

from pyannote.audio import Pipeline
import whisper
from pydub import AudioSegment
import os
import re

import certifi

# Point TLS verification at certifi's CA bundle so the HuggingFace /
# Whisper model downloads work on systems with a broken system cert store.
os.environ['SSL_CERT_FILE'] = certifi.where()

AUDIO_PATH = "SVSV011.MP3"      # input recording
OUTPUT_DIR = "output_phrases"   # exported phrase clips go here
LANGUAGE = "sv"                 # Whisper transcription language (Swedish)

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load the full recording once; clips are sliced from it in milliseconds.
audio = AudioSegment.from_file(AUDIO_PATH)

# 1) Load the diarization pipeline (requires a HuggingFace token).
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")

print("🔍 Diarization starten...")
diarization = pipeline(AUDIO_PATH)

# 2) Load the Whisper model.
model = whisper.load_model("medium")

# 3) Collect diarized segments as plain dicts.
segments = [
    {"start": turn.start, "end": turn.end, "speaker": speaker}
    for turn, _, speaker in diarization.itertracks(yield_label=True)
]

# 4) Sort segments by start time so adjacent entries are consecutive turns.
segments.sort(key=lambda s: s["start"])

# 5) Build male-female phrases and cut them out.
# Adjust here if you know which speaker is male/female, or detect dynamically.
# NOTE(review): pyannote emits zero-padded labels ("SPEAKER_00", "SPEAKER_01");
# the original "SPEAKER_0"/"SPEAKER_1" never matched, so no phrase was ever
# exported. Confirm against the actual diarization output.
male_speaker = "SPEAKER_00"
female_speaker = "SPEAKER_01"

TEMP_WAV = "temp_female.wav"

i = 0
while i < len(segments) - 1:
    if segments[i]['speaker'] == male_speaker and segments[i + 1]['speaker'] == female_speaker:
        start_ms = int(segments[i]['start'] * 1000)
        end_ms = int(segments[i + 1]['end'] * 1000)
        clip = audio[start_ms:end_ms]

        # Transcribe only the female part; its text names the output file.
        female_audio = audio[int(segments[i + 1]['start'] * 1000):int(segments[i + 1]['end'] * 1000)]
        female_audio.export(TEMP_WAV, format="wav")
        try:
            result = model.transcribe(TEMP_WAV, language=LANGUAGE)
        finally:
            # Don't leak the scratch file, even if transcription fails.
            if os.path.exists(TEMP_WAV):
                os.remove(TEMP_WAV)
        female_text = result['text'].strip()

        # Slugify: keep word chars plus Swedish letters, spaces -> underscores.
        slug = re.sub(r"[^\wäöåÄÖÅ ]+", "", female_text.lower()).strip().replace(" ", "_")
        if not slug:
            # Empty/failed transcription would otherwise yield ".wav".
            slug = f"phrase_{i}"
        filename = slug[:60] + ".wav"
        clip.export(os.path.join(OUTPUT_DIR, filename), format="wav")
        print(f"✅ Phrase gespeichert: {filename}")

        i += 2  # consume the whole male+female pair
    else:
        i += 1

print("🏁 Alles fertig!")
|
||||
Reference in New Issue
Block a user