01 in Namen konvertiert

This commit is contained in:
2026-01-29 19:50:33 +01:00
parent da93fd80ed
commit af13ac0f5a
54 changed files with 55 additions and 2 deletions
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
View File
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
Binary file not shown.
+53
View File
@@ -0,0 +1,53 @@
import sys
import whisper
import os
import re
import ssl
def transcribe_and_fix(mp3_file):
    """Transcribe an audio file with Whisper and rename it after the transcript.

    The file is transcribed with the Whisper "large-v3" model, forcing the
    language to Swedish ("sv"). The transcript is turned into a file name by
    dropping every character that is neither a word character nor whitespace,
    collapsing whitespace runs into single underscores and lower-casing the
    result. The file is then renamed to "<transcript>.mp3" inside its original
    directory.

    Progress and errors are reported via ``print``; nothing is returned. An
    existing, different file at the target path is never overwritten.

    Args:
        mp3_file: Path of the audio file to transcribe and rename.
    """
    # SECURITY NOTE(review): the next lines switch off TLS certificate
    # verification for the whole process (most likely so the model download
    # works behind an intercepting proxy -- confirm). This exposes the
    # download to man-in-the-middle attacks; prefer installing the proxy's CA
    # certificate instead. Behaviour is kept unchanged here on purpose.
    if (not os.environ.get('PYTHONHTTPSVERIFY', '') and
            getattr(ssl, '_create_unverified_context', None)):
        ssl._create_default_https_context = ssl._create_unverified_context
    # Environment variables that several HTTP libraries consult for their CA
    # configuration; an empty bundle / "0" is meant to disable verification.
    os.environ['CURL_CA_BUNDLE'] = ""
    os.environ['PYTHONHTTPSVERIFY'] = "0"

    model = whisper.load_model("large-v3")

    print("Analysiere Audio...")
    # language="sv" forces Swedish instead of auto-detection; fp16=False is
    # passed through to Whisper (presumably to force FP32 on CPU -- confirm).
    result = model.transcribe(mp3_file, language="sv", fp16=False)
    text = result['text'].strip()

    # Keep only word characters and whitespace (the Swedish letters are listed
    # explicitly in the pattern), then join the remaining words with single
    # underscores so tabs, newlines or doubled spaces never reach the name.
    stripped = re.sub(r'[^\w\säöåÄÖÅ]', '', text)
    clean_text = "_".join(stripped.split()).lower()

    if not clean_text:
        print("Konnte keinen Text extrahieren.")
        return

    new_filename = f"{clean_text}.mp3"
    # os.path.join copes with an empty dirname (file in the current working
    # directory); plain concatenation would yield "/<name>.mp3", i.e. a path
    # in the filesystem root.
    target = os.path.join(os.path.dirname(mp3_file), new_filename)

    try:
        # os.rename silently replaces an existing target on POSIX. Refuse to
        # clobber another recording, but still allow renaming a file onto
        # itself (e.g. a case-only change on a case-insensitive filesystem).
        if os.path.exists(target) and not os.path.samefile(mp3_file, target):
            print(f"Fehler beim Umbenennen: Ziel '{target}' existiert bereits")
            return
        os.rename(mp3_file, target)
    except OSError as e:
        print(f"Fehler beim Umbenennen: {e}")
    else:
        print(f"Erfolg: '{mp3_file}' -> '{new_filename}'")
# Script entry point: transcribe and rename the file named by the first
# command-line argument. Guarded so that importing this module does not
# trigger a transcription, and so a missing argument yields a usage message
# instead of an IndexError traceback.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"Aufruf: {sys.argv[0]} <datei.mp3>")
    transcribe_and_fix(sys.argv[1])
BIN
View File
Binary file not shown.
+2 -2
View File
@@ -3,13 +3,13 @@ import speech_recognition as sr
import os import os
# Lade die MP3-Datei und konvertiere sie zu WAV # Lade die MP3-Datei und konvertiere sie zu WAV
audio = AudioSegment.from_mp3("20250614.mp3") audio = AudioSegment.from_mp3("/Users/svenriwoldt/PycharmProjects/CSV2ANKI/mp3/01/5002.mp3")
audio = audio.set_channels(2).set_frame_rate(48000) # Mono & 16kHz für bessere Erkennung audio = audio.set_channels(2).set_frame_rate(48000) # Mono & 16kHz für bessere Erkennung
BUFFER_BEFORE = 300 BUFFER_BEFORE = 300
BUFFER_AFTER = 300 BUFFER_AFTER = 300
# Stille erkennen mit optimierten Werten # Stille erkennen mit optimierten Werten
silent_ranges = silence.detect_silence(audio, min_silence_len=400, silence_thresh=-45) silent_ranges = silence.detect_silence(audio, min_silence_len=400, silence_thresh=0)
silent_ranges = [(start, end) for start, end in silent_ranges if end - start > 200] # Kurze Pausen filtern silent_ranges = [(start, end) for start, end in silent_ranges if end - start > 200] # Kurze Pausen filtern
# Wortsegmente bestimmen # Wortsegmente bestimmen