Initial commit
This commit is contained in:
BIN
mp3cut/SpeechRecognition-3.14.1-py3-none-any.whl
Normal file
BIN
mp3cut/SpeechRecognition-3.14.1-py3-none-any.whl
Normal file
Binary file not shown.
21
mp3cut/convert.py
Executable file
21
mp3cut/convert.py
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/python3
|
||||
# coding=utf-8
|
||||
from pydub import AudioSegment
|
||||
import sys,os
|
||||
|
||||
def convert(wav):
    """Convert the WAV file at ``wav`` into an MP3 file.

    The output path is the input path with its extension replaced by
    ``.mp3``; the export requests a bitrate of 192 kbps. The input path
    and a completion message are printed to stdout.
    """
    # Load the WAV file.
    sound = AudioSegment.from_wav(wav)

    print(wav)

    # Swap the extension: "name.wav" -> "name.mp3".
    stem, _ext = os.path.splitext(wav)
    target = f"{stem}.mp3"

    # Save it as MP3 with a bitrate of 192 kbps.
    sound.export(target, format="mp3", bitrate="192k")

    print("Konvertierung abgeschlossen!")
|
||||
|
||||
if __name__ == "__main__":
    # Run the conversion only when executed as a script, and exit with a
    # usage hint instead of an IndexError traceback when the input path is
    # missing. Extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file.wav>")
    convert(sys.argv[1])
|
||||
9
mp3cut/convert.sh
Executable file
9
mp3cut/convert.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Convert every .wav file in the current directory to MP3 with convert.py
# and delete each source file once its conversion has succeeded.

# Without nullglob an unmatched "*.wav" would be passed on literally.
shopt -s nullglob

# Glob directly instead of parsing `ls`, and quote "$i", so file names
# containing spaces or glob characters are handled as a single argument.
for i in *.wav
do
    if python3 convert.py "$i"; then
        echo "$i"
        # "--" keeps a file name starting with "-" from being read as an option.
        rm -f -- "$i"
    else
        # Keep the source file when the conversion failed.
        echo "Conversion failed, keeping: $i" >&2
    fi
done
|
||||
|
||||
56
mp3cut/mp3cut.py
Normal file
56
mp3cut/mp3cut.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from pydub import AudioSegment, silence
|
||||
import speech_recognition as sr
|
||||
import os
|
||||
|
||||
# Load the MP3 recording that is going to be split.
audio = AudioSegment.from_mp3("20250614.mp3")
# Set the audio to 2 channels and a frame rate of 48000 Hz.
audio = audio.set_channels(2)
audio = audio.set_frame_rate(48000)
|
||||
|
||||
# Padding subtracted from the start / added to the end of every segment
# when it is cut out of the recording (same unit as the silence ranges).
BUFFER_BEFORE = 300
BUFFER_AFTER = 300

# Detect silent stretches (min_silence_len=400, silence_thresh=-45).
silent_ranges = silence.detect_silence(audio, min_silence_len=400, silence_thresh=-45)
# Filter out short pauses.
# NOTE(review): with min_silence_len=400 this filter presumably never removes
# anything -- confirm before tuning either value.
silent_ranges = [(start, end) for start, end in silent_ranges if end - start > 200]

# Determine the word segments: everything between two silent ranges.
word_segments = []
start = 0
for silence_start, silence_end in silent_ranges:
    # Skip empty spans (e.g. the recording opens with silence); otherwise a
    # zero-length "word" would later be padded and exported as a clip.
    if silence_start > start:
        word_segments.append((start, silence_start))
    start = silence_end
# Add the last word, unless the recording ends inside a silent range.
if len(audio) > start:
    word_segments.append((start, len(audio)))
|
||||
|
||||
def _unique_wav_name(stem):
    """Return ``<stem>.wav``, or ``<stem>_<n>.wav`` if that file already exists."""
    candidate = f"{stem}.wav"
    counter = 2
    while os.path.exists(candidate):
        candidate = f"{stem}_{counter}.wav"
        counter += 1
    return candidate


# Prepare the speech recognizer.
recognizer = sr.Recognizer()
# Running number used to name segments for which nothing was recognized.
namenr = 1

# Save and transcribe every segment.
for i, (start, end) in enumerate(word_segments):
    # Widen the cut by the padding, clamped to the bounds of the recording.
    start = max(0, start - BUFFER_BEFORE)
    end = min(len(audio), end + BUFFER_AFTER)
    segment = audio[start:end]
    temp_file = f"temp_word_{i+1}.wav"

    segment.export(temp_file, format="wav")

    # Read the clip back for recognition; the `with` block ends before the
    # file is renamed below.
    with sr.AudioFile(temp_file) as source:
        audio_data = recognizer.record(source)

    try:
        word_text = recognizer.recognize_google(audio_data, language="sv-SE")  # Swedish
        print("Erkanntes Wort:", word_text)
        word_text = word_text.replace(" ", "_")  # replace spaces with underscores
        # Pick a free name so a word recognized twice does not overwrite
        # the clip saved for its first occurrence.
        final_file = _unique_wav_name(word_text)
        os.rename(temp_file, final_file)
        print(f"Wort {i+1}: {word_text} -> Gespeichert als {final_file}")
    except sr.UnknownValueError:
        # Nothing recognized: keep the clip under a running number instead.
        print("Google konnte nichts erkennen.")
        final_file = _unique_wav_name(namenr)
        os.rename(temp_file, final_file)
        print(f"{namenr} -> Gespeichert als {final_file}")
        namenr = namenr + 1
    except Exception as e:
        # Any other failure is reported and the loop continues; the clip
        # stays on disk under its temporary name.
        print(f"Fehler: {e}")

print("Fertig! Alle Worte wurden gespeichert.")
|
||||
Reference in New Issue
Block a user