first successful export
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Segment is a time interval [Start, End) in seconds.
|
||||
@@ -24,8 +25,8 @@ type wsMsg struct {
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func send(msg wsMsg, broadcast func([]byte)) {
|
||||
data, _ := json.Marshal(msg)
|
||||
func send(msg *wsMsg, broadcast func([]byte)) {
|
||||
data, _ := json.Marshal(&msg)
|
||||
broadcast(data)
|
||||
}
|
||||
|
||||
@@ -36,11 +37,7 @@ func send(msg wsMsg, broadcast func([]byte)) {
|
||||
// DetectSpeechSegments runs ffmpeg's silencedetect filter and returns the
|
||||
// non-silent (speech) segments. A "segments" WebSocket message is broadcast
|
||||
// when detection finishes.
|
||||
func DetectSpeechSegments(
|
||||
inputPath string,
|
||||
noiseDb, minDuration, padding float64,
|
||||
broadcast func([]byte),
|
||||
) ([]Segment, error) {
|
||||
func DetectSpeechSegments(inputPath string, noiseDb, minDuration, padding float64, broadcast func([]byte)) ([]Segment, error) {
|
||||
filter := fmt.Sprintf("silencedetect=noise=%.0fdB:d=%.2f", noiseDb, minDuration)
|
||||
|
||||
cmd := exec.Command("ffmpeg",
|
||||
@@ -91,9 +88,9 @@ func DetectSpeechSegments(
|
||||
return nil, fmt.Errorf("ffmpeg: %w", err)
|
||||
}
|
||||
|
||||
segments := invertSilences(silences, totalDuration, padding)
|
||||
segments := removeSilence(silences, totalDuration, padding)
|
||||
|
||||
send(wsMsg{
|
||||
send(&wsMsg{
|
||||
Type: "segments",
|
||||
Segments: segments,
|
||||
Duration: totalDuration,
|
||||
@@ -104,9 +101,9 @@ func DetectSpeechSegments(
|
||||
|
||||
type silenceInterval struct{ start, end float64 }
|
||||
|
||||
// invertSilences turns silence regions into speech regions, with a small
|
||||
// removeSilence turns silence regions into speech regions, with a small
|
||||
// padding buffer so words at the edges don't get clipped.
|
||||
func invertSilences(silences []silenceInterval, totalDuration, padding float64) []Segment {
|
||||
func removeSilence(silences []silenceInterval, totalDuration, padding float64) []Segment {
|
||||
if len(silences) == 0 {
|
||||
return []Segment{{Start: 0, End: totalDuration}}
|
||||
}
|
||||
@@ -136,37 +133,41 @@ func invertSilences(silences []silenceInterval, totalDuration, padding float64)
|
||||
return segments
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Export
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// ExportSegments concatenates the given segments using ffmpeg's concat demuxer
|
||||
// (stream-copy, no re-encode). Progress is streamed via broadcast.
|
||||
func ExportSegments(
|
||||
inputPath, outputPath string,
|
||||
segments []Segment,
|
||||
broadcast func([]byte),
|
||||
) error {
|
||||
concatPath := outputPath + ".concat.txt"
|
||||
f, err := os.Create(concatPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create concat file: %w", err)
|
||||
}
|
||||
defer os.Remove(concatPath)
|
||||
|
||||
totalDuration := 0.0
|
||||
for _, seg := range segments {
|
||||
totalDuration += seg.End - seg.Start
|
||||
fmt.Fprintf(f, "file '%s'\ninpoint %.6f\noutpoint %.6f\n",
|
||||
inputPath, seg.Start, seg.End)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
// Build a filter_complex that trims each segment and resets timestamps,
|
||||
// then concatenates them. This avoids non-monotonic DTS issues that occur
|
||||
// when stream-copying segments with their original timestamps.
|
||||
var filterParts []string
|
||||
var concatInputs string
|
||||
for i, seg := range segments {
|
||||
filterParts = append(filterParts,
|
||||
fmt.Sprintf("[0:v]trim=start=%.6f:end=%.6f,setpts=PTS-STARTPTS[v%d]", seg.Start, seg.End, i),
|
||||
fmt.Sprintf("[0:a]atrim=start=%.6f:end=%.6f,asetpts=PTS-STARTPTS[a%d]", seg.Start, seg.End, i),
|
||||
)
|
||||
concatInputs += fmt.Sprintf("[v%d][a%d]", i, i)
|
||||
}
|
||||
filterParts = append(filterParts,
|
||||
fmt.Sprintf("%sconcat=n=%d:v=1:a=1[outv][outa]", concatInputs, len(segments)),
|
||||
)
|
||||
filterComplex := strings.Join(filterParts, ";")
|
||||
|
||||
cmd := exec.Command("ffmpeg",
|
||||
"-f", "concat",
|
||||
"-safe", "0",
|
||||
"-i", concatPath,
|
||||
"-c", "copy",
|
||||
"-i", inputPath,
|
||||
"-filter_complex", filterComplex,
|
||||
"-map", "[outv]",
|
||||
"-map", "[outa]",
|
||||
// CRF 28 with the fast preset gives around 80% compression
|
||||
"-c:v", "libx264", "-crf", "28", "-preset", "fast",
|
||||
"-c:a", "aac", "-b:a", "128k",
|
||||
"-progress", "pipe:1",
|
||||
"-y",
|
||||
outputPath,
|
||||
@@ -194,7 +195,7 @@ func ExportSegments(
|
||||
if pct > 100 {
|
||||
pct = 100
|
||||
}
|
||||
send(wsMsg{Type: "progress", Percent: pct}, broadcast)
|
||||
send(&wsMsg{Type: "progress", Percent: pct}, broadcast)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user