first commit
This commit is contained in:
206
transcode/transcode.go
Normal file
206
transcode/transcode.go
Normal file
@@ -0,0 +1,206 @@
|
||||
package transcode
|
||||
|
||||
import (
	"bufio"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)
|
||||
|
||||
// Segment describes one half-open time interval [Start, End), with both
// bounds expressed in seconds. It is the unit exchanged between silence
// detection and export, and it serializes to JSON as
// {"start": <seconds>, "end": <seconds>}.
type Segment struct {
	// Start is the inclusive lower bound of the interval, in seconds.
	Start float64 `json:"start"`

	// End is the exclusive upper bound of the interval, in seconds.
	End float64 `json:"end"`
}
|
||||
|
||||
type wsMsg struct {
|
||||
Type string `json:"type"`
|
||||
Segments []Segment `json:"segments,omitempty"`
|
||||
Duration float64 `json:"duration,omitempty"`
|
||||
Percent float64 `json:"percent,omitempty"`
|
||||
Message string `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func send(msg wsMsg, broadcast func([]byte)) {
|
||||
data, _ := json.Marshal(msg)
|
||||
broadcast(data)
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Silence detection
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// DetectSpeechSegments runs ffmpeg's silencedetect filter and returns the
|
||||
// non-silent (speech) segments. A "segments" WebSocket message is broadcast
|
||||
// when detection finishes.
|
||||
func DetectSpeechSegments(
|
||||
inputPath string,
|
||||
noiseDb, minDuration, padding float64,
|
||||
broadcast func([]byte),
|
||||
) ([]Segment, error) {
|
||||
filter := fmt.Sprintf("silencedetect=noise=%.0fdB:d=%.2f", noiseDb, minDuration)
|
||||
|
||||
cmd := exec.Command("ffmpeg",
|
||||
"-i", inputPath,
|
||||
"-af", filter,
|
||||
"-f", "null", "-",
|
||||
)
|
||||
|
||||
// ffmpeg writes silencedetect output to stderr
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("ffmpeg start: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
silences []silenceInterval
|
||||
pendingStart float64
|
||||
totalDuration float64
|
||||
|
||||
durationRe = regexp.MustCompile(`Duration:\s+(\d+):(\d+):([0-9.]+)`)
|
||||
startRe = regexp.MustCompile(`silence_start:\s*([0-9.e+\-]+)`)
|
||||
endRe = regexp.MustCompile(`silence_end:\s*([0-9.e+\-]+)`)
|
||||
)
|
||||
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
if m := durationRe.FindStringSubmatch(line); m != nil {
|
||||
h, _ := strconv.ParseFloat(m[1], 64)
|
||||
min, _ := strconv.ParseFloat(m[2], 64)
|
||||
s, _ := strconv.ParseFloat(m[3], 64)
|
||||
totalDuration = h*3600 + min*60 + s
|
||||
}
|
||||
if m := startRe.FindStringSubmatch(line); m != nil {
|
||||
pendingStart, _ = strconv.ParseFloat(m[1], 64)
|
||||
}
|
||||
if m := endRe.FindStringSubmatch(line); m != nil {
|
||||
end, _ := strconv.ParseFloat(m[1], 64)
|
||||
silences = append(silences, silenceInterval{start: pendingStart, end: end})
|
||||
}
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return nil, fmt.Errorf("ffmpeg: %w", err)
|
||||
}
|
||||
|
||||
segments := invertSilences(silences, totalDuration, padding)
|
||||
|
||||
send(wsMsg{
|
||||
Type: "segments",
|
||||
Segments: segments,
|
||||
Duration: totalDuration,
|
||||
}, broadcast)
|
||||
|
||||
return segments, nil
|
||||
}
|
||||
|
||||
// silenceInterval is one silent stretch reported by ffmpeg's silencedetect
// filter, with both bounds in seconds.
type silenceInterval struct {
	start float64 // value of the silence_start line
	end   float64 // value of the matching silence_end line
}
|
||||
|
||||
// invertSilences turns silence regions into speech regions, with a small
|
||||
// padding buffer so words at the edges don't get clipped.
|
||||
func invertSilences(silences []silenceInterval, totalDuration, padding float64) []Segment {
|
||||
if len(silences) == 0 {
|
||||
return []Segment{{Start: 0, End: totalDuration}}
|
||||
}
|
||||
|
||||
var segments []Segment
|
||||
cursor := 0.0
|
||||
|
||||
for _, s := range silences {
|
||||
segEnd := s.start + padding
|
||||
if segEnd > totalDuration {
|
||||
segEnd = totalDuration
|
||||
}
|
||||
if segEnd-cursor > 0.05 {
|
||||
segments = append(segments, Segment{Start: cursor, End: segEnd})
|
||||
}
|
||||
next := s.end - padding
|
||||
if next < segEnd {
|
||||
next = segEnd
|
||||
}
|
||||
cursor = next
|
||||
}
|
||||
|
||||
if cursor < totalDuration-0.01 {
|
||||
segments = append(segments, Segment{Start: cursor, End: totalDuration})
|
||||
}
|
||||
|
||||
return segments
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
// Export
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// ExportSegments concatenates the given segments using ffmpeg's concat demuxer
|
||||
// (stream-copy, no re-encode). Progress is streamed via broadcast.
|
||||
func ExportSegments(
|
||||
inputPath, outputPath string,
|
||||
segments []Segment,
|
||||
broadcast func([]byte),
|
||||
) error {
|
||||
concatPath := outputPath + ".concat.txt"
|
||||
f, err := os.Create(concatPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("create concat file: %w", err)
|
||||
}
|
||||
defer os.Remove(concatPath)
|
||||
|
||||
totalDuration := 0.0
|
||||
for _, seg := range segments {
|
||||
totalDuration += seg.End - seg.Start
|
||||
fmt.Fprintf(f, "file '%s'\ninpoint %.6f\noutpoint %.6f\n",
|
||||
inputPath, seg.Start, seg.End)
|
||||
}
|
||||
f.Close()
|
||||
|
||||
cmd := exec.Command("ffmpeg",
|
||||
"-f", "concat",
|
||||
"-safe", "0",
|
||||
"-i", concatPath,
|
||||
"-c", "copy",
|
||||
"-progress", "pipe:1",
|
||||
"-y",
|
||||
outputPath,
|
||||
)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmd.Stderr = os.Stderr
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("ffmpeg start: %w", err)
|
||||
}
|
||||
|
||||
timeRe := regexp.MustCompile(`out_time_ms=(\d+)`)
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() {
|
||||
if m := timeRe.FindStringSubmatch(scanner.Text()); m != nil {
|
||||
ms, err := strconv.ParseFloat(m[1], 64)
|
||||
if err != nil || ms < 0 {
|
||||
continue
|
||||
}
|
||||
pct := (ms / 1e6) / totalDuration * 100
|
||||
if pct > 100 {
|
||||
pct = 100
|
||||
}
|
||||
send(wsMsg{Type: "progress", Percent: pct}, broadcast)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return fmt.Errorf("ffmpeg: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user