.remake audio quality analyzer
This commit is contained in:
+21
-184
@@ -2,170 +2,26 @@ package backend
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-flac/go-flac"
|
||||
mewflac "github.com/mewkiz/flac"
|
||||
)
|
||||
|
||||
type AnalysisResult struct {
|
||||
FilePath string `json:"file_path"`
|
||||
FileSize int64 `json:"file_size"`
|
||||
SampleRate uint32 `json:"sample_rate"`
|
||||
Channels uint8 `json:"channels"`
|
||||
BitsPerSample uint8 `json:"bits_per_sample"`
|
||||
TotalSamples uint64 `json:"total_samples"`
|
||||
Duration float64 `json:"duration"`
|
||||
Bitrate int `json:"bit_rate"`
|
||||
BitDepth string `json:"bit_depth"`
|
||||
DynamicRange float64 `json:"dynamic_range"`
|
||||
PeakAmplitude float64 `json:"peak_amplitude"`
|
||||
RMSLevel float64 `json:"rms_level"`
|
||||
Spectrum *SpectrumData `json:"spectrum,omitempty"`
|
||||
}
|
||||
|
||||
func AnalyzeTrack(filepath string) (*AnalysisResult, error) {
|
||||
if !fileExists(filepath) {
|
||||
return nil, fmt.Errorf("file does not exist: %s", filepath)
|
||||
}
|
||||
|
||||
fileInfo, err := os.Stat(filepath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get file info: %w", err)
|
||||
}
|
||||
|
||||
f, err := flac.ParseFile(filepath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse FLAC file: %w", err)
|
||||
}
|
||||
|
||||
result := &AnalysisResult{
|
||||
FilePath: filepath,
|
||||
FileSize: fileInfo.Size(),
|
||||
}
|
||||
|
||||
if len(f.Meta) > 0 {
|
||||
streamInfo := f.Meta[0]
|
||||
if streamInfo.Type == flac.StreamInfo {
|
||||
|
||||
data := streamInfo.Data
|
||||
if len(data) >= 18 {
|
||||
|
||||
result.SampleRate = uint32(data[10])<<12 | uint32(data[11])<<4 | uint32(data[12])>>4
|
||||
|
||||
result.Channels = ((data[12] >> 1) & 0x07) + 1
|
||||
|
||||
result.BitsPerSample = ((data[12]&0x01)<<4 | data[13]>>4) + 1
|
||||
|
||||
result.TotalSamples = uint64(data[13]&0x0F)<<32 |
|
||||
uint64(data[14])<<24 |
|
||||
uint64(data[15])<<16 |
|
||||
uint64(data[16])<<8 |
|
||||
uint64(data[17])
|
||||
|
||||
if result.SampleRate > 0 {
|
||||
result.Duration = float64(result.TotalSamples) / float64(result.SampleRate)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spectrum, err := AnalyzeSpectrum(filepath)
|
||||
if err != nil {
|
||||
|
||||
fmt.Printf("Warning: failed to analyze spectrum: %v\n", err)
|
||||
} else {
|
||||
result.Spectrum = spectrum
|
||||
|
||||
calculateRealAudioMetrics(result, filepath)
|
||||
}
|
||||
|
||||
result.BitDepth = fmt.Sprintf("%d-bit", result.BitsPerSample)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func calculateRealAudioMetrics(result *AnalysisResult, filepath string) {
|
||||
|
||||
samples, err := decodeFLACForMetrics(filepath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var peak float64
|
||||
var sumSquares float64
|
||||
|
||||
for _, sample := range samples {
|
||||
absVal := sample
|
||||
if absVal < 0 {
|
||||
absVal = -absVal
|
||||
}
|
||||
if absVal > peak {
|
||||
peak = absVal
|
||||
}
|
||||
sumSquares += sample * sample
|
||||
}
|
||||
|
||||
peakDB := 20.0 * math.Log10(peak)
|
||||
result.PeakAmplitude = peakDB
|
||||
|
||||
rms := math.Sqrt(sumSquares / float64(len(samples)))
|
||||
rmsDB := 20.0 * math.Log10(rms)
|
||||
result.RMSLevel = rmsDB
|
||||
|
||||
result.DynamicRange = peakDB - rmsDB
|
||||
}
|
||||
|
||||
func decodeFLACForMetrics(filepath string) ([]float64, error) {
|
||||
stream, err := mewflac.ParseFile(filepath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
maxSamples := 10000000
|
||||
samples := make([]float64, 0, maxSamples)
|
||||
|
||||
for {
|
||||
frame, err := stream.ParseNext()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
var channelSamples []int32
|
||||
if len(frame.Subframes) > 0 {
|
||||
channelSamples = frame.Subframes[0].Samples
|
||||
}
|
||||
|
||||
maxVal := float64(int64(1) << (stream.Info.BitsPerSample - 1))
|
||||
for _, sample := range channelSamples {
|
||||
if len(samples) >= maxSamples {
|
||||
return samples, nil
|
||||
}
|
||||
normalized := float64(sample) / maxVal
|
||||
samples = append(samples, normalized)
|
||||
}
|
||||
|
||||
if len(samples) >= maxSamples {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return samples, nil
|
||||
}
|
||||
|
||||
func GetFileSize(filepath string) (int64, error) {
|
||||
info, err := os.Stat(filepath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return info.Size(), nil
|
||||
FilePath string `json:"file_path"`
|
||||
FileSize int64 `json:"file_size"`
|
||||
SampleRate uint32 `json:"sample_rate"`
|
||||
Channels uint8 `json:"channels"`
|
||||
BitsPerSample uint8 `json:"bits_per_sample"`
|
||||
TotalSamples uint64 `json:"total_samples"`
|
||||
Duration float64 `json:"duration"`
|
||||
Bitrate int `json:"bit_rate"`
|
||||
BitDepth string `json:"bit_depth"`
|
||||
DynamicRange float64 `json:"dynamic_range"`
|
||||
PeakAmplitude float64 `json:"peak_amplitude"`
|
||||
RMSLevel float64 `json:"rms_level"`
|
||||
}
|
||||
|
||||
func GetTrackMetadata(filepath string) (*AnalysisResult, error) {
|
||||
@@ -194,20 +50,23 @@ func GetMetadataWithFFprobe(filePath string) (*AnalysisResult, error) {
|
||||
"-v", "error",
|
||||
"-select_streams", "a:0",
|
||||
"-show_entries", "stream=sample_rate,channels,bits_per_raw_sample,bits_per_sample,duration,bit_rate",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||
"-of", "default=noprint_wrappers=0",
|
||||
filePath,
|
||||
}
|
||||
|
||||
cmd := exec.Command(ffprobePath, args...)
|
||||
setHideWindow(cmd)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ffprobe failed: %w - %s", err, string(output))
|
||||
return nil, fmt.Errorf("ffprobe failed: %v - %s", err, string(output))
|
||||
}
|
||||
|
||||
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
|
||||
if len(lines) < 4 {
|
||||
return nil, fmt.Errorf("unexpected ffprobe output: %s", string(output))
|
||||
infoMap := make(map[string]string)
|
||||
lines := strings.Split(string(output), "\n")
|
||||
for _, line := range lines {
|
||||
if strings.Contains(line, "=") {
|
||||
parts := strings.SplitN(line, "=", 2)
|
||||
infoMap[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||
}
|
||||
}
|
||||
|
||||
res := &AnalysisResult{
|
||||
@@ -218,28 +77,6 @@ func GetMetadataWithFFprobe(filePath string) (*AnalysisResult, error) {
|
||||
res.FileSize = info.Size()
|
||||
}
|
||||
|
||||
infoMap := make(map[string]string)
|
||||
|
||||
args = []string{
|
||||
"-v", "error",
|
||||
"-select_streams", "a:0",
|
||||
"-show_entries", "stream=sample_rate,channels,bits_per_raw_sample,bits_per_sample,duration,bit_rate",
|
||||
"-of", "default=noprint_wrappers=0",
|
||||
filePath,
|
||||
}
|
||||
cmd = exec.Command(ffprobePath, args...)
|
||||
setHideWindow(cmd)
|
||||
output, err = cmd.CombinedOutput()
|
||||
if err == nil {
|
||||
lines = strings.Split(string(output), "\n")
|
||||
for _, line := range lines {
|
||||
if strings.Contains(line, "=") {
|
||||
parts := strings.SplitN(line, "=", 2)
|
||||
infoMap[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if val, ok := infoMap["sample_rate"]; ok {
|
||||
s, _ := strconv.Atoi(val)
|
||||
res.SampleRate = uint32(s)
|
||||
|
||||
@@ -1,222 +0,0 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"math/cmplx"
|
||||
|
||||
"github.com/mewkiz/flac"
|
||||
)
|
||||
|
||||
type SpectrumData struct {
|
||||
TimeSlices []TimeSlice `json:"time_slices"`
|
||||
SampleRate int `json:"sample_rate"`
|
||||
FreqBins int `json:"freq_bins"`
|
||||
Duration float64 `json:"duration"`
|
||||
MaxFreq float64 `json:"max_freq"`
|
||||
}
|
||||
|
||||
// TimeSlice is the magnitude spectrum of a single analysis window.
type TimeSlice struct {
	// Time is the window's first sample index divided by the sample rate.
	Time float64 `json:"time"`
	// Magnitudes holds 20*log10(|X[k]|) for each frequency bin k.
	Magnitudes []float64 `json:"magnitudes"`
}
|
||||
|
||||
// SpectrumParams selects how the spectrum analysis is performed.
type SpectrumParams struct {
	// FFTSize is the window length in samples. AnalyzeSpectrumWithParams
	// accepts 512, 1024, 2048, 4096 and 8192 and falls back to 4096 for any
	// other value.
	FFTSize int `json:"fft_size"`
	// WindowFunction names the window applied before the FFT: "hamming",
	// "blackman" or "rectangular"; any other value selects a Hann window.
	WindowFunction string `json:"window_function"`
}
|
||||
|
||||
func DefaultSpectrumParams() SpectrumParams {
|
||||
return SpectrumParams{
|
||||
FFTSize: 4096,
|
||||
WindowFunction: "hann",
|
||||
}
|
||||
}
|
||||
|
||||
func AnalyzeSpectrum(filepath string) (*SpectrumData, error) {
|
||||
return AnalyzeSpectrumWithParams(filepath, DefaultSpectrumParams())
|
||||
}
|
||||
|
||||
func AnalyzeSpectrumWithParams(filepath string, params SpectrumParams) (*SpectrumData, error) {
|
||||
stream, err := flac.ParseFile(filepath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse FLAC: %w", err)
|
||||
}
|
||||
defer stream.Close()
|
||||
|
||||
info := stream.Info
|
||||
sampleRate := int(info.SampleRate)
|
||||
channels := int(info.NChannels)
|
||||
|
||||
samples, err := readSamples(stream, channels)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read samples: %w", err)
|
||||
}
|
||||
|
||||
if len(samples) == 0 {
|
||||
return nil, fmt.Errorf("no audio samples found")
|
||||
}
|
||||
|
||||
fftSize := params.FFTSize
|
||||
validSizes := []int{512, 1024, 2048, 4096, 8192}
|
||||
valid := false
|
||||
for _, s := range validSizes {
|
||||
if fftSize == s {
|
||||
valid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !valid {
|
||||
fftSize = 4096
|
||||
}
|
||||
|
||||
return calculateSpectrumWithParams(samples, sampleRate, fftSize, params.WindowFunction), nil
|
||||
}
|
||||
|
||||
func readSamples(stream *flac.Stream, channels int) ([]float64, error) {
|
||||
var allSamples []float64
|
||||
maxSamples := 10 * 1024 * 1024
|
||||
|
||||
for {
|
||||
frame, err := stream.ParseNext()
|
||||
if err != nil {
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
for i := 0; i < frame.Subframes[0].NSamples; i++ {
|
||||
var sample float64
|
||||
|
||||
for ch := 0; ch < channels; ch++ {
|
||||
sample += float64(frame.Subframes[ch].Samples[i])
|
||||
}
|
||||
sample /= float64(channels)
|
||||
|
||||
allSamples = append(allSamples, sample)
|
||||
|
||||
if len(allSamples) >= maxSamples {
|
||||
return allSamples, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allSamples, nil
|
||||
}
|
||||
|
||||
func calculateSpectrumWithParams(samples []float64, sampleRate, fftSize int, windowFunc string) *SpectrumData {
|
||||
numTimeSlices := 300
|
||||
|
||||
duration := float64(len(samples)) / float64(sampleRate)
|
||||
|
||||
samplesPerSlice := len(samples) / numTimeSlices
|
||||
if samplesPerSlice < fftSize {
|
||||
samplesPerSlice = fftSize
|
||||
numTimeSlices = len(samples) / fftSize
|
||||
}
|
||||
|
||||
timeSlices := make([]TimeSlice, 0, numTimeSlices)
|
||||
freqBins := fftSize / 2
|
||||
maxFreq := float64(sampleRate) / 2.0
|
||||
|
||||
for i := 0; i < numTimeSlices; i++ {
|
||||
startIdx := i * samplesPerSlice
|
||||
if startIdx+fftSize > len(samples) {
|
||||
break
|
||||
}
|
||||
|
||||
window := samples[startIdx : startIdx+fftSize]
|
||||
windowedSamples := applyWindow(window, windowFunc)
|
||||
|
||||
spectrum := fft(windowedSamples)
|
||||
|
||||
magnitudes := make([]float64, freqBins)
|
||||
for j := 0; j < freqBins; j++ {
|
||||
magnitude := cmplx.Abs(spectrum[j])
|
||||
|
||||
if magnitude < 1e-10 {
|
||||
magnitude = 1e-10
|
||||
}
|
||||
magnitudes[j] = 20 * math.Log10(magnitude)
|
||||
}
|
||||
|
||||
timeSlice := TimeSlice{
|
||||
Time: float64(startIdx) / float64(sampleRate),
|
||||
Magnitudes: magnitudes,
|
||||
}
|
||||
timeSlices = append(timeSlices, timeSlice)
|
||||
}
|
||||
|
||||
return &SpectrumData{
|
||||
TimeSlices: timeSlices,
|
||||
SampleRate: sampleRate,
|
||||
FreqBins: freqBins,
|
||||
Duration: duration,
|
||||
MaxFreq: maxFreq,
|
||||
}
|
||||
}
|
||||
|
||||
// applyWindow returns a new slice holding samples multiplied element-wise by
// the window selected with windowType. The input slice is not modified.
//
// Recognized values are "hamming", "blackman" and "rectangular"; any other
// value (including "hann") selects a Hann window. A single-sample input is
// returned unscaled for every window type: a one-point window is 1, whereas
// the general formulas divide by n-1 and would produce NaN.
func applyWindow(samples []float64, windowType string) []float64 {
	n := len(samples)
	windowed := make([]float64, n)

	if n == 1 {
		windowed[0] = samples[0]
		return windowed
	}

	span := float64(n - 1)
	for i, sample := range samples {
		pos := float64(i)

		var w float64
		switch windowType {
		case "hamming":
			w = 0.54 - 0.46*math.Cos(2*math.Pi*pos/span)
		case "blackman":
			w = 0.42 - 0.5*math.Cos(2*math.Pi*pos/span) +
				0.08*math.Cos(4*math.Pi*pos/span)
		case "rectangular":
			w = 1.0
		default:
			w = 0.5 * (1.0 - math.Cos(2*math.Pi*pos/span))
		}
		windowed[i] = sample * w
	}

	return windowed
}
|
||||
|
||||
func applyHannWindow(samples []float64) []float64 {
|
||||
return applyWindow(samples, "hann")
|
||||
}
|
||||
|
||||
func fft(samples []float64) []complex128 {
|
||||
n := len(samples)
|
||||
|
||||
x := make([]complex128, n)
|
||||
for i := 0; i < n; i++ {
|
||||
x[i] = complex(samples[i], 0)
|
||||
}
|
||||
|
||||
return fftRecursive(x)
|
||||
}
|
||||
|
||||
// fftRecursive computes the discrete Fourier transform of x by splitting it
// into even- and odd-indexed halves, transforming each half recursively and
// combining them with twiddle factors exp(-2*pi*i*k/n).
//
// Inputs of length 0 or 1 are returned as-is (the same slice). The split uses
// len(x)/2 elements per half, so the result is only a correct transform when
// len(x) is a power of two.
func fftRecursive(x []complex128) []complex128 {
	n := len(x)
	if n <= 1 {
		return x
	}
	half := n / 2

	// De-interleave the input into its even- and odd-indexed elements.
	evens := make([]complex128, 0, half)
	odds := make([]complex128, 0, half)
	for i := 0; i+1 < n; i += 2 {
		evens = append(evens, x[i])
		odds = append(odds, x[i+1])
	}

	lower := fftRecursive(evens)
	upper := fftRecursive(odds)

	// Butterfly: bin k and bin k+half share the same pair of partial results.
	out := make([]complex128, n)
	for k, e := range lower {
		t := cmplx.Exp(complex(0, -2*math.Pi*float64(k)/float64(n))) * upper[k]
		out[k] = e + t
		out[k+half] = e - t
	}

	return out
}
|
||||
Reference in New Issue
Block a user