Add FLAC audio quality analysis and spectrum visualization (#110)

Introduces backend support for analyzing FLAC audio files, including technical metrics and frequency spectrum extraction. Adds frontend components and hooks for file selection, analysis, and visualization, integrating a new Audio Quality Analyzer dialog into the UI. Updates types and dependencies to support audio analysis features.
This commit is contained in:
Lukas
2025-11-25 22:05:12 +01:00
committed by GitHub
parent ddf1844237
commit 2172981110
12 changed files with 1145 additions and 0 deletions
+181
View File
@@ -0,0 +1,181 @@
package backend
import (
"fmt"
"math"
"os"
"github.com/go-flac/go-flac"
mewflac "github.com/mewkiz/flac"
)
// AnalysisResult contains the audio analysis data
type AnalysisResult struct {
FilePath string `json:"file_path"`
SampleRate uint32 `json:"sample_rate"`
Channels uint8 `json:"channels"`
BitsPerSample uint8 `json:"bits_per_sample"`
TotalSamples uint64 `json:"total_samples"`
Duration float64 `json:"duration"`
BitDepth string `json:"bit_depth"`
DynamicRange float64 `json:"dynamic_range"`
PeakAmplitude float64 `json:"peak_amplitude"`
RMSLevel float64 `json:"rms_level"`
Spectrum *SpectrumData `json:"spectrum,omitempty"`
}
// AnalyzeTrack performs audio analysis on a FLAC file
func AnalyzeTrack(filepath string) (*AnalysisResult, error) {
if !fileExists(filepath) {
return nil, fmt.Errorf("file does not exist: %s", filepath)
}
// Parse FLAC file
f, err := flac.ParseFile(filepath)
if err != nil {
return nil, fmt.Errorf("failed to parse FLAC file: %w", err)
}
result := &AnalysisResult{
FilePath: filepath,
}
// Extract basic audio properties from STREAMINFO block
if len(f.Meta) > 0 {
streamInfo := f.Meta[0]
if streamInfo.Type == flac.StreamInfo {
// Read STREAMINFO data
data := streamInfo.Data
if len(data) >= 18 {
// Sample rate (bits 10-29 of bytes 10-13)
result.SampleRate = uint32(data[10])<<12 | uint32(data[11])<<4 | uint32(data[12])>>4
// Channels (bits 30-32 of byte 12)
result.Channels = ((data[12] >> 1) & 0x07) + 1
// Bits per sample (bits 33-37 of bytes 12-13)
result.BitsPerSample = ((data[12]&0x01)<<4 | data[13]>>4) + 1
// Total samples (bits 38-73 of bytes 13-17)
result.TotalSamples = uint64(data[13]&0x0F)<<32 |
uint64(data[14])<<24 |
uint64(data[15])<<16 |
uint64(data[16])<<8 |
uint64(data[17])
// Calculate duration
if result.SampleRate > 0 {
result.Duration = float64(result.TotalSamples) / float64(result.SampleRate)
}
// Read min/max frame size and block size for additional analysis
// Min block size (bytes 0-1)
// Max block size (bytes 2-3)
// These can give us hints about encoding quality
}
}
}
// Analyze spectrum and calculate real audio metrics
spectrum, err := AnalyzeSpectrum(filepath)
if err != nil {
// Log error but continue
fmt.Printf("Warning: failed to analyze spectrum: %v\n", err)
} else {
result.Spectrum = spectrum
// Calculate dynamic range, peak, and RMS from decoded samples
calculateRealAudioMetrics(result, filepath)
}
// Set bit depth
result.BitDepth = fmt.Sprintf("%d-bit", result.BitsPerSample)
return result, nil
}
// calculateRealAudioMetrics calculates actual dynamic range, peak, and RMS from decoded audio
func calculateRealAudioMetrics(result *AnalysisResult, filepath string) {
// Decode FLAC to get actual samples
samples, err := decodeFLACForMetrics(filepath)
if err != nil {
return
}
// Calculate peak amplitude
var peak float64
var sumSquares float64
for _, sample := range samples {
absVal := sample
if absVal < 0 {
absVal = -absVal
}
if absVal > peak {
peak = absVal
}
sumSquares += sample * sample
}
// Convert peak to dB (reference: 1.0 = 0 dBFS)
peakDB := 20.0 * math.Log10(peak)
result.PeakAmplitude = peakDB
// Calculate RMS (Root Mean Square)
rms := math.Sqrt(sumSquares / float64(len(samples)))
rmsDB := 20.0 * math.Log10(rms)
result.RMSLevel = rmsDB
// Dynamic range is the difference between peak and RMS
result.DynamicRange = peakDB - rmsDB
}
// decodeFLACForMetrics decodes FLAC file and returns normalized samples for metric calculation
func decodeFLACForMetrics(filepath string) ([]float64, error) {
stream, err := mewflac.ParseFile(filepath)
if err != nil {
return nil, err
}
defer stream.Close()
// Limit samples to prevent memory issues (10 million samples = ~3.8 minutes at 44.1kHz)
maxSamples := 10000000
samples := make([]float64, 0, maxSamples)
// Read all audio frames
for {
frame, err := stream.ParseNext()
if err != nil {
break
}
// Get samples from first channel (mono or left channel)
var channelSamples []int32
if len(frame.Subframes) > 0 {
channelSamples = frame.Subframes[0].Samples
}
// Normalize samples to -1.0 to 1.0 range
maxVal := float64(int64(1) << (stream.Info.BitsPerSample - 1))
for _, sample := range channelSamples {
if len(samples) >= maxSamples {
return samples, nil
}
normalized := float64(sample) / maxVal
samples = append(samples, normalized)
}
if len(samples) >= maxSamples {
break
}
}
return samples, nil
}
func GetFileSize(filepath string) (int64, error) {
info, err := os.Stat(filepath)
if err != nil {
return 0, err
}
return info.Size(), nil
}
+28
View File
@@ -48,3 +48,31 @@ func SelectFolderDialog(ctx context.Context, defaultPath string) (string, error)
return selectedPath, nil
}
func SelectFileDialog(ctx context.Context) (string, error) {
options := wailsRuntime.OpenDialogOptions{
Title: "Select FLAC File for Analysis",
Filters: []wailsRuntime.FileFilter{
{
DisplayName: "FLAC Audio Files (*.flac)",
Pattern: "*.flac",
},
{
DisplayName: "All Files (*.*)",
Pattern: "*.*",
},
},
}
selectedFile, err := wailsRuntime.OpenFileDialog(ctx, options)
if err != nil {
return "", err
}
// If user cancelled, selectedFile will be empty
if selectedFile == "" {
return "", nil
}
return selectedFile, nil
}
+205
View File
@@ -0,0 +1,205 @@
package backend
import (
"fmt"
"math"
"math/cmplx"
"os"
"github.com/mewkiz/flac"
)
// SpectrumData contains frequency spectrum information
type SpectrumData struct {
TimeSlices []TimeSlice `json:"time_slices"`
SampleRate int `json:"sample_rate"`
FreqBins int `json:"freq_bins"`
Duration float64 `json:"duration"`
MaxFreq float64 `json:"max_freq"`
}
// TimeSlice represents spectrum data at a point in time
type TimeSlice struct {
Time float64 `json:"time"`
Magnitudes []float64 `json:"magnitudes"`
}
// AnalyzeSpectrum decodes FLAC file and performs FFT analysis
func AnalyzeSpectrum(filepath string) (*SpectrumData, error) {
// Open FLAC file
stream, err := flac.ParseFile(filepath)
if err != nil {
return nil, fmt.Errorf("failed to parse FLAC: %w", err)
}
defer stream.Close()
info := stream.Info
sampleRate := int(info.SampleRate)
channels := int(info.NChannels)
// Read audio samples
samples, err := readSamples(stream, channels)
if err != nil {
return nil, fmt.Errorf("failed to read samples: %w", err)
}
if len(samples) == 0 {
return nil, fmt.Errorf("no audio samples found")
}
// Calculate spectrum
return calculateSpectrum(samples, sampleRate), nil
}
// readSamples reads and decodes audio samples from FLAC stream
func readSamples(stream *flac.Stream, channels int) ([]float64, error) {
var allSamples []float64
maxSamples := 10 * 1024 * 1024 // Limit to ~10 million samples to avoid memory issues
// Decode frames
for {
frame, err := stream.ParseNext()
if err != nil {
// End of stream
break
}
// Convert samples to float64 and mix channels to mono
for i := 0; i < frame.Subframes[0].NSamples; i++ {
var sample float64
// Mix all channels to mono by averaging
for ch := 0; ch < channels; ch++ {
sample += float64(frame.Subframes[ch].Samples[i])
}
sample /= float64(channels)
allSamples = append(allSamples, sample)
// Limit sample count
if len(allSamples) >= maxSamples {
return allSamples, nil
}
}
}
return allSamples, nil
}
// calculateSpectrum performs FFT analysis on audio samples
func calculateSpectrum(samples []float64, sampleRate int) *SpectrumData {
fftSize := 8192
numTimeSlices := 300
duration := float64(len(samples)) / float64(sampleRate)
samplesPerSlice := len(samples) / numTimeSlices
if samplesPerSlice < fftSize {
samplesPerSlice = fftSize
numTimeSlices = len(samples) / fftSize
}
timeSlices := make([]TimeSlice, 0, numTimeSlices)
freqBins := fftSize / 2
maxFreq := float64(sampleRate) / 2.0
for i := 0; i < numTimeSlices; i++ {
startIdx := i * samplesPerSlice
if startIdx+fftSize > len(samples) {
break
}
window := samples[startIdx : startIdx+fftSize]
windowedSamples := applyHannWindow(window)
spectrum := fft(windowedSamples)
magnitudes := make([]float64, freqBins)
for j := 0; j < freqBins; j++ {
magnitude := cmplx.Abs(spectrum[j])
if magnitude < 1e-10 {
magnitude = 1e-10
}
magnitudes[j] = 20 * math.Log10(magnitude)
}
timeSlice := TimeSlice{
Time: float64(startIdx) / float64(sampleRate),
Magnitudes: magnitudes,
}
timeSlices = append(timeSlices, timeSlice)
}
return &SpectrumData{
TimeSlices: timeSlices,
SampleRate: sampleRate,
FreqBins: freqBins,
Duration: duration,
MaxFreq: maxFreq,
}
}
// applyHannWindow applies Hann window to reduce spectral leakage
func applyHannWindow(samples []float64) []float64 {
n := len(samples)
windowed := make([]float64, n)
for i := 0; i < n; i++ {
window := 0.5 * (1.0 - math.Cos(2.0*math.Pi*float64(i)/float64(n-1)))
windowed[i] = samples[i] * window
}
return windowed
}
// fft performs Fast Fourier Transform using Cooley-Tukey algorithm
func fft(samples []float64) []complex128 {
n := len(samples)
x := make([]complex128, n)
for i := 0; i < n; i++ {
x[i] = complex(samples[i], 0)
}
return fftRecursive(x)
}
// fftRecursive performs recursive FFT
func fftRecursive(x []complex128) []complex128 {
n := len(x)
if n <= 1 {
return x
}
even := make([]complex128, n/2)
odd := make([]complex128, n/2)
for i := 0; i < n/2; i++ {
even[i] = x[2*i]
odd[i] = x[2*i+1]
}
evenFFT := fftRecursive(even)
oddFFT := fftRecursive(odd)
result := make([]complex128, n)
for k := 0; k < n/2; k++ {
t := cmplx.Exp(complex(0, -2*math.Pi*float64(k)/float64(n))) * oddFFT[k]
result[k] = evenFFT[k] + t
result[k+n/2] = evenFFT[k] - t
}
return result
}
// GetFileSize helper
func getSpectrumFileSize(filepath string) (int64, error) {
info, err := os.Stat(filepath)
if err != nil {
return 0, err
}
return info.Size(), nil
}