Fix audio analyzer for ALAC files

This commit is contained in:
afkarxyz
2026-04-02 10:31:56 +07:00
parent cfcb890469
commit 9314b8ec99
4 changed files with 274 additions and 71 deletions
+8
View File
@@ -1298,6 +1298,14 @@ func (a *App) ReadFileAsBase64(filePath string) (string, error) {
return base64.StdEncoding.EncodeToString(content), nil return base64.StdEncoding.EncodeToString(content), nil
} }
// DecodeAudioForAnalysis is the App-level binding the frontend calls when it
// needs the backend to decode a file for analysis. It rejects an empty path
// and otherwise delegates to backend.DecodeAudioForAnalysis unchanged.
func (a *App) DecodeAudioForAnalysis(filePath string) (*backend.AnalysisDecodeResponse, error) {
	if len(filePath) > 0 {
		return backend.DecodeAudioForAnalysis(filePath)
	}
	return nil, fmt.Errorf("file path is required")
}
func (a *App) RenameFileTo(oldPath, newName string) error { func (a *App) RenameFileTo(oldPath, newName string) error {
dir := filepath.Dir(oldPath) dir := filepath.Dir(oldPath)
ext := filepath.Ext(oldPath) ext := filepath.Ext(oldPath)
+99
View File
@@ -1,6 +1,8 @@
package backend package backend
import ( import (
"bytes"
"encoding/base64"
"fmt" "fmt"
"os" "os"
"os/exec" "os/exec"
@@ -24,6 +26,16 @@ type AnalysisResult struct {
RMSLevel float64 `json:"rms_level"` RMSLevel float64 `json:"rms_level"`
} }
// AnalysisDecodeResponse is the payload returned by DecodeAudioForAnalysis:
// the ffmpeg-decoded PCM plus the stream properties reported by
// GetTrackMetadata for the same file.
type AnalysisDecodeResponse struct {
// PCMBase64 is the base64 encoding of ffmpeg's raw s16le output for the
// first audio stream, reduced to a single channel.
PCMBase64 string `json:"pcm_base64"`
// SampleRate, Channels, BitsPerSample and Duration are copied from the
// track metadata, so they describe the source stream rather than the
// mono 16-bit PCM carried in PCMBase64.
SampleRate uint32 `json:"sample_rate"`
Channels uint8 `json:"channels"`
BitsPerSample uint8 `json:"bits_per_sample"`
Duration float64 `json:"duration"`
// BitrateKbps is the metadata bitrate divided by 1000; it is left at zero
// (and therefore omitted from JSON) when the metadata bitrate is not positive.
// NOTE(review): presumably the metadata bitrate is in bits per second — confirm.
BitrateKbps int `json:"bitrate_kbps,omitempty"`
// BitDepth is the metadata's bit-depth label, omitted from JSON when empty.
BitDepth string `json:"bit_depth,omitempty"`
}
func GetTrackMetadata(filepath string) (*AnalysisResult, error) { func GetTrackMetadata(filepath string) (*AnalysisResult, error) {
if !fileExists(filepath) { if !fileExists(filepath) {
return nil, fmt.Errorf("file does not exist: %s", filepath) return nil, fmt.Errorf("file does not exist: %s", filepath)
@@ -113,3 +125,90 @@ func GetMetadataWithFFprobe(filePath string) (*AnalysisResult, error) {
return res, nil return res, nil
} }
// DecodeAudioForAnalysis reads the track metadata for filePath, decodes its
// first audio stream to base64-encoded PCM via ffmpeg, and bundles both into
// an AnalysisDecodeResponse. The first failing step's error is returned as-is.
func DecodeAudioForAnalysis(filePath string) (*AnalysisDecodeResponse, error) {
	info, metaErr := GetTrackMetadata(filePath)
	if metaErr != nil {
		return nil, metaErr
	}

	encodedPCM, decodeErr := extractAnalysisPCMBase64(filePath)
	if decodeErr != nil {
		return nil, decodeErr
	}

	// Only a positive bitrate is converted; otherwise the field stays zero
	// and is dropped from the JSON by its omitempty tag.
	var kbps int
	if info.Bitrate > 0 {
		kbps = info.Bitrate / 1000
	}

	return &AnalysisDecodeResponse{
		PCMBase64:     encodedPCM,
		SampleRate:    info.SampleRate,
		Channels:      info.Channels,
		BitsPerSample: info.BitsPerSample,
		Duration:      info.Duration,
		BitrateKbps:   kbps,
		BitDepth:      info.BitDepth,
	}, nil
}
// extractAnalysisPCMBase64 runs ffmpeg on filePath and returns the first audio
// stream as base64-encoded, single-channel, signed 16-bit little-endian PCM.
//
// Two channel-reduction strategies are attempted in order; the first attempt
// that exits successfully with non-empty output wins. If both fail, the error
// from the last attempt is returned.
func extractAnalysisPCMBase64(filePath string) (string, error) {
	ffmpegPath, err := GetFFmpegPath()
	if err != nil {
		return "", err
	}

	// Per-attempt channel handling: first the pan filter mapping output c0 to
	// input c0, then ffmpeg's "-ac 1" as the fallback.
	channelOptions := [][]string{
		{"-af", "pan=mono|c0=c0"},
		{"-ac", "1"},
	}

	// Generic failure, replaced by a more specific error on every failed attempt.
	decodeErr := fmt.Errorf("ffmpeg analysis decode failed")

	for _, channelOpt := range channelOptions {
		args := []string{"-v", "error", "-i", filePath, "-vn", "-map", "0:a:0"}
		args = append(args, channelOpt...)
		args = append(args, "-f", "s16le", "-acodec", "pcm_s16le", "pipe:1")

		var pcm, diagnostics bytes.Buffer
		cmd := exec.Command(ffmpegPath, args...)
		setHideWindow(cmd)
		cmd.Stdout = &pcm
		cmd.Stderr = &diagnostics

		runErr := cmd.Run()
		switch {
		case runErr != nil:
			decodeErr = fmt.Errorf("ffmpeg analysis decode failed: %w - %s", runErr, strings.TrimSpace(diagnostics.String()))
		case pcm.Len() == 0:
			decodeErr = fmt.Errorf("ffmpeg analysis decode returned empty PCM output")
		default:
			return base64.StdEncoding.EncodeToString(pcm.Bytes()), nil
		}
	}

	return "", decodeErr
}
+82 -6
View File
@@ -2,7 +2,7 @@ import { useState, useCallback, useRef, useEffect, type MutableRefObject } from
import type { AnalysisResult } from "@/types/api"; import type { AnalysisResult } from "@/types/api";
import { logger } from "@/lib/logger"; import { logger } from "@/lib/logger";
import { toastWithSound as toast } from "@/lib/toast-with-sound"; import { toastWithSound as toast } from "@/lib/toast-with-sound";
import { analyzeAudioArrayBuffer, analyzeAudioFile, analyzeSpectrumFromSamples, type AnalysisProgress, } from "@/lib/flac-analysis"; import { analyzeAudioArrayBuffer, analyzeAudioFile, analyzeDecodedSamples, analyzeSpectrumFromSamples, parseAudioMetadataFromInput, pcm16MonoArrayBufferToFloat32Samples, type AnalysisProgress, type FrontendAnalysisPayload, type ParsedAudioMetadata, } from "@/lib/flac-analysis";
import { loadAudioAnalysisPreferences } from "@/lib/audio-analysis-preferences"; import { loadAudioAnalysisPreferences } from "@/lib/audio-analysis-preferences";
type WindowFunction = "hann" | "hamming" | "blackman" | "rectangular"; type WindowFunction = "hann" | "hamming" | "blackman" | "rectangular";
function toWindowFunction(value: string): WindowFunction { function toWindowFunction(value: string): WindowFunction {
@@ -60,6 +60,25 @@ const DEFAULT_PROGRESS_STATE: ProgressState = {
interface CancelToken { interface CancelToken {
cancelled: boolean; cancelled: boolean;
} }
/**
 * `Window` extended with the optional `go.main.App` backend bindings used by
 * this module. Every level is optional, so callers must check that a method
 * exists before invoking it.
 * NOTE(review): presumably injected by the Wails runtime — confirm.
 */
interface WailsWindow extends Window {
go?: {
main?: {
App?: {
/** Resolves to the file's contents as a base64 string. */
ReadFileAsBase64?: (path: string) => Promise<string>;
/** Resolves to backend-decoded PCM plus stream properties for the file. */
DecodeAudioForAnalysis?: (path: string) => Promise<BackendAnalysisDecodeResponse>;
};
};
};
}
/**
 * JSON shape returned by the backend `DecodeAudioForAnalysis` binding.
 * Field names follow the backend's snake_case JSON keys.
 */
interface BackendAnalysisDecodeResponse {
/** Base64-encoded PCM; decoded on the frontend as mono 16-bit little-endian samples. */
pcm_base64: string;
/** Stream properties reported by the backend; non-positive values are treated as unknown when merged. */
sample_rate: number;
channels: number;
bits_per_sample: number;
duration: number;
/** Optional: may be absent from the response. */
bitrate_kbps?: number;
/** Optional: may be absent from the response. */
bit_depth?: string;
}
function cancelToken(tokenRef: MutableRefObject<CancelToken | null>): void { function cancelToken(tokenRef: MutableRefObject<CancelToken | null>): void {
if (tokenRef.current) { if (tokenRef.current) {
tokenRef.current.cancelled = true; tokenRef.current.cancelled = true;
@@ -81,6 +100,23 @@ function toProgressState(progress: AnalysisProgress): ProgressState {
message: progress.message, message: progress.message,
}; };
} }
/**
 * Reports whether `error` looks like an audio decoding failure, which is the
 * signal used to retry the analysis through the FFmpeg backend.
 *
 * The Web Audio API rejects `decodeAudioData` with a DOMException named
 * `EncodingError`, but the message text is implementation-defined. The name
 * is therefore checked first, and the message heuristic is kept as a fallback
 * for errors that only mention decoding in their text.
 *
 * @param error - Value caught from the analysis pipeline.
 * @returns `true` for `Error` instances named `EncodingError` or whose message
 *          contains "decode" (case-insensitive); `false` otherwise.
 */
function isDecodeFailure(error: unknown): boolean {
    if (!(error instanceof Error)) {
        return false;
    }
    return error.name === "EncodingError" || /decode/i.test(error.message);
}
/**
 * Overlays backend-reported stream properties onto locally parsed metadata.
 *
 * For each numeric property, a positive backend value wins and anything else
 * falls back to the parsed value. `totalSamples` is recomputed from the merged
 * duration and sample rate when both are positive; otherwise the parsed count
 * is kept. `bitrateKbps` takes the backend value unless it is null/undefined.
 *
 * @param parsed - Metadata parsed from the file on the frontend.
 * @param decoded - Response from the backend decode call.
 * @returns A new metadata object; `parsed` is not mutated.
 */
function mergeBackendDecodedMetadata(parsed: ParsedAudioMetadata, decoded: BackendAnalysisDecodeResponse): ParsedAudioMetadata {
    const preferPositive = (backendValue: number, parsedValue: number): number => {
        if (backendValue > 0) {
            return backendValue;
        }
        return parsedValue;
    };
    const merged: ParsedAudioMetadata = {
        ...parsed,
        sampleRate: preferPositive(decoded.sample_rate, parsed.sampleRate),
        channels: preferPositive(decoded.channels, parsed.channels),
        bitsPerSample: preferPositive(decoded.bits_per_sample, parsed.bitsPerSample),
        totalSamples: parsed.totalSamples,
        duration: preferPositive(decoded.duration, parsed.duration),
        bitrateKbps: decoded.bitrate_kbps ?? parsed.bitrateKbps,
    };
    if (merged.duration > 0 && merged.sampleRate > 0) {
        merged.totalSamples = Math.floor(merged.duration * merged.sampleRate);
    }
    return merged;
}
export function useAudioAnalysis() { export function useAudioAnalysis() {
const [analyzing, setAnalyzing] = useState(false); const [analyzing, setAnalyzing] = useState(false);
const [analysisProgress, setAnalysisProgress] = useState<ProgressState>(DEFAULT_PROGRESS_STATE); const [analysisProgress, setAnalysisProgress] = useState<ProgressState>(DEFAULT_PROGRESS_STATE);
@@ -189,7 +225,7 @@ export function useAudioAnalysis() {
logger.info(`Analyzing audio file (frontend from path): ${filePath}`); logger.info(`Analyzing audio file (frontend from path): ${filePath}`);
const start = Date.now(); const start = Date.now();
const prefs = loadAudioAnalysisPreferences(); const prefs = loadAudioAnalysisPreferences();
const readFileAsBase64 = (window as any)?.go?.main?.App?.ReadFileAsBase64 as ((path: string) => Promise<string>) | undefined; const readFileAsBase64 = (window as WailsWindow).go?.main?.App?.ReadFileAsBase64;
if (!readFileAsBase64) { if (!readFileAsBase64) {
throw new Error("ReadFileAsBase64 backend method is unavailable"); throw new Error("ReadFileAsBase64 backend method is unavailable");
} }
@@ -211,14 +247,16 @@ export function useAudioAnalysis() {
message: "Preparing audio buffer...", message: "Preparing audio buffer...",
}); });
const fileName = fileNameFromPath(filePath); const fileName = fileNameFromPath(filePath);
const payload = await analyzeAudioArrayBuffer({ const input = {
fileName, fileName,
fileSize: arrayBuffer.byteLength, fileSize: arrayBuffer.byteLength,
arrayBuffer, arrayBuffer,
}, { };
const analysisParams = {
fftSize: prefs.fftSize, fftSize: prefs.fftSize,
windowFunction: prefs.windowFunction, windowFunction: prefs.windowFunction,
}, (progress) => { } as const;
const updateProgress = (progress: AnalysisProgress) => {
if (token.cancelled) if (token.cancelled)
return; return;
const mappedPercent = 10 + (progress.percent * 0.9); const mappedPercent = 10 + (progress.percent * 0.9);
@@ -226,7 +264,45 @@ export function useAudioAnalysis() {
percent: Math.round(Math.max(0, Math.min(100, mappedPercent))), percent: Math.round(Math.max(0, Math.min(100, mappedPercent))),
message: progress.message, message: progress.message,
}); });
}, () => token.cancelled); };
let payload: FrontendAnalysisPayload;
try {
payload = await analyzeAudioArrayBuffer(input, analysisParams, updateProgress, () => token.cancelled);
}
catch (err) {
if (!isDecodeFailure(err)) {
throw err;
}
const decodeAudioForAnalysis = (window as WailsWindow).go?.main?.App?.DecodeAudioForAnalysis;
if (!decodeAudioForAnalysis) {
throw err;
}
logger.warning(`Browser decoder failed for ${fileName}; trying FFmpeg fallback`);
setAnalysisProgress({
percent: 18,
message: "Browser decoder failed, trying FFmpeg fallback...",
});
const decoded = await decodeAudioForAnalysis(filePath);
if (token.cancelled) {
return null;
}
setAnalysisProgress({
percent: 24,
message: "Decoding audio with FFmpeg...",
});
const pcmBase64 = decoded.pcm_base64 || "";
if (!pcmBase64) {
throw new Error("FFmpeg analysis decode returned no PCM data");
}
const pcmBuffer = await base64ToArrayBuffer(pcmBase64, () => token.cancelled);
if (token.cancelled) {
return null;
}
const parsedMetadata = parseAudioMetadataFromInput(input);
const mergedMetadata = mergeBackendDecodedMetadata(parsedMetadata, decoded);
const samples = pcm16MonoArrayBufferToFloat32Samples(pcmBuffer);
payload = await analyzeDecodedSamples(input, mergedMetadata, samples, analysisParams, updateProgress, () => token.cancelled, mergedMetadata.duration);
}
if (token.cancelled) { if (token.cancelled) {
return null; return null;
} }
+36 -16
View File
@@ -17,8 +17,8 @@ const MP4_CONTAINER_TYPES = new Set([
"moov", "trak", "mdia", "minf", "stbl", "edts", "dinf", "moov", "trak", "mdia", "minf", "stbl", "edts", "dinf",
"udta", "ilst", "meta", "stsd", "wave", "udta", "ilst", "meta", "stsd", "wave",
]); ]);
type SupportedAudioFileType = "FLAC" | "MP3" | "M4A" | "AAC"; export type SupportedAudioFileType = "FLAC" | "MP3" | "M4A" | "AAC";
interface ParsedAudioMetadata { export interface ParsedAudioMetadata {
fileType: SupportedAudioFileType; fileType: SupportedAudioFileType;
sampleRate: number; sampleRate: number;
channels: number; channels: number;
@@ -417,7 +417,7 @@ function parseM4aMetadata(buffer: ArrayBuffer): ParsedAudioMetadata {
} }
} }
} }
else if ((box.type === "mp4a" || box.type === "aac ") && box.offset + 36 <= boxEnd) { else if ((box.type === "mp4a" || box.type === "aac " || box.type === "alac") && box.offset + 36 <= boxEnd) {
channels = view.getUint16(box.offset + 24, false) || channels; channels = view.getUint16(box.offset + 24, false) || channels;
bitsPerSample = view.getUint16(box.offset + 26, false) || bitsPerSample; bitsPerSample = view.getUint16(box.offset + 26, false) || bitsPerSample;
if (!sampleRate) { if (!sampleRate) {
@@ -455,7 +455,7 @@ function parseM4aMetadata(buffer: ArrayBuffer): ParsedAudioMetadata {
duration, duration,
}; };
} }
function parseAudioMetadata(input: AudioArrayBufferInput): ParsedAudioMetadata { export function parseAudioMetadataFromInput(input: AudioArrayBufferInput): ParsedAudioMetadata {
const fileType = detectAudioFileType(input.arrayBuffer, input.fileName); const fileType = detectAudioFileType(input.arrayBuffer, input.fileName);
switch (fileType) { switch (fileType) {
case "FLAC": return parseFlacMetadata(input.arrayBuffer); case "FLAC": return parseFlacMetadata(input.arrayBuffer);
@@ -465,6 +465,15 @@ function parseAudioMetadata(input: AudioArrayBufferInput): ParsedAudioMetadata {
default: throw new Error(`Unsupported audio format: ${input.fileName || "unknown"}`); default: throw new Error(`Unsupported audio format: ${input.fileName || "unknown"}`);
} }
} }
/**
 * Converts raw signed 16-bit little-endian PCM into normalized float samples.
 *
 * Each 2-byte sample is divided by 32768, giving values in [-1, 1). A trailing
 * odd byte, if any, is ignored.
 *
 * @param buffer - Raw PCM bytes, one 16-bit sample after another.
 * @returns A `Float32Array` with one entry per complete 16-bit sample.
 */
export function pcm16MonoArrayBufferToFloat32Samples(buffer: ArrayBuffer): Float32Array {
    const BYTES_PER_SAMPLE = 2;
    const INT16_SCALE = 32768;
    const reader = new DataView(buffer);
    const output = new Float32Array(Math.floor(buffer.byteLength / BYTES_PER_SAMPLE));
    let byteOffset = 0;
    for (let index = 0; index < output.length; index += 1) {
        output[index] = reader.getInt16(byteOffset, true) / INT16_SCALE;
        byteOffset += BYTES_PER_SAMPLE;
    }
    return output;
}
function buildWindowCoefficients(size: number, windowFunction: SpectrumParams["windowFunction"]): Float32Array { function buildWindowCoefficients(size: number, windowFunction: SpectrumParams["windowFunction"]): Float32Array {
const coeffs = new Float32Array(size); const coeffs = new Float32Array(size);
if (size <= 1) { if (size <= 1) {
@@ -649,7 +658,7 @@ export async function analyzeAudioFile(file: File, params: SpectrumParams = DEFA
export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, params: SpectrumParams = DEFAULT_PARAMS, onProgress?: AnalysisProgressCallback, shouldCancel?: AnalysisCancelCheck): Promise<FrontendAnalysisPayload> { export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, params: SpectrumParams = DEFAULT_PARAMS, onProgress?: AnalysisProgressCallback, shouldCancel?: AnalysisCancelCheck): Promise<FrontendAnalysisPayload> {
throwIfCancelled(shouldCancel); throwIfCancelled(shouldCancel);
reportProgress(onProgress, "parse", 5, "Parsing audio metadata..."); reportProgress(onProgress, "parse", 5, "Parsing audio metadata...");
const metadata = parseAudioMetadata(input); const metadata = parseAudioMetadataFromInput(input);
throwIfCancelled(shouldCancel); throwIfCancelled(shouldCancel);
reportProgress(onProgress, "decode", 15, "Decoding audio stream..."); reportProgress(onProgress, "decode", 15, "Decoding audio stream...");
const audioContext = createAnalysisAudioContext(metadata.sampleRate); const audioContext = createAnalysisAudioContext(metadata.sampleRate);
@@ -658,6 +667,17 @@ export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, para
throwIfCancelled(shouldCancel); throwIfCancelled(shouldCancel);
reportProgress(onProgress, "decode", 35, "Audio decoded"); reportProgress(onProgress, "decode", 35, "Audio decoded");
const samples = audioBuffer.getChannelData(0); const samples = audioBuffer.getChannelData(0);
return analyzeDecodedSamples(input, metadata, samples, params, onProgress, shouldCancel, audioBuffer.duration);
}
finally {
await audioContext.close();
}
}
export async function analyzeDecodedSamples(input: AudioArrayBufferInput, metadata: ParsedAudioMetadata, samples: Float32Array, params: SpectrumParams = DEFAULT_PARAMS, onProgress?: AnalysisProgressCallback, shouldCancel?: AnalysisCancelCheck, durationOverride?: number): Promise<FrontendAnalysisPayload> {
throwIfCancelled(shouldCancel);
const analysisSampleRate = metadata.sampleRate > 0 ? metadata.sampleRate : 44100;
const analysisChannels = metadata.channels > 0 ? metadata.channels : 1;
const bitDepthLabel = metadata.bitsPerSample > 0 ? `${metadata.bitsPerSample}-bit` : "Unknown";
reportProgress(onProgress, "metrics", 40, "Calculating peak/RMS..."); reportProgress(onProgress, "metrics", 40, "Calculating peak/RMS...");
let peak = 0; let peak = 0;
let sumSquares = 0; let sumSquares = 0;
@@ -670,7 +690,7 @@ export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, para
peak = absSample; peak = absSample;
sumSquares += sample * sample; sumSquares += sample * sample;
if ((i + 1) % METRICS_CHUNK_SIZE === 0 || i === samples.length - 1) { if ((i + 1) % METRICS_CHUNK_SIZE === 0 || i === samples.length - 1) {
const metricsProgress = 40 + (((i + 1) / samples.length) * 10); const metricsProgress = 40 + (((i + 1) / Math.max(1, samples.length)) * 10);
reportProgress(onProgress, "metrics", metricsProgress, "Calculating peak/RMS..."); reportProgress(onProgress, "metrics", metricsProgress, "Calculating peak/RMS...");
const now = nowMs(); const now = nowMs();
if (now - lastMetricsYieldAt >= 16) { if (now - lastMetricsYieldAt >= 16) {
@@ -684,12 +704,16 @@ export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, para
const rms = samples.length > 0 ? Math.sqrt(sumSquares / samples.length) : 0; const rms = samples.length > 0 ? Math.sqrt(sumSquares / samples.length) : 0;
const rmsDB = rms > 0 ? 20 * Math.log10(rms) : -120; const rmsDB = rms > 0 ? 20 * Math.log10(rms) : -120;
const dynamicRange = peakDB - rmsDB; const dynamicRange = peakDB - rmsDB;
const duration = audioBuffer.duration > 0 ? audioBuffer.duration : metadata.duration; const duration = durationOverride && durationOverride > 0
? durationOverride
: (metadata.duration > 0
? metadata.duration
: (analysisSampleRate > 0 ? samples.length / analysisSampleRate : 0));
const totalSamples = metadata.totalSamples > 0 const totalSamples = metadata.totalSamples > 0
? metadata.totalSamples ? metadata.totalSamples
: Math.floor(duration * metadata.sampleRate); : (duration > 0 ? Math.floor(duration * analysisSampleRate) : samples.length);
reportProgress(onProgress, "metrics", 50, "Signal metrics complete"); reportProgress(onProgress, "metrics", 50, "Signal metrics complete");
const spectrum = await analyzeSpectrumFromSamples(samples, metadata.sampleRate, params, (progress) => { const spectrum = await analyzeSpectrumFromSamples(samples, analysisSampleRate, params, (progress) => {
const mappedPercent = 50 + (progress.percent * 0.45); const mappedPercent = 50 + (progress.percent * 0.45);
reportProgress(onProgress, "spectrum", mappedPercent, progress.message); reportProgress(onProgress, "spectrum", mappedPercent, progress.message);
}, shouldCancel); }, shouldCancel);
@@ -699,12 +723,12 @@ export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, para
file_path: input.fileName, file_path: input.fileName,
file_size: input.fileSize, file_size: input.fileSize,
file_type: metadata.fileType, file_type: metadata.fileType,
sample_rate: metadata.sampleRate, sample_rate: analysisSampleRate,
channels: metadata.channels || audioBuffer.numberOfChannels, channels: analysisChannels,
bits_per_sample: metadata.bitsPerSample, bits_per_sample: metadata.bitsPerSample,
total_samples: totalSamples, total_samples: totalSamples,
duration, duration,
bit_depth: `${metadata.bitsPerSample}-bit`, bit_depth: bitDepthLabel,
dynamic_range: dynamicRange, dynamic_range: dynamicRange,
peak_amplitude: peakDB, peak_amplitude: peakDB,
rms_level: rmsDB, rms_level: rmsDB,
@@ -719,9 +743,5 @@ export async function analyzeAudioArrayBuffer(input: AudioArrayBufferInput, para
reportProgress(onProgress, "finalize", 100, "Analysis complete"); reportProgress(onProgress, "finalize", 100, "Analysis complete");
return payload; return payload;
} }
finally {
await audioContext.close();
}
}
export const analyzeFlacFile = analyzeAudioFile; export const analyzeFlacFile = analyzeAudioFile;
export const analyzeFlacArrayBuffer = analyzeAudioArrayBuffer; export const analyzeFlacArrayBuffer = analyzeAudioArrayBuffer;