Files
musicgen/src/audio.rs
2025-06-20 18:52:55 -06:00

594 lines
19 KiB
Rust

//! Audio output and file export module
//!
//! This module provides functionality for real-time audio playback and
//! exporting generated music to audio files.
use crate::SAMPLE_RATE;
use crate::core::Composition;
use crate::sequencer::Sequencer;
use std::fs;
use std::path::Path;
use std::sync::{Arc, Mutex};
/// Audio output configuration
///
/// Plain value type describing the desired output format.
/// NOTE(review): `AudioPlayer::new` in this file takes this value as `_config`
/// and does not read it, so these settings currently have no effect on playback.
#[derive(Debug, Clone)]
pub struct AudioConfig {
/// Sample rate; the default is the crate-wide `SAMPLE_RATE`
/// (presumably samples per second — confirm against the crate root).
pub sample_rate: f32,
/// Buffer size; the default is 512. The unit (samples vs. frames) is not
/// established anywhere in this file — TODO confirm.
pub buffer_size: usize,
/// Number of output channels; the default is 2 (stereo).
pub channels: usize,
}
impl Default for AudioConfig {
fn default() -> Self {
Self {
sample_rate: SAMPLE_RATE,
buffer_size: 512,
channels: 2, // Stereo
}
}
}
/// Real-time audio player using cpal
///
/// Owns a shared [`Sequencer`] that is driven from the cpal output callback
/// once `start_playback` has been called.
pub struct AudioPlayer {
/// Sequencer shared between this handle, the audio output callback and any
/// caller that obtained a clone through `get_sequencer`.
sequencer: Arc<Mutex<Sequencer>>,
/// Set to `true` by `start_playback` and to `false` by `stop_playback`;
/// `pause_playback` / `resume_playback` do not modify it.
is_playing: bool,
}
impl AudioPlayer {
    /// Create a new audio player
    ///
    /// The player starts in the stopped state with a fresh sequencer created
    /// with a tempo of `120.0`. The configuration argument is currently
    /// accepted but not used.
    pub fn new(_config: AudioConfig) -> Result<Self, String> {
        let sequencer = Arc::new(Mutex::new(Sequencer::new(120.0)));
        Ok(Self {
            sequencer,
            is_playing: false,
        })
    }

    /// Lock the shared sequencer.
    ///
    /// A failed lock (the mutex was poisoned by a panic on another thread) is
    /// reported as `Err("Lock error: ...")` so that every control method
    /// surfaces the problem the same way instead of silently doing nothing.
    fn lock_sequencer(&self) -> Result<std::sync::MutexGuard<'_, Sequencer>, String> {
        self.sequencer
            .lock()
            .map_err(|e| format!("Lock error: {}", e))
    }

    /// Load a composition into the player
    ///
    /// # Errors
    /// Returns an error if the sequencer lock cannot be taken or if the
    /// sequencer itself rejects the composition.
    pub fn load_composition(&mut self, composition: &Composition) -> Result<(), String> {
        let mut sequencer = self.lock_sequencer()?;
        sequencer.load_composition(composition)
    }

    /// Start real-time audio playback
    ///
    /// Opens the default output device with its default configuration, builds
    /// an output stream whose callback pulls audio from the shared sequencer,
    /// starts the stream and then starts the sequencer.
    ///
    /// NOTE(review): the stream is intentionally leaked with `mem::forget` so
    /// it outlives this call. As a consequence `stop_playback` only stops the
    /// sequencer (the device callback keeps running), and every call to this
    /// method creates an additional stream driving the same sequencer.
    /// NOTE(review): the whole device buffer is handed to
    /// `Sequencer::process_audio` regardless of the device's channel count or
    /// sample rate — confirm that the sequencer expects that layout.
    ///
    /// # Errors
    /// Returns an error if no output device or default configuration is
    /// available, if the device uses a sample format other than `f32`, `i16`
    /// or `u16`, if the stream cannot be built or started, or if the sequencer
    /// lock cannot be taken.
    pub fn start_playback(&mut self) -> Result<(), String> {
        use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};

        let host = cpal::default_host();
        let device = host
            .default_output_device()
            .ok_or("No output device available")?;
        let supported_config = device
            .default_output_config()
            .map_err(|e| format!("Failed to get default config: {}", e))?;
        let sample_format = supported_config.sample_format();
        let config: cpal::StreamConfig = supported_config.into();
        let sequencer = Arc::clone(&self.sequencer);

        let stream = match sample_format {
            cpal::SampleFormat::F32 => device.build_output_stream(
                &config,
                move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
                    if let Ok(mut seq) = sequencer.lock() {
                        let _ = seq.process_audio(data);
                    } else {
                        // Fill with silence if we can't lock
                        data.fill(0.0);
                    }
                },
                |err| eprintln!("Audio stream error: {}", err),
                None,
            ),
            cpal::SampleFormat::I16 => {
                // Scratch buffer owned by the callback and reused across
                // invocations, so the audio thread does not allocate on
                // every callback once it has grown to the device buffer size.
                let mut float_buffer: Vec<f32> = Vec::new();
                device.build_output_stream(
                    &config,
                    move |data: &mut [i16], _: &cpal::OutputCallbackInfo| {
                        // Reset to silence; this is what gets played if the
                        // lock cannot be taken.
                        float_buffer.clear();
                        float_buffer.resize(data.len(), 0.0);
                        if let Ok(mut seq) = sequencer.lock() {
                            let _ = seq.process_audio(&mut float_buffer);
                        }
                        // Convert f32 to i16 (the `as` cast saturates out-of-range values)
                        for (out, &sample) in data.iter_mut().zip(float_buffer.iter()) {
                            *out = (sample * i16::MAX as f32) as i16;
                        }
                    },
                    |err| eprintln!("Audio stream error: {}", err),
                    None,
                )
            }
            cpal::SampleFormat::U16 => {
                // Same reusable scratch buffer as in the i16 path.
                let mut float_buffer: Vec<f32> = Vec::new();
                device.build_output_stream(
                    &config,
                    move |data: &mut [u16], _: &cpal::OutputCallbackInfo| {
                        float_buffer.clear();
                        float_buffer.resize(data.len(), 0.0);
                        if let Ok(mut seq) = sequencer.lock() {
                            let _ = seq.process_audio(&mut float_buffer);
                        }
                        // Convert f32 to u16: map [-1.0, 1.0] onto [0, u16::MAX]
                        for (out, &sample) in data.iter_mut().zip(float_buffer.iter()) {
                            *out = ((sample + 1.0) * 0.5 * u16::MAX as f32) as u16;
                        }
                    },
                    |err| eprintln!("Audio stream error: {}", err),
                    None,
                )
            }
            _ => {
                return Err("Unsupported sample format".to_string());
            }
        }
        .map_err(|e| format!("Failed to build stream: {}", e))?;

        stream
            .play()
            .map_err(|e| format!("Failed to play stream: {}", e))?;

        // Start the sequencer. If the lock fails we return early, which drops
        // (and thereby stops) the stream instead of leaking a silent one.
        self.lock_sequencer()?.play();
        self.is_playing = true;

        // Keep the stream alive (in a real application, you'd want better lifecycle management)
        std::mem::forget(stream);
        Ok(())
    }

    /// Stop playback
    ///
    /// Stops the sequencer and marks the player as not playing.
    ///
    /// # Errors
    /// Returns an error (and leaves the playing flag untouched) if the
    /// sequencer lock cannot be taken.
    pub fn stop_playback(&mut self) -> Result<(), String> {
        self.lock_sequencer()?.stop();
        self.is_playing = false;
        Ok(())
    }

    /// Pause playback
    ///
    /// # Errors
    /// Returns an error if the sequencer lock cannot be taken.
    pub fn pause_playback(&mut self) -> Result<(), String> {
        self.lock_sequencer()?.pause();
        Ok(())
    }

    /// Resume playback
    ///
    /// # Errors
    /// Returns an error if the sequencer lock cannot be taken.
    pub fn resume_playback(&mut self) -> Result<(), String> {
        self.lock_sequencer()?.play();
        Ok(())
    }

    /// Set playback position
    ///
    /// The value is forwarded unchanged to `Sequencer::set_position`.
    ///
    /// # Errors
    /// Returns an error if the sequencer lock cannot be taken.
    pub fn set_position(&mut self, position: f32) -> Result<(), String> {
        self.lock_sequencer()?.set_position(position);
        Ok(())
    }

    /// Set tempo
    ///
    /// The value is forwarded unchanged to `Sequencer::set_tempo`.
    ///
    /// # Errors
    /// Returns an error if the sequencer lock cannot be taken.
    pub fn set_tempo(&mut self, tempo: f32) -> Result<(), String> {
        self.lock_sequencer()?.set_tempo(tempo);
        Ok(())
    }

    /// Get sequencer for direct control
    ///
    /// Returns another handle to the same shared sequencer used by playback.
    pub fn get_sequencer(&self) -> Arc<Mutex<Sequencer>> {
        Arc::clone(&self.sequencer)
    }
}
/// Audio file exporter
///
/// Renders a composition offline through its own `Sequencer` and either
/// writes the result to a WAV file under `output/` or returns the raw samples.
pub struct AudioExporter {
/// Sample rate used to convert the export duration into a sample count and
/// written into the WAV header (truncated to `u32`).
sample_rate: f32,
}
impl AudioExporter {
    /// Number of samples requested from the sequencer per render call.
    const RENDER_CHUNK_SIZE: usize = 1024;

    /// Create a new audio exporter
    pub fn new(sample_rate: f32) -> Self {
        Self { sample_rate }
    }

    /// Ensure output directory exists
    ///
    /// `create_dir_all` succeeds when the directory is already present, so no
    /// separate existence check (with its check-then-create race) is needed.
    fn ensure_output_dir() -> Result<(), String> {
        fs::create_dir_all(Path::new("output"))
            .map_err(|e| format!("Failed to create output directory: {}", e))
    }

    /// Get full path for output file
    fn get_output_path(filename: &str) -> String {
        format!("output/{}", filename)
    }

    /// Build the 16-bit integer PCM WAV header description for `channels` channels.
    fn wav_spec(&self, channels: u16) -> hound::WavSpec {
        hound::WavSpec {
            channels,
            sample_rate: self.sample_rate as u32,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        }
    }

    /// Convert one float sample to 16-bit PCM.
    ///
    /// The float-to-int `as` cast saturates, so values outside `[-1.0, 1.0]`
    /// clip instead of wrapping and NaN becomes 0.
    fn to_pcm16(sample: f32) -> i16 {
        (sample * i16::MAX as f32) as i16
    }

    /// Number of samples to render for the requested duration.
    ///
    /// With `None`, the duration is `total_duration * 60.0 / tempo`
    /// (presumably beats converted to seconds — confirm against `Composition`).
    fn total_samples(&self, composition: &Composition, duration_seconds: Option<f32>) -> usize {
        let export_duration = duration_seconds
            .unwrap_or_else(|| composition.total_duration * 60.0 / composition.params.tempo);
        (export_duration * self.sample_rate) as usize
    }

    /// Render `total_samples` samples of `composition` in fixed-size chunks,
    /// handing each rendered chunk to `sink`.
    ///
    /// When `report_progress` is set, a progress line is printed each time at
    /// least one more second of audio has been rendered, and once at the end.
    fn render<F>(
        &self,
        composition: &Composition,
        total_samples: usize,
        report_progress: bool,
        mut sink: F,
    ) -> Result<(), String>
    where
        F: FnMut(&[f32]) -> Result<(), String>,
    {
        // Create a sequencer for rendering
        let mut sequencer = Sequencer::new(composition.params.tempo);
        sequencer
            .load_composition(composition)
            .map_err(|e| format!("Failed to load composition: {}", e))?;
        sequencer.play();

        // `.max(1)` keeps the progress arithmetic well-defined for sample
        // rates below 1.0, which truncate to zero.
        let samples_per_second = (self.sample_rate as usize).max(1);
        let mut next_report = samples_per_second;

        let mut buffer = vec![0.0f32; Self::RENDER_CHUNK_SIZE];
        let mut samples_rendered = 0usize;
        while samples_rendered < total_samples {
            let count = (total_samples - samples_rendered).min(Self::RENDER_CHUNK_SIZE);

            // Start every chunk from silence so samples left over from the
            // previous chunk cannot leak through should `process_audio` mix
            // into its buffer rather than overwrite it (its contract is not
            // visible from this file).
            buffer[..count].fill(0.0);
            sequencer
                .process_audio(&mut buffer[..count])
                .map_err(|e| format!("Audio processing error: {}", e))?;
            sink(&buffer[..count])?;
            samples_rendered += count;

            // Chunk boundaries rarely coincide with whole seconds, so report
            // when a second boundary has been crossed instead of testing for
            // an exact multiple of the sample rate.
            let finished = samples_rendered == total_samples;
            if report_progress && (finished || samples_rendered >= next_report) {
                let progress = samples_rendered as f32 / total_samples as f32 * 100.0;
                println!("Export progress: {:.1}%", progress);
                next_report = (samples_rendered / samples_per_second)
                    .saturating_add(1)
                    .saturating_mul(samples_per_second);
            }
        }
        Ok(())
    }

    /// Export a composition to a WAV file
    ///
    /// Writes a mono, 16-bit integer PCM file to `output/<filename>`.
    ///
    /// # Arguments
    /// * `composition` - The composition to export
    /// * `filename` - Output filename
    /// * `duration_seconds` - Duration to export in seconds (None for full composition)
    ///
    /// # Errors
    /// Returns an error if the output directory or file cannot be created, the
    /// composition cannot be loaded, rendering fails, or a write fails.
    pub fn export_wav(
        &self,
        composition: &Composition,
        filename: &str,
        duration_seconds: Option<f32>,
    ) -> Result<(), String> {
        Self::ensure_output_dir()?;
        let output_path = Self::get_output_path(filename);
        // Mono for simplicity
        let mut writer = hound::WavWriter::create(&output_path, self.wav_spec(1))
            .map_err(|e| format!("Failed to create WAV file: {}", e))?;

        let total_samples = self.total_samples(composition, duration_seconds);
        self.render(composition, total_samples, true, |chunk| {
            // Convert to i16 and write to file
            for &sample in chunk {
                writer
                    .write_sample(Self::to_pcm16(sample))
                    .map_err(|e| format!("Failed to write sample: {}", e))?;
            }
            Ok(())
        })?;

        writer
            .finalize()
            .map_err(|e| format!("Failed to finalize WAV file: {}", e))?;
        println!("Successfully exported to {}", output_path);
        Ok(())
    }

    /// Export a composition to a stereo WAV file
    ///
    /// The rendered signal is mono; each sample is written to both the left
    /// and the right channel of a 16-bit integer PCM file at `output/<filename>`.
    ///
    /// # Errors
    /// Same failure conditions as [`AudioExporter::export_wav`].
    pub fn export_stereo_wav(
        &self,
        composition: &Composition,
        filename: &str,
        duration_seconds: Option<f32>,
    ) -> Result<(), String> {
        Self::ensure_output_dir()?;
        let output_path = Self::get_output_path(filename);
        // Stereo
        let mut writer = hound::WavWriter::create(&output_path, self.wav_spec(2))
            .map_err(|e| format!("Failed to create WAV file: {}", e))?;

        let total_samples = self.total_samples(composition, duration_seconds);
        self.render(composition, total_samples, true, |chunk| {
            // Write stereo samples (duplicate mono to both channels)
            for &sample in chunk {
                let pcm = Self::to_pcm16(sample);
                writer
                    .write_sample(pcm) // Left channel
                    .map_err(|e| format!("Failed to write left sample: {}", e))?;
                writer
                    .write_sample(pcm) // Right channel
                    .map_err(|e| format!("Failed to write right sample: {}", e))?;
            }
            Ok(())
        })?;

        writer
            .finalize()
            .map_err(|e| format!("Failed to finalize WAV file: {}", e))?;
        println!("Successfully exported stereo to {}", output_path);
        Ok(())
    }

    /// Export multiple takes of a composition with different parameters
    ///
    /// For take `i` of `variations`, a new composition is generated from the
    /// base parameters with `complexity = clamp(i / variations, 0.1, 1.0)` and
    /// `rhythmic_density = 0.5 + (i / variations) * 0.4`, then exported as a
    /// mono WAV named `<filename_prefix>_<NN>.wav` (numbered from 01).
    ///
    /// # Errors
    /// Stops at, and returns, the first generation or export failure.
    pub fn export_variations(
        &self,
        base_composition: &Composition,
        filename_prefix: &str,
        variations: usize,
        duration_seconds: Option<f32>,
    ) -> Result<(), String> {
        Self::ensure_output_dir()?;
        for i in 0..variations {
            // Create variation by modifying parameters
            let fraction = i as f32 / variations as f32;
            let mut params = base_composition.params.clone();
            params.complexity = fraction.clamp(0.1, 1.0);
            params.rhythmic_density = 0.5 + fraction * 0.4;

            let mut variation = Composition::new(params);
            variation
                .generate()
                .map_err(|e| format!("Failed to generate variation {}: {}", i, e))?;

            let filename = format!("{}_{:02}.wav", filename_prefix, i + 1);
            self.export_wav(&variation, &filename, duration_seconds)?;
        }
        println!("Successfully exported {} variations", variations);
        Ok(())
    }

    /// Export composition as raw audio data (for further processing)
    ///
    /// Returns the rendered mono samples as `f32` values; nothing is written
    /// to disk and no progress is printed.
    ///
    /// # Errors
    /// Returns an error if the composition cannot be loaded or rendering fails.
    pub fn export_raw_audio(
        &self,
        composition: &Composition,
        duration_seconds: Option<f32>,
    ) -> Result<Vec<f32>, String> {
        let total_samples = self.total_samples(composition, duration_seconds);
        let mut audio_data = Vec::with_capacity(total_samples);
        self.render(composition, total_samples, false, |chunk| {
            audio_data.extend_from_slice(chunk);
            Ok(())
        })?;
        Ok(audio_data)
    }
}
impl Default for AudioExporter {
fn default() -> Self {
Self::new(SAMPLE_RATE)
}
}
/// Simple audio analysis utilities
///
/// Stateless helpers operating on slices of `f32` samples.
pub struct AudioAnalyzer;

impl AudioAnalyzer {
    /// Calculate RMS (Root Mean Square) level of audio data
    ///
    /// Returns `0.0` for empty input. The squares are accumulated in `f64` so
    /// that long buffers do not lose precision in the running sum.
    pub fn calculate_rms(audio_data: &[f32]) -> f32 {
        if audio_data.is_empty() {
            return 0.0;
        }
        let sum_squares: f64 = audio_data
            .iter()
            .map(|&x| f64::from(x) * f64::from(x))
            .sum();
        (sum_squares / audio_data.len() as f64).sqrt() as f32
    }

    /// Find peak amplitude in audio data
    ///
    /// Returns the largest absolute sample value, or `0.0` for empty input.
    pub fn find_peak(audio_data: &[f32]) -> f32 {
        audio_data.iter().map(|x| x.abs()).fold(0.0, f32::max)
    }

    /// Calculate dynamic range (ratio of peak to RMS)
    ///
    /// The result is `20 * log10(peak / rms)` in dB. When the RMS is zero
    /// (silent or empty input) the ratio is undefined and `f32::INFINITY` is
    /// returned.
    pub fn calculate_dynamic_range(audio_data: &[f32]) -> f32 {
        let peak = Self::find_peak(audio_data);
        let rms = Self::calculate_rms(audio_data);
        if rms > 0.0 {
            20.0 * (peak / rms).log10() // In dB
        } else {
            f32::INFINITY
        }
    }

    /// Simple frequency analysis using zero-crossing rate
    ///
    /// Returns the fraction of adjacent sample pairs whose signs differ
    /// (a sample `>= 0.0` counts as non-negative), in the range `[0.0, 1.0]`.
    /// Inputs with fewer than two samples yield `0.0`.
    pub fn zero_crossing_rate(audio_data: &[f32]) -> f32 {
        if audio_data.len() < 2 {
            return 0.0;
        }
        let crossings = audio_data
            .windows(2)
            .filter(|pair| (pair[0] >= 0.0) != (pair[1] >= 0.0))
            .count();
        // N samples form N - 1 adjacent pairs; dividing by N would make a
        // signal that crosses at every step report less than 1.0.
        crossings as f32 / (audio_data.len() - 1) as f32
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{CompositionBuilder, CompositionStyle};

    /// The default configuration is stereo at the crate-wide sample rate.
    #[test]
    fn test_audio_config() {
        let AudioConfig {
            sample_rate,
            buffer_size,
            channels,
        } = AudioConfig::default();
        assert_eq!(sample_rate, SAMPLE_RATE);
        assert_eq!(channels, 2);
        assert!(buffer_size > 0);
    }

    /// The exporter stores the sample rate it was constructed with.
    #[test]
    fn test_audio_exporter_creation() {
        assert_eq!(AudioExporter::new(44100.0).sample_rate, 44100.0);
    }

    /// Rendering one second at 44.1 kHz yields exactly 44100 samples.
    #[test]
    fn test_raw_audio_export() {
        let builder = CompositionBuilder::new()
            .style(CompositionStyle::Electronic)
            .measures(2)
            .tempo(120.0);
        let mut composition = builder.build();
        let _ = composition.generate();

        let rendered = AudioExporter::new(44100.0).export_raw_audio(&composition, Some(1.0));
        assert!(rendered.is_ok());
        assert_eq!(rendered.unwrap().len(), 44100); // 1 second at 44.1kHz
    }

    /// RMS, peak and zero-crossing rate behave sensibly on a 440 Hz sine.
    #[test]
    fn test_audio_analysis() {
        // Test with a simple sine wave
        let sample_rate = 44100.0;
        let frequency = 440.0;
        let duration = 1.0;
        let samples = (sample_rate * duration) as usize;
        let audio_data: Vec<f32> = (0..samples)
            .map(|i| {
                let t = i as f32 / sample_rate;
                (2.0 * std::f32::consts::PI * frequency * t).sin() * 0.5
            })
            .collect();

        let rms = AudioAnalyzer::calculate_rms(&audio_data);
        let peak = AudioAnalyzer::find_peak(&audio_data);
        let zcr = AudioAnalyzer::zero_crossing_rate(&audio_data);
        assert!(rms > 0.0);
        assert!(peak > rms);
        assert!(zcr > 0.0);

        // For a sine wave, RMS should be approximately peak / sqrt(2)
        let expected_rms = 0.5 / (2.0_f32).sqrt();
        assert!((rms - expected_rms).abs() < 0.01);
    }

    /// The peak-to-RMS ratio of a non-silent signal is positive and finite.
    #[test]
    fn test_dynamic_range_calculation() {
        let audio_data: [f32; 6] = [0.0, 0.5, -0.3, 0.8, -0.2, 0.1];
        let dynamic_range = AudioAnalyzer::calculate_dynamic_range(&audio_data);
        assert!(dynamic_range > 0.0);
        assert!(dynamic_range.is_finite());
    }

    /// A signal that flips sign on every sample has a high zero-crossing rate.
    #[test]
    fn test_zero_crossing_rate() {
        // Test with alternating positive/negative values
        let audio_data: [f32; 6] = [1.0, -1.0, 1.0, -1.0, 1.0, -1.0];
        let zcr = AudioAnalyzer::zero_crossing_rate(&audio_data);
        // Should have high zero-crossing rate
        assert!(zcr > 0.5);
    }

    /// Constructing a player from the default configuration succeeds.
    #[test]
    fn test_audio_player_creation() {
        assert!(AudioPlayer::new(AudioConfig::default()).is_ok());
    }
}