4
4
5
5
// third-party utilities
6
6
// use your favorite implementations
7
- #define DR_WAV_IMPLEMENTATION
8
- #include " dr_wav.h"
7
+ #define STB_VORBIS_HEADER_ONLY
8
+ #include " stb_vorbis.c" /* Enables Vorbis decoding. */
9
+
10
+ #define MA_NO_DEVICE_IO
11
+ #define MA_NO_THREADING
12
+ #define MA_NO_ENCODING
13
+ #define MA_NO_GENERATION
14
+ #define MA_NO_RESOURCE_MANAGER
15
+ #define MA_NO_NODE_GRAPH
16
+ #define MINIAUDIO_IMPLEMENTATION
17
+ #include " miniaudio.h"
9
18
10
19
#include < cmath>
11
20
#include < cstring>
@@ -639,9 +648,14 @@ bool is_wav_buffer(const std::string buf) {
639
648
return true ;
640
649
}
641
650
642
- bool read_wav (const std::string & fname, std::vector<float >& pcmf32, std::vector<std::vector<float >>& pcmf32s, bool stereo) {
643
- drwav wav;
644
- std::vector<uint8_t > wav_data; // used for pipe input from stdin or ffmpeg decoding output
651
+ bool read_audio_data (const std::string & fname, std::vector<float >& pcmf32, std::vector<std::vector<float >>& pcmf32s, bool stereo) {
652
+ std::vector<uint8_t > audio_data; // used for pipe input from stdin or ffmpeg decoding output
653
+
654
+ ma_result result;
655
+ ma_decoder_config decoder_config;
656
+ ma_decoder decoder;
657
+
658
+ decoder_config = ma_decoder_config_init (ma_format_f32, stereo ? 2 : 1 , COMMON_SAMPLE_RATE);
645
659
646
660
if (fname == " -" ) {
647
661
{
@@ -656,93 +670,55 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
656
670
if (n == 0 ) {
657
671
break ;
658
672
}
659
- wav_data .insert (wav_data .end (), buf, buf + n);
673
+ audio_data .insert (audio_data .end (), buf, buf + n);
660
674
}
661
675
}
662
676
663
- if (drwav_init_memory (&wav, wav_data .data (), wav_data .size (), nullptr ) == false ) {
664
- fprintf (stderr, " error : failed to open WAV file from stdin\n " );
677
+ if (ma_decoder_init_memory (audio_data .data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
678
+ fprintf (stderr, " Error : failed to open audio data from stdin\n " );
665
679
return false ;
666
- }
680
+ }
667
681
668
- fprintf (stderr, " %s: read %zu bytes from stdin\n " , __func__, wav_data .size ());
682
+ fprintf (stderr, " %s: read %zu bytes from stdin\n " , __func__, audio_data .size ());
669
683
}
670
684
else if (is_wav_buffer (fname)) {
671
- if (drwav_init_memory (&wav, fname. c_str (), fname .size (), nullptr ) == false ) {
672
- fprintf (stderr, " error : failed to open WAV file from fname buffer\n " );
685
+ if (ma_decoder_init_memory (audio_data. data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
686
+ fprintf (stderr, " Error : failed to open audio data from fname buffer\n " );
673
687
return false ;
674
- }
688
+ }
675
689
}
676
- else if (drwav_init_file (&wav, fname.c_str (), nullptr ) == false ) {
690
+ else if (ma_decoder_init_file ( fname.c_str (), &decoder_config, &decoder) != MA_SUCCESS ) {
677
691
#if defined(WHISPER_FFMPEG)
678
- if (ffmpeg_decode_audio (fname, wav_data ) != 0 ) {
692
+ if (ffmpeg_decode_audio (fname, audio_data ) != 0 ) {
679
693
fprintf (stderr, " error: failed to ffmpeg decode '%s' \n " , fname.c_str ());
680
694
return false ;
681
695
}
682
- if (drwav_init_memory (&wav, wav_data .data (), wav_data .size (), nullptr ) == false ) {
696
+ if (ma_decoder_init_memory (audio_data .data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
683
697
fprintf (stderr, " error: failed to read wav data as wav \n " );
684
698
return false ;
685
699
}
686
700
#else
687
- fprintf (stderr, " error: failed to open '%s' as WAV file\n " , fname.c_str ());
701
+ fprintf (stderr, " error: failed to open '%s' file\n " , fname.c_str ());
688
702
return false ;
689
703
#endif
690
704
}
691
705
692
- if (wav.channels != 1 && wav.channels != 2 ) {
693
- fprintf (stderr, " %s: WAV file '%s' must be mono or stereo\n " , __func__, fname.c_str ());
694
- drwav_uninit (&wav);
695
- return false ;
696
- }
697
-
698
- if (stereo && wav.channels != 2 ) {
699
- fprintf (stderr, " %s: WAV file '%s' must be stereo for diarization\n " , __func__, fname.c_str ());
700
- drwav_uninit (&wav);
701
- return false ;
702
- }
703
-
704
- if (wav.sampleRate != COMMON_SAMPLE_RATE) {
705
- fprintf (stderr, " %s: WAV file '%s' must be %i kHz\n " , __func__, fname.c_str (), COMMON_SAMPLE_RATE/1000 );
706
- drwav_uninit (&wav);
707
- return false ;
708
- }
709
-
710
- if (wav.bitsPerSample != 16 ) {
711
- fprintf (stderr, " %s: WAV file '%s' must be 16-bit\n " , __func__, fname.c_str ());
712
- drwav_uninit (&wav);
713
- return false ;
714
- }
715
-
716
- const uint64_t n = wav_data.empty () ? wav.totalPCMFrameCount : wav_data.size ()/(wav.channels *wav.bitsPerSample /8 );
706
+ ma_uint64 frame_count;
707
+ ma_uint64 frames_read;
717
708
718
- std::vector<int16_t > pcm16;
719
- pcm16.resize (n*wav.channels );
720
- drwav_read_pcm_frames_s16 (&wav, n, pcm16.data ());
721
- drwav_uninit (&wav);
722
-
723
- // convert to mono, float
724
- pcmf32.resize (n);
725
- if (wav.channels == 1 ) {
726
- for (uint64_t i = 0 ; i < n; i++) {
727
- pcmf32[i] = float (pcm16[i])/32768 .0f ;
728
- }
729
- } else {
730
- for (uint64_t i = 0 ; i < n; i++) {
731
- pcmf32[i] = float (pcm16[2 *i] + pcm16[2 *i + 1 ])/65536 .0f ;
732
- }
733
- }
709
+ ma_decoder_get_length_in_pcm_frames (&decoder, &frame_count);
710
+ pcmf32.resize (stereo ? frame_count*2 : frame_count);
711
+ ma_decoder_read_pcm_frames (&decoder, pcmf32.data (), frame_count, &frames_read);
734
712
735
713
if (stereo) {
736
- // convert to stereo, float
737
- pcmf32s.resize (2 );
738
-
739
- pcmf32s[0 ].resize (n);
740
- pcmf32s[1 ].resize (n);
741
- for (uint64_t i = 0 ; i < n; i++) {
742
- pcmf32s[0 ][i] = float (pcm16[2 *i])/32768 .0f ;
743
- pcmf32s[1 ][i] = float (pcm16[2 *i + 1 ])/32768 .0f ;
714
+ pcmf32s[0 ].resize (frame_count);
715
+ pcmf32s[1 ].resize (frame_count);
716
+ for (uint64_t i = 0 ; i < frame_count; i++) {
717
+ pcmf32s[0 ][i] = pcmf32[2 *i];
718
+ pcmf32s[1 ][i] = pcmf32[2 *i + 1 ];
744
719
}
745
720
}
721
+ ma_decoder_uninit (&decoder);
746
722
747
723
return true ;
748
724
}
@@ -909,3 +885,6 @@ bool speak_with_file(const std::string & command, const std::string & text, cons
909
885
}
910
886
return true ;
911
887
}
888
+
889
+ #undef STB_VORBIS_HEADER_ONLY
890
+ #include " stb_vorbis.c"
0 commit comments