Skip to content

Commit 1b51fdf

Browse files
examples : add support for decoding input with ffmpeg (Linux) (#2133)
- search for ffmpeg libs/headers at cmake time - added ffmpeg-transcode.cpp into libcommon if ffmpeg on - hooked ffmpeg transcoding in common read_wav(...) - passed test: ./main -m ggml-base.en.bin -f samples/jfk.mp3
1 parent adee3f9 commit 1b51fdf

9 files changed

+574
-2
lines changed

CMakeLists.txt

+24
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDA
5959

6060
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
6161

62+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
63+
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
64+
endif()
65+
6266
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
6367
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
6468
option(WHISPER_NO_AVX512 "whisper: disable AVX512" ON)
@@ -125,6 +129,26 @@ else()
125129
set(CMAKE_CXX_STANDARD 11)
126130
endif()
127131

132+
if (WHISPER_FFMPEG)
133+
# As of cmake 3.27, there is no official cmake support for FindFFmpeg.
134+
# Consequently we added a FindFFmpeg.cmake script in the cmake subfolder:
135+
# whisper.cpp does not need the full ffmpeg libs, just AVFORMAT AVCODEC AVUTIL SWRESAMPLE
136+
# libswresample performs highly optimized audio resampling, rematrixing and sample format conversion operations
137+
# libavcodec provides a generic encoding/decoding framework and contains multiple decoders and encoders for audio, video and subtitle streams, and several bitstream filters.
138+
# libavformat provides a generic framework for multiplexing and demultiplexing (muxing and demuxing) audio, video and subtitle streams.
139+
find_package(FFmpeg REQUIRED)
140+
# Defensive check: find_package(... REQUIRED) normally aborts before this point.
# Test the variable by name (no ${}) so an unset value cannot expand to a malformed condition.
if (NOT FFMPEG_FOUND)
141+
message(FATAL_ERROR "Cannot find ffmpeg libs/headers")
142+
endif()
143+
message(STATUS "Found ffmpeg libs: ${FFMPEG_LIBRARIES}")
144+
message(STATUS "Found ffmpeg headers in: ${FFMPEG_INCLUDE_DIRS}")
145+
message(STATUS "ffmpeg definitions: ${FFMPEG_DEFINITIONS}")
146+
message(STATUS "Found avformat ${AVFORMAT_VERSION}")
147+
include_directories(${FFMPEG_INCLUDE_DIRS})
148+
add_compile_definitions(WHISPER_FFMPEG)
149+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${FFMPEG_LIBRARIES})
150+
endif()
151+
128152
# on APPLE
129153
if (APPLE)
130154
# include Accelerate framework

cmake/FindFFmpeg.cmake

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# From
2+
# https://github.com/snikulov/cmake-modules/blob/master/FindFFmpeg.cmake
3+
#
4+
# vim: ts=2 sw=2
5+
# - Try to find the required ffmpeg components(default: AVFORMAT, AVCODEC, AVUTIL, SWRESAMPLE)
6+
#
7+
# Once done this will define
8+
# FFMPEG_FOUND - System has all the required components.
9+
# FFMPEG_INCLUDE_DIRS - Include directory necessary for using the required components headers.
10+
# FFMPEG_LIBRARIES - Link these to use the required ffmpeg components.
11+
# FFMPEG_DEFINITIONS - Compiler switches required for using the required ffmpeg components.
12+
#
13+
# For each of the components it will additionally set.
14+
# - AVCODEC
15+
# - AVDEVICE
16+
# - AVFORMAT
17+
# - AVFILTER
18+
# - AVUTIL
19+
# - POSTPROC
20+
# - SWSCALE
21+
# the following variables will be defined
22+
# <component>_FOUND - System has <component>
23+
# <component>_INCLUDE_DIRS - Include directory necessary for using the <component> headers
24+
# <component>_LIBRARIES - Link these to use <component>
25+
# <component>_DEFINITIONS - Compiler switches required for using <component>
26+
# <component>_VERSION - The components version
27+
#
28+
# Copyright (c) 2006, Matthias Kretz, <kretz@kde.org>
29+
# Copyright (c) 2008, Alexander Neundorf, <neundorf@kde.org>
30+
# Copyright (c) 2011, Michael Jansen, <kde@michael-jansen.biz>
31+
#
32+
# Redistribution and use is allowed according to the terms of the BSD license.
33+
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
34+
35+
include(FindPackageHandleStandardArgs)
36+
37+
# The default components were taken from a survey over other FindFFMPEG.cmake files
38+
if (NOT FFmpeg_FIND_COMPONENTS)
39+
set(FFmpeg_FIND_COMPONENTS AVFORMAT AVCODEC AVUTIL SWRESAMPLE)
40+
endif()
41+
42+
#
43+
### Macro: set_component_found
44+
#
45+
# Marks the given component as found if both *_LIBRARIES AND *_INCLUDE_DIRS are present.
46+
#
47+
macro(set_component_found _component )
48+
if (${_component}_LIBRARIES AND ${_component}_INCLUDE_DIRS)
49+
message(DEBUG " - ${_component} found.")
50+
set(${_component}_FOUND TRUE)
51+
else ()
52+
message(DEBUG " - ${_component} not found.")
53+
endif ()
54+
endmacro()
55+
56+
#
57+
### Macro: find_component
58+
#
59+
# Checks for the given component by invoking pkgconfig and then looking up the libraries and
60+
# include directories.
61+
#
62+
macro(find_component _component _pkgconfig _library _header)
63+
64+
if (NOT WIN32)
65+
# use pkg-config to get the directories and then use these values
66+
# in the FIND_PATH() and FIND_LIBRARY() calls
67+
find_package(PkgConfig)
68+
if (PKG_CONFIG_FOUND)
69+
pkg_check_modules(PC_${_component} ${_pkgconfig})
70+
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDEDIR}")
71+
message(STATUS "Pkgconfig found: ${PC_${_component}_INCLUDE_DIRS}")
72+
message(STATUS "${PC_${_component}_CFLAGS}")
73+
endif ()
74+
endif (NOT WIN32)
75+
76+
77+
find_path(${_component}_INCLUDE_DIRS ${_header}
78+
HINTS
79+
${PC_${_component}_INCLUDEDIR}
80+
${PC_${_component}_INCLUDE_DIRS}
81+
PATH_SUFFIXES
82+
ffmpeg
83+
)
84+
85+
# CMake's default is to search first for shared libraries and then for static libraries.
86+
# Todo later: add option to prefer static libs over dynamic:
87+
find_library(${_component}_LIBRARIES NAMES ${_library} lib${_library}.a
88+
HINTS
89+
${PC_${_component}_LIBDIR}
90+
${PC_${_component}_LIBRARY_DIRS}
91+
)
92+
93+
set(${_component}_DEFINITIONS ${PC_${_component}_CFLAGS_OTHER} CACHE STRING "The ${_component} CFLAGS.")
94+
set(${_component}_VERSION ${PC_${_component}_VERSION} CACHE STRING "The ${_component} version number.")
95+
96+
set_component_found(${_component})
97+
98+
mark_as_advanced(
99+
${_component}_INCLUDE_DIRS
100+
${_component}_LIBRARIES
101+
${_component}_DEFINITIONS
102+
${_component}_VERSION)
103+
104+
endmacro()
105+
106+
107+
# Check for cached results. If there are any, skip the costly part.
108+
if (NOT FFMPEG_LIBRARIES)
109+
110+
# Check for all possible components.
111+
find_component(AVCODEC libavcodec avcodec libavcodec/avcodec.h)
112+
find_component(AVFORMAT libavformat avformat libavformat/avformat.h)
113+
find_component(AVDEVICE libavdevice avdevice libavdevice/avdevice.h)
114+
#find_component(AVRESAMPLE libavresample avresample libavresample/avresample.h) # old name for swresample
115+
find_component(AVUTIL libavutil avutil libavutil/avutil.h)
116+
find_component(AVFILTER libavfilter avfilter libavfilter/avfilter.h)
117+
find_component(SWSCALE libswscale swscale libswscale/swscale.h)
118+
find_component(POSTPROC libpostproc postproc libpostproc/postprocess.h)
119+
find_component(SWRESAMPLE libswresample swresample libswresample/swresample.h)
120+
121+
# Check if the required components were found and add their stuff to the FFMPEG_* vars.
122+
foreach (_component ${FFmpeg_FIND_COMPONENTS})
123+
if (${_component}_FOUND)
124+
# message(STATUS "Required component ${_component} present.")
125+
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} ${${_component}_LIBRARIES})
126+
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} ${${_component}_DEFINITIONS})
127+
list(APPEND FFMPEG_INCLUDE_DIRS ${${_component}_INCLUDE_DIRS})
128+
else ()
129+
# message(STATUS "Required component ${_component} missing.")
130+
endif ()
131+
endforeach ()
132+
133+
# Build the include path with duplicates removed.
134+
if (FFMPEG_INCLUDE_DIRS)
135+
list(REMOVE_DUPLICATES FFMPEG_INCLUDE_DIRS)
136+
endif ()
137+
138+
# cache the vars.
139+
set(FFMPEG_INCLUDE_DIRS ${FFMPEG_INCLUDE_DIRS} CACHE STRING "The FFmpeg include directories." FORCE)
140+
set(FFMPEG_LIBRARIES ${FFMPEG_LIBRARIES} CACHE STRING "The FFmpeg libraries." FORCE)
141+
set(FFMPEG_DEFINITIONS ${FFMPEG_DEFINITIONS} CACHE STRING "The FFmpeg cflags." FORCE)
142+
143+
mark_as_advanced(FFMPEG_INCLUDE_DIRS
144+
FFMPEG_LIBRARIES
145+
FFMPEG_DEFINITIONS)
146+
147+
endif ()
148+
149+
# Now set the noncached _FOUND vars for the components.
150+
# whisper.cpp does not need SWSCALE
151+
# Component names must match the find_component() calls (POSTPROC, SWRESAMPLE);
# AVRESAMPLE is never searched for, and SWSCALE is intentionally skipped.
foreach (_component AVCODEC AVDEVICE AVFORMAT AVFILTER AVUTIL POSTPROC SWRESAMPLE)
152+
set_component_found(${_component})
153+
endforeach ()
154+
155+
# Compile the list of required vars
156+
set(_FFmpeg_REQUIRED_VARS FFMPEG_LIBRARIES FFMPEG_INCLUDE_DIRS)
157+
foreach (_component ${FFmpeg_FIND_COMPONENTS})
158+
list(APPEND _FFmpeg_REQUIRED_VARS ${_component}_LIBRARIES ${_component}_INCLUDE_DIRS)
159+
endforeach ()
160+
161+
# Give a nice error message if some of the required vars are missing.
162+
find_package_handle_standard_args(FFmpeg DEFAULT_MSG ${_FFmpeg_REQUIRED_VARS})
163+

examples/CMakeLists.txt

+5
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,18 @@ endif()
2222

2323
set(TARGET common)
2424

25+
if (WHISPER_FFMPEG)
26+
set(COMMON_SOURCES_FFMPEG ffmpeg-transcode.cpp)
27+
endif()
28+
2529
add_library(${TARGET} STATIC
2630
common.h
2731
common.cpp
2832
common-ggml.h
2933
common-ggml.cpp
3034
grammar-parser.h
3135
grammar-parser.cpp
36+
${COMMON_SOURCES_FFMPEG}
3237
)
3338

3439
include(DefaultTargetOptions)

examples/common.cpp

+17-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
#include <io.h>
2525
#endif
2626

27+
#ifdef WHISPER_FFMPEG
28+
// implemented in ffmpeg-transcode.cpp, which is only compiled into the common lib when whisper is built with ffmpeg support
29+
extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data);
30+
#endif
31+
2732
// Function to check if the next argument exists
2833
std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
2934
if (i + 1 < argc && argv[i + 1][0] != '-') {
@@ -637,7 +642,7 @@ bool is_wav_buffer(const std::string buf) {
637642

638643
bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) {
639644
drwav wav;
640-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
645+
std::vector<uint8_t> wav_data; // used for pipe input from stdin or ffmpeg decoding output
641646

642647
if (fname == "-") {
643648
{
@@ -670,8 +675,19 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
670675
}
671676
}
672677
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
678+
#if defined(WHISPER_FFMPEG)
679+
if (ffmpeg_decode_audio(fname, wav_data) != 0) {
680+
fprintf(stderr, "error: failed to ffmpeg decode '%s' \n", fname.c_str());
681+
return false;
682+
}
683+
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
684+
fprintf(stderr, "error: failed to read wav data as wav \n");
685+
return false;
686+
}
687+
#else
673688
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
674689
return false;
690+
#endif
675691
}
676692

677693
if (wav.channels != 1 && wav.channels != 2) {

0 commit comments

Comments
 (0)