Source code for moviepy.audio.AudioClip

"""Implements AudioClip (base class for audio clips) and its main subclasses:

- Audio clips: AudioClip, AudioFileClip, AudioArrayClip
- Composition: CompositeAudioClip
"""

import numbers
import os

import numpy as np
import proglog

from moviepy.audio.io.ffmpeg_audiowriter import ffmpeg_audiowrite
from moviepy.audio.io.ffplay_audiopreviewer import ffplay_audiopreview
from moviepy.Clip import Clip
from moviepy.decorators import convert_path_to_string, requires_duration
from moviepy.tools import extensions_dict


[docs]class AudioClip(Clip):
    """Base class for audio clips.

    See ``AudioFileClip`` and ``CompositeAudioClip`` for usable classes.

    An AudioClip is a Clip with a ``frame_function``  attribute of
    the form `` t -> [ f_t ]`` for mono sound and
    ``t-> [ f1_t, f2_t ]`` for stereo sound (the arrays are Numpy arrays).
    The `f_t` are floats between -1 and 1. These bounds can be
    trespassed without problems (the program will put the
    sound back into the bounds at conversion time, without much impact).

    Parameters
    ----------

    frame_function
      A function `t-> frame at time t`. The frame does not mean much
      for a sound, it is just a float. What 'makes' the sound are
      the variations of that float in the time.

    duration
      Duration of the clip (in seconds). Some clips are infinite, in
      this case their duration will be ``None``.

    nchannels
      Number of channels (one or two for mono or stereo).

    Examples
    --------

    .. code:: python

        # Plays the note A in mono (a sine wave of frequency 440 Hz)
        import numpy as np
        frame_function = lambda t: np.sin(440 * 2 * np.pi * t)
        clip = AudioClip(frame_function, duration=5, fps=44100)
        clip.preview()

        # Plays the note A in stereo (two sine waves of frequencies 440 and 880 Hz)
        frame_function = lambda t: np.array([
            np.sin(440 * 2 * np.pi * t),
            np.sin(880 * 2 * np.pi * t)
        ]).T.copy(order="C")
        clip = AudioClip(frame_function, duration=3, fps=44100)
        clip.preview()

    """

    def __init__(self, frame_function=None, duration=None, fps=None):
        super().__init__()

        if fps is not None:
            self.fps = fps

        if frame_function is not None:
            self.frame_function = frame_function
            frame0 = self.get_frame(0)
            if hasattr(frame0, "__iter__"):
                self.nchannels = len(list(frame0))
            else:
                self.nchannels = 1
        if duration is not None:
            self.duration = duration
            self.end = duration

[docs]    @requires_duration
    def iter_chunks(
        self,
        chunksize=None,
        chunk_duration=None,
        fps=None,
        quantize=False,
        nbytes=2,
        logger=None,
    ):
        """Iterator that returns the whole sound array of the clip by chunks"""
        if fps is None:
            fps = self.fps
        logger = proglog.default_bar_logger(logger)
        if chunk_duration is not None:
            chunksize = int(chunk_duration * fps)

        total_size = int(fps * self.duration)

        nchunks = total_size // chunksize + 1

        positions = np.linspace(0, total_size, nchunks + 1, endpoint=True, dtype=int)

        for i in logger.iter_bar(chunk=list(range(nchunks))):
            size = positions[i + 1] - positions[i]
            assert size <= chunksize
            timings = (1.0 / fps) * np.arange(positions[i], positions[i + 1])
            yield self.to_soundarray(
                timings, nbytes=nbytes, quantize=quantize, fps=fps, buffersize=chunksize
            )

[docs]    @requires_duration
    def to_soundarray(
        self, tt=None, fps=None, quantize=False, nbytes=2, buffersize=50000
    ):
        """
        Transforms the sound into an array that can be played by pygame
        or written in a wav file. See ``AudioClip.preview``.

        Parameters
        ----------

        fps
          Frame rate of the sound for the conversion.
          44100 for top quality.

        nbytes
          Number of bytes to encode the sound: 1 for 8bit sound,
          2 for 16bit, 4 for 32bit sound.

        """
        if tt is None:
            if fps is None:
                fps = self.fps

            max_duration = 1 * buffersize / fps
            if self.duration > max_duration:
                stacker = np.vstack if self.nchannels == 2 else np.hstack
                return stacker(
                    tuple(
                        self.iter_chunks(
                            fps=fps, quantize=quantize, nbytes=2, chunksize=buffersize
                        )
                    )
                )
            else:
                tt = np.arange(0, self.duration, 1.0 / fps)
        """
        elif len(tt)> 1.5*buffersize:
            nchunks = int(len(tt)/buffersize+1)
            tt_chunks = np.array_split(tt, nchunks)
            return stacker([self.to_soundarray(tt=ttc, buffersize=buffersize, fps=fps,
                                        quantize=quantize, nbytes=nbytes)
                              for ttc in tt_chunks])
        """
        snd_array = self.get_frame(tt)

        if quantize:
            snd_array = np.maximum(-0.99, np.minimum(0.99, snd_array))
            inttype = {1: "int8", 2: "int16", 4: "int32"}[nbytes]
            snd_array = (2 ** (8 * nbytes - 1) * snd_array).astype(inttype)

        return snd_array

[docs]    def max_volume(self, stereo=False, chunksize=50000, logger=None):
        """Returns the maximum volume level of the clip."""
        # max volume separated by channels if ``stereo`` and not mono
        stereo = stereo and self.nchannels > 1

        # zero for each channel
        maxi = np.zeros(self.nchannels)
        for chunk in self.iter_chunks(chunksize=chunksize, logger=logger):
            maxi = np.maximum(maxi, abs(chunk).max(axis=0))

        # if mono returns float, otherwise array of volumes by channel
        return maxi if stereo else maxi[0]

[docs]    @requires_duration
    @convert_path_to_string("filename")
    def write_audiofile(
        self,
        filename,
        fps=None,
        nbytes=2,
        buffersize=2000,
        codec=None,
        bitrate=None,
        ffmpeg_params=None,
        write_logfile=False,
        logger="bar",
    ):
        """Writes an audio file from the AudioClip.


        Parameters
        ----------

        filename
          Name of the output file, as a string or a path-like object.

        fps
          Frames per second. If not set, it will try default to self.fps if
          already set, otherwise it will default to 44100.

        nbytes
          Sample width (set to 2 for 16-bit sound, 4 for 32-bit sound)

        buffersize
          The sound is not generated all at once, but rather made by bunches
          of frames (chunks). ``buffersize`` is the size of such a chunk.
          Try varying it if you meet audio problems (but you shouldn't
          have to). Default to 2000

        codec
          Which audio codec should be used. If None provided, the codec is
          determined based on the extension of the filename. Choose
          'pcm_s16le' for 16-bit wav and 'pcm_s32le' for 32-bit wav.

        bitrate
          Audio bitrate, given as a string like '50k', '500k', '3000k'.
          Will determine the size and quality of the output file.
          Note that it mainly an indicative goal, the bitrate won't
          necessarily be the this in the output file.

        ffmpeg_params
          Any additional parameters you would like to pass, as a list
          of terms, like ['-option1', 'value1', '-option2', 'value2']

        write_logfile
          If true, produces a detailed logfile named filename + '.log'
          when writing the file

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.

        """
        if not fps:
            if hasattr(self, "fps"):
                fps = self.fps
            else:
                fps = 44100

        if codec is None:
            name, ext = os.path.splitext(os.path.basename(filename))
            try:
                codec = extensions_dict[ext[1:]]["codec"][0]
            except KeyError:
                raise ValueError(
                    "MoviePy couldn't find the codec associated "
                    "with the filename. Provide the 'codec' "
                    "parameter in write_audiofile."
                )

        return ffmpeg_audiowrite(
            self,
            filename,
            fps,
            nbytes,
            buffersize,
            codec=codec,
            bitrate=bitrate,
            write_logfile=write_logfile,
            ffmpeg_params=ffmpeg_params,
            logger=logger,
        )

[docs]    @requires_duration
    def audiopreview(
        self, fps=None, buffersize=2000, nbytes=2, audio_flag=None, video_flag=None
    ):
        """
        Preview an AudioClip using ffplay

        Parameters
        ----------

        fps
            Frame rate of the sound. 44100 gives top quality, but may cause
            problems if your computer is not fast enough and your clip is
            complicated. If the sound jumps during the preview, lower it
            (11025 is still fine, 5000 is tolerable).

        buffersize
            The sound is not generated all at once, but rather made by bunches
            of frames (chunks). ``buffersize`` is the size of such a chunk.
            Try varying it if you meet audio problems (but you shouldn't
            have to).

        nbytes:
            Number of bytes to encode the sound: 1 for 8bit sound, 2 for
            16bit, 4 for 32bit sound. 2 bytes is fine.

        audio_flag, video_flag:
            Instances of class threading events that are used to synchronize
            video and audio during ``VideoClip.preview()``.
        """
        ffplay_audiopreview(
            clip=self,
            fps=fps,
            buffersize=buffersize,
            nbytes=nbytes,
            audio_flag=audio_flag,
            video_flag=video_flag,
        )

    def __add__(self, other):
        if isinstance(other, AudioClip):
            return concatenate_audioclips([self, other])
        return super(AudioClip, self).__add__(other)


[docs]class AudioArrayClip(AudioClip):
    """

    An audio clip made from a sound array.

    Parameters
    ----------

    array
      A Numpy array representing the sound, of size Nx1 for mono,
      Nx2 for stereo.

    fps
      Frames per second : speed at which the sound is supposed to be
      played.

    """

    def __init__(self, array, fps):
        Clip.__init__(self)
        self.array = array
        self.fps = fps
        self.duration = 1.0 * len(array) / fps

        def frame_function(t):
            """Complicated, but must be able to handle the case where t
            is a list of the form sin(t).
            """
            if isinstance(t, np.ndarray):
                array_inds = np.round(self.fps * t).astype(int)
                in_array = (array_inds >= 0) & (array_inds < len(self.array))
                result = np.zeros((len(t), 2))
                result[in_array] = self.array[array_inds[in_array]]
                return result
            else:
                i = int(self.fps * t)
                if i < 0 or i >= len(self.array):
                    return 0 * self.array[0]
                else:
                    return self.array[i]

        self.frame_function = frame_function
        self.nchannels = len(list(self.get_frame(0)))


[docs]class CompositeAudioClip(AudioClip):
    """Clip made by composing several AudioClips.

    An audio clip made by putting together several audio clips.

    Parameters
    ----------

    clips
      List of audio clips, which may start playing at different times or
      together, depends on their ``start`` attributes. If all have their
      ``duration`` attribute set, the duration of the composite clip is
      computed automatically.
    """

    def __init__(self, clips):
        self.clips = clips
        self.nchannels = max(clip.nchannels for clip in self.clips)

        # self.duration is set at AudioClip
        duration = None
        for end in self.ends:
            if end is None:
                break
            duration = max(end, duration or 0)

        # self.fps is set at AudioClip
        fps = None
        for clip in self.clips:
            if hasattr(clip, "fps") and isinstance(clip.fps, numbers.Number):
                fps = max(clip.fps, fps or 0)

        super().__init__(duration=duration, fps=fps)

    @property
    def starts(self):
        """Returns starting times for all clips in the composition."""
        return (clip.start for clip in self.clips)

    @property
    def ends(self):
        """Returns ending times for all clips in the composition."""
        return (clip.end for clip in self.clips)

[docs]    def frame_function(self, t):
        """Renders a frame for the composition for the time ``t``."""
        played_parts = [clip.is_playing(t) for clip in self.clips]

        sounds = [
            clip.get_frame(t - clip.start) * np.array([part]).T
            for clip, part in zip(self.clips, played_parts)
            if (part is not False)
        ]

        if isinstance(t, np.ndarray):
            zero = np.zeros((len(t), self.nchannels))
        else:
            zero = np.zeros(self.nchannels)

        return zero + sum(sounds)


[docs]def concatenate_audioclips(clips):
    """Concatenates one AudioClip after another, in the order that are passed
    to ``clips`` parameter.

    Parameters
    ----------

    clips
      List of audio clips, which will be played one after other.
    """
    # start, end/start2, end2/start3... end
    starts_end = np.cumsum([0, *[clip.duration for clip in clips]])
    newclips = [clip.with_start(t) for clip, t in zip(clips, starts_end[:-1])]

    return CompositeAudioClip(newclips).with_duration(starts_end[-1])