# Source code for komoog.audio

# -*- coding: utf-8 -*-
"""
Audio handling and conversion.
"""

import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import simpleaudio as sa

from komoog.gpx import convert_gpx_tracks_to_arrays, convert_tour_to_gpx_tracks

# Maps a note name to its offset in semitones relative to the note 'A'
# (offset 0). A sharp and its enharmonic flat (e.g. 'C#' and 'Db') share
# the same offset. The offset n is turned into a frequency elsewhere in
# this module as ``_TUNE_A * 2**(n/12)``.
_NOTES = {
            'C': -9,
            'C#': -8,
            'Db': -8,
            'D': -7,
            'D#': -6,
            'Eb': -6,
            'E' : -5,
            'F': -4,
            'F#': -3,
            'Gb': -3,
            'G': -2,
            'G#': -1,
            'Ab': -1,
            'A': 0,
            'A#': 1,
            'Bb': 1,
            'B': 2,
        }

# Reference frequency assigned to the note 'A', in Hz.
_TUNE_A = 440 #Hz

def get_tune(tune):
    """
    Convert a tune value to a frequency.

    Parameters
    ==========
    tune : str or float
        Either a note name (one of the keys of ``_NOTES``, e.g. ``'C'``
        or ``'F#'``) or a frequency in Hz.

    Returns
    =======
    frequency : float
        The frequency in Hz. A note name is converted relative to
        ``_TUNE_A`` using equal-tempered semitone steps; any value that
        is not a string is returned unchanged.

    Raises
    ======
    ValueError
        If ``tune`` is a string that is not a known note name.
    """

    # Anything that is not a note name is taken to be a frequency already.
    if not isinstance(tune, str):
        return tune

    try:
        semitones = _NOTES[tune]
    except KeyError as e:
        # Keep the KeyError as the explicit cause of the ValueError.
        raise ValueError("If `tune` is provided as a string, it has to be any of "+str(list(_NOTES.keys()))) from e

    # Each semitone scales the frequency by a factor of 2**(1/12).
    return _TUNE_A * 2.**(semitones/12.)

def play_audio(audio_data, sampling_rate):
    """
    Play audio data and block until playback has finished.

    Parameters
    ==========
    audio_data : numpy.ndarray of numpy.int16
        The audio samples. They are handed to ``simpleaudio`` as a
        single-channel buffer with 2 bytes per sample.
    sampling_rate : int
        Sampling rate of ``audio_data`` in Hz.
    """
    # positional arguments: buffer, number of channels, bytes per sample, rate
    play_obj = sa.play_buffer(audio_data, 1, 2, sampling_rate)
    play_obj.wait_done()


def convert_distance_and_elevation_to_signal(distance,
                                             elevation,
                                             max_elevation_difference=0,
                                             ):
    """
    Convert a distance/elevation profile to a signal, i.e. normalize distance
    to range [0,1] and y to range [-1,1].

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters. Has to be sorted
        in ascending order.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal. If this
        value is ``<= 0``, the audio level will always be maximized.
        If given a positive value, this value will represent the maximum
        scale of the audio signal. If the elevation profile's elevation
        difference is larger than this value, the signal will simply be
        maximized. A good value is ``max_elevation_difference = 2000``.

    Returns
    =======
    x : numpy.ndarray
        covered distance in range [0,1]
    y : numpy.ndarray
        signal in range [-1,1] if the signal is maximized. Otherwise
        the elevation is centered around its distance-weighted mean and
        scaled by ``2 / max_elevation_difference``. A completely flat
        profile that is maximized yields an all-zero signal.

    Raises
    ======
    ValueError
        If the arrays are empty, differ in length, or if ``distance``
        is not sorted in ascending order.
    """

    distance = np.asarray(distance, dtype=float)
    elevation = np.asarray(elevation, dtype=float)

    # explicit checks instead of ``assert``, which is stripped under ``-O``
    if len(distance) != len(elevation):
        raise ValueError("`distance` and `elevation` must have the same length.")
    if len(distance) == 0:
        raise ValueError("`distance` and `elevation` must not be empty.")
    if not np.all(np.diff(distance) >= 0):
        raise ValueError("`distance` must be sorted in ascending order.")

    lowest = np.min(elevation)
    highest = np.max(elevation)
    elevation_diff = highest - lowest

    # total covered distance; zero for a single point or a stationary track
    span = distance[-1] - distance[0]

    maximize_signal = elevation_diff >= max_elevation_difference

    if maximize_signal:
        if elevation_diff > 0:
            y = (elevation - lowest) / elevation_diff * 2. - 1.
        else:
            # flat profile: avoid 0/0 and return silence
            y = np.zeros_like(elevation)
    else:
        # position the signal such that the area under the curve is equal
        if span > 0:
            # ``np.trapz`` was renamed to ``np.trapezoid`` in NumPy 2.0
            integrate = getattr(np, 'trapezoid', None) or np.trapz
            mean = integrate(elevation, distance) / span
        else:
            mean = np.mean(elevation)
        # this branch is only reached for max_elevation_difference > 0
        y = (elevation - mean) / max_elevation_difference * 2.

    if span > 0:
        # subtract the offset so that x really starts at zero
        x = (distance - distance[0]) / span
    else:
        x = np.zeros_like(distance)

    return x, y

def convert_distance_and_elevation_to_audio(distance,
                                            elevation,
                                            max_elevation_difference=0,
                                            tune='C',
                                            sampling_rate=44100,
                                            approximate_length_in_seconds=1,
                                            ):
    """
    Convert a distance/elevation profile to an audio signal.

    The profile is first normalized with
    :func:`convert_distance_and_elevation_to_signal` and the result is
    then passed to :func:`convert_signal_to_audio`.

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal. If this
        value is ``<= 0``, the audio level will always be maximized.
        If given a positive value, this value will represent the maximum
        scale of the audio signal. If the elevation profile's elevation
        difference is larger than this value, the signal will simply be
        maximized. A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz.

        Can also be a frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds
        If equal to zero, will return a single loop.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """

    # step 1: normalize the profile
    x, y = convert_distance_and_elevation_to_signal(
                distance,
                elevation,
                max_elevation_difference=max_elevation_difference,
            )

    # step 2: turn the normalized profile into a looped waveform
    return convert_signal_to_audio(
                x,
                y,
                tune=tune,
                sampling_rate=sampling_rate,
                approximate_length_in_seconds=approximate_length_in_seconds,
            )

def convert_signal_to_audio(x,
                            y,
                            tune='C',
                            sampling_rate=44100,
                            approximate_length_in_seconds=1,
                            ):
    """
    Convert a normalized distance/elevation signal to an audio signal.

    The signal is resampled on a regular grid, smoothed, and squeezed
    into a single period of the desired frequency. This period is then
    repeated until the desired length is reached.

    Parameters
    ==========
    x : numpy.ndarray
        Normalized distance covering the range [0,1], e.g. as returned
        by :func:`convert_distance_and_elevation_to_signal`.
    y : numpy.ndarray
        The corresponding normalized signal values.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz.

        Can also be a frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds
        If equal to zero, will return a single loop.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.

    Raises
    ======
    ValueError
        If ``tune`` is an unknown note name or if the frequency is too
        high for a single period to contain at least one sample.
    """

    tune = get_tune(tune)

    # resample the (possibly irregularly spaced) signal on a regular grid
    x_sample = np.linspace(0,1,len(x)*2+1)
    y_sample = interp1d(x, y, kind='linear')(x_sample)

    # smoothing window: 101 points for long signals, otherwise roughly
    # 10% of the signal, forced to be odd
    if len(y_sample) > 400:
        window_length = 101
    else:
        window_length = int(len(y_sample) * 0.1)
        if window_length % 2 == 0:
            window_length += 1
        # a polynomial of order 2 needs a window of at least 3 points,
        # otherwise ``savgol_filter`` raises for short signals
        window_length = max(window_length, 3)

    # 'wrap' treats the signal as periodic, which fits a looped waveform
    y_filtered = savgol_filter(y_sample, window_length, 2, mode='wrap')

    samples_per_period = int(sampling_rate/tune)
    if samples_per_period < 1:
        raise ValueError("`tune` is too high for the given `sampling_rate`: "
                         "a single period would contain no samples.")

    # squeeze the whole profile into one period of duration 1/tune
    sampling_t = np.linspace(0,1/tune,samples_per_period)
    y_period = interp1d(x_sample/tune, y_filtered, kind='cubic')(sampling_t)

    # only scale down signals that exceed the range [-1,1]; quieter
    # signals keep their level
    ymax = np.max([1.,np.max(np.abs(y_period))])

    # convert to 16-bit data
    raw_audio = (y_period * 32767 / ymax).astype(np.int16)

    if approximate_length_in_seconds > 0:
        # repeat whole periods until the desired length is reached
        necessary_samples = sampling_rate * approximate_length_in_seconds
        copies = int(np.ceil(necessary_samples/len(raw_audio)))
        audio = np.tile(raw_audio, copies)
    else:
        audio = raw_audio

    return audio, sampling_rate


def convert_distance_and_elevation_to_profile_audio(
                                            distance,
                                            elevation,
                                            max_elevation_difference=0,
                                            tune='C',
                                            sampling_rate=44100,
                                            approximate_length_in_seconds=1,
                                            ):
    """
    Convert a distance/elevation profile to an audio signal that
    mimics the elevation profile.

    The waveform of a single loop is the same as for
    :func:`convert_distance_and_elevation_to_audio`, but the frequency
    of each consecutive loop is scaled by ``2**y``, where ``y`` is the
    normalized elevation at the corresponding position of the tour.

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal. If this
        value is ``<= 0``, the audio level will always be maximized.
        If given a positive value, this value will represent the maximum
        scale of the audio signal. If the elevation profile's elevation
        difference is larger than this value, the signal will simply be
        maximized. A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz.

        Can also be a frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds
        If equal to zero, will return a single loop. The number of
        loops is derived from the loop length at the base frequency, so
        the length of the result is only approximate.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """

    x, y = convert_distance_and_elevation_to_signal(
                distance,
                elevation,
                max_elevation_difference=max_elevation_difference,
            )

    # y = 1 doubles the base frequency, y = -1 halves it
    frequency_scalar = 2**y

    # a single loop at the base frequency determines how many loops are
    # needed to fill the requested duration
    single_audio, _ = convert_signal_to_audio(
                                   x,
                                   y,
                                   tune=tune,
                                   sampling_rate=sampling_rate,
                                   approximate_length_in_seconds=0,
                                  )

    # same loop count that ``convert_signal_to_audio`` would use, computed
    # directly instead of rendering the full-length audio just to measure it
    if approximate_length_in_seconds > 0:
        necessary_samples = sampling_rate * approximate_length_in_seconds
        copies = int(np.ceil(necessary_samples/len(single_audio)))
    else:
        copies = 1

    # stretch the profile over all loops and read off one frequency
    # factor per loop (at positions 0, 1, ..., copies-1)
    f = interp1d(x * copies, frequency_scalar, kind='cubic')
    loop_scalars = f(np.arange(copies))

    base_tune = get_tune(tune)

    audios = []
    for scalar in loop_scalars:
        this_audio, _ = convert_signal_to_audio(
                                   x,
                                   y,
                                   tune=base_tune*float(scalar),
                                   sampling_rate=sampling_rate,
                                   approximate_length_in_seconds=0,
                                  )
        audios.append(this_audio)

    return np.concatenate(audios).astype(np.int16), sampling_rate

def convert_tour_to_audio(tour,
                          max_elevation_difference=0,
                          tune='C',
                          sampling_rate=44100,
                          approximate_length_in_seconds=1,
                          set_tune_to_follow_tour_profile=False,
                          ):
    """
    Convert a hiking tour to audio.

    Parameters
    ==========
    tour : dict
        A komoot tour item as provided by e.g.
        :func:`komoog.io.read_tours`.
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal. If this
        value is ``<= 0``, the audio level will always be maximized.
        If given a positive value, this value will represent the maximum
        scale of the audio signal. If the elevation profile's elevation
        difference is larger than this value, the signal will simply be
        maximized. A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz.

        Can also be a frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds
        If equal to zero, will return a single loop.
    set_tune_to_follow_tour_profile : bool, default = False
        If set to ``True`` the tune of the returned audio
        signal will follow the tour profile.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """

    tracks = convert_tour_to_gpx_tracks(tour)
    distance, elevation = convert_gpx_tracks_to_arrays(tracks)

    # both converters take the same arguments, so pick one and call it once
    if set_tune_to_follow_tour_profile:
        converter = convert_distance_and_elevation_to_profile_audio
    else:
        converter = convert_distance_and_elevation_to_audio

    audio, _ = converter(
                    distance,
                    elevation,
                    max_elevation_difference=max_elevation_difference,
                    tune=tune,
                    sampling_rate=sampling_rate,
                    approximate_length_in_seconds=approximate_length_in_seconds,
                )

    return audio, sampling_rate

if __name__=="__main__":

    # Small manual demo: load a GPX file, play its elevation profile as a
    # constant-pitch sound, plot the waveform, then play a version whose
    # pitch follows the profile.

    import sys

    import gpxpy
    import matplotlib.pyplot as pl

    # The GPX file can be passed as the first command line argument;
    # without an argument the previously hard-coded path is used.
    if len(sys.argv) > 1:
        fn = sys.argv[1]
    else:
        fn = '/Users/bfmaier/Downloads/Tour-5.gpx'

    with open(fn,'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    print(gpx.name)

    distance, elevation = convert_gpx_tracks_to_arrays(gpx.tracks)

    sampling_rate = 44100

    y, _ = convert_distance_and_elevation_to_audio(
                distance,
                elevation,
                max_elevation_difference=2000,
                sampling_rate=sampling_rate,
            )

    # To save the sound instead of playing it, one can use
    # ``scipy.io.wavfile.write('./tour.wav', sampling_rate, y)``.

    # blocks until playback has finished
    play_audio(y, sampling_rate)

    # plot the waveform against time in seconds
    t = np.arange(len(y)) / sampling_rate
    pl.plot(t, y)
    pl.show()

    # pass the sampling rate explicitly so that generation and playback
    # are guaranteed to use the same rate
    y2, _ = convert_distance_and_elevation_to_profile_audio(
                distance,
                elevation,
                max_elevation_difference=2000,
                sampling_rate=sampling_rate,
                approximate_length_in_seconds=10,
            )
    play_audio(y2, sampling_rate)