# Source code for komoog.audio

# -*- coding: utf-8 -*-
"""
Audio handling and conversion.
"""

import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
import simpleaudio as sa

from komoog.gpx import convert_gpx_tracks_to_arrays, convert_tour_to_gpx_tracks

# Semitone offset of every supported note name relative to the reference
# note 'A'. Enharmonic spellings (e.g. 'C#' and 'Db') share one offset.
# The insertion order is part of the error message emitted by `get_tune`.
_NOTES = {
    'C': -9,
    'C#': -8, 'Db': -8,
    'D': -7,
    'D#': -6, 'Eb': -6,
    'E': -5,
    'F': -4,
    'F#': -3, 'Gb': -3,
    'G': -2,
    'G#': -1, 'Ab': -1,
    'A': 0,
    'A#': 1, 'Bb': 1,
    'B': 2,
}

# Frequency of the reference note 'A' in Hz.
_TUNE_A = 440

def get_tune(tune):
    """
    Convert a tune value to a frequency.

    Parameters
    ==========
    tune : str or float
        Either a note name (one of the keys of ``_NOTES``) or any
        value that is not a string, which is handed back unchanged.

    Returns
    =======
    tune : float
        For a note name: ``_TUNE_A`` shifted by the note's semitone
        offset, where one semitone is a factor of ``2**(1/12)``.
        For everything else: the input itself.

    Raises
    ======
    ValueError
        If ``tune`` is a string that is not a key of ``_NOTES``.
    """
    # anything that is not a note name is passed through untouched
    if not isinstance(tune, str):
        return tune

    try:
        semitones = _NOTES[tune]
    except KeyError:
        raise ValueError("If `tune` is provided as a string, it has to be any of "+str(list(_NOTES.keys())))

    return _TUNE_A * 2.**(semitones/12.)
def play_audio(audio_data, sampling_rate):
    """
    Play audio data and block until playback has finished.

    Parameters
    ==========
    audio_data : numpy.ndarray of numpy.int16
        The samples to play, handed to ``simpleaudio.play_buffer``
        as a single channel with two bytes per sample.
    sampling_rate : int
        Sampling rate in Hz
    """
    n_channels = 1
    bytes_per_sample = 2
    playback = sa.play_buffer(audio_data, n_channels, bytes_per_sample, sampling_rate)

    # do not return before the sound has been played completely
    playback.wait_done()
def convert_distance_and_elevation_to_signal(distance,
                                             elevation,
                                             max_elevation_difference=0,
                                             ):
    """
    Convert a distance/elevation profile to a signal, i.e.
    normalize distance to range [0,1] and elevation to a
    signal that is centered around zero.

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters. Has to be
        sorted in ascending order and has to cover a positive
        range. It does not have to start at zero.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal.
        If this value is ``<= 0``, the audio level will always be
        maximized. If given a positive value, this value will
        represent the maximum scale of the audio signal.
        If the elevation profile's elevation difference is larger
        than this value, the signal will simply be maximized.
        A good value is ``max_elevation_difference = 2000``.

    Returns
    =======
    x : numpy.ndarray
        covered distance in range [0,1]
    y : numpy.ndarray
        If the signal is maximized: the elevation rescaled such that
        it spans exactly [-1,1] (all zeros for a completely flat
        profile). Otherwise: the elevation centered on its
        distance-weighted mean and multiplied with
        ``2/max_elevation_difference``; this is not clipped.

    Raises
    ======
    ValueError
        If the arrays are empty or differ in length, if ``distance``
        is not sorted, or if ``distance`` covers no positive range.
    """
    distance = np.asarray(distance, dtype=float)
    elevation = np.asarray(elevation, dtype=float)

    # explicit checks instead of `assert`, which vanishes under `python -O`
    if len(distance) != len(elevation):
        raise ValueError("`distance` and `elevation` must have the same length.")
    if len(distance) == 0:
        raise ValueError("`distance` and `elevation` must not be empty.")
    if not np.all(np.diff(distance) >= 0):
        raise ValueError("`distance` must be sorted in ascending order.")

    # measure everything relative to the first point so that profiles
    # which do not start at zero are normalized to [0,1], too
    span = distance[-1] - distance[0]
    if not span > 0:
        raise ValueError("`distance` must cover a positive range.")

    x = (distance - distance[0]) / span

    lowest = np.min(elevation)
    elevation_diff = np.max(elevation) - lowest

    if elevation_diff >= max_elevation_difference:
        if elevation_diff == 0:
            # a flat profile carries no signal; avoid 0/0 = NaN
            y = np.zeros_like(elevation)
        else:
            y = (elevation - lowest) / elevation_diff * 2. - 1.
    else:
        # `np.trapz` was renamed to `np.trapezoid` in NumPy 2.0
        integrate = getattr(np, "trapezoid", None)
        if integrate is None:
            integrate = np.trapz

        # position the signal such that the area under the curve is equal
        mean = integrate(elevation, distance) / span
        y = (elevation - mean) / max_elevation_difference * 2.

    return x, y
def convert_distance_and_elevation_to_audio(distance,
                                            elevation,
                                            max_elevation_difference=0,
                                            tune='C',
                                            sampling_rate=44100,
                                            approximate_length_in_seconds=1,
                                            ):
    """
    Convert a distance/elevation profile to an audio signal.

    This first normalizes the profile with
    :func:`convert_distance_and_elevation_to_signal` and then passes
    the result on to :func:`convert_signal_to_audio`.

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal.
        If this value is ``<= 0``, the audio level will always be
        maximized. If given a positive value, this value will
        represent the maximum scale of the audio signal.
        If the elevation profile's elevation difference is larger
        than this value, the signal will simply be maximized.
        A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz. Can also be a
        frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds.
        If equal to zero, will return a single loop.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """
    # step 1: normalized (x, y) pair
    signal = convert_distance_and_elevation_to_signal(
        distance,
        elevation,
        max_elevation_difference=max_elevation_difference,
    )

    # step 2: turn the normalized pair into sound
    audio_options = dict(
        tune=tune,
        sampling_rate=sampling_rate,
        approximate_length_in_seconds=approximate_length_in_seconds,
    )
    return convert_signal_to_audio(*signal, **audio_options)
def convert_signal_to_audio(x,
                            y,
                            tune='C',
                            sampling_rate=44100,
                            approximate_length_in_seconds=1,
                            ):
    """
    Convert a normalized distance/elevation signal to an audio signal.

    The signal is used as the waveform of a single oscillation: it is
    resampled, smoothed with a Savitzky-Golay filter, squeezed into one
    period of length ``1/tune`` and then repeated until the requested
    length is reached.

    Parameters
    ==========
    x : numpy.ndarray
        Normalized distance, has to span the range [0,1]
        (as returned by
        :func:`convert_distance_and_elevation_to_signal`).
    y : numpy.ndarray
        The corresponding signal values.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz. Can also be a
        frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds.
        If equal to zero, will return a single loop.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.

    Raises
    ======
    ValueError
        If ``tune`` is an unknown note name, is not a positive
        frequency, or is so high that not even a single sample
        fits into one period.
    """
    polyorder = 2

    tune = get_tune(tune)
    if not tune > 0:
        raise ValueError("`tune` has to be a positive frequency, got "+str(tune))

    samples_per_period = int(sampling_rate/tune)
    if samples_per_period < 1:
        raise ValueError("`tune` = "+str(tune)+" Hz is too high for a sampling rate of "+str(sampling_rate)+" Hz")

    # resample the profile on an equidistant grid
    x_sample = np.linspace(0,1,len(x)*2+1)
    y_sample = interp1d(x, y, kind='linear')(x_sample)

    # smooth over roughly 10% of the signal, but never more than 101 points
    if len(y_sample) > 400:
        window_length = 101
    else:
        window_length = int(len(y_sample) * 0.1)
        # the filter window has to be odd ...
        if window_length % 2 == 0:
            window_length += 1
        # ... and larger than the polynomial order, otherwise
        # `savgol_filter` raises for short signals
        window_length = max(window_length, polyorder + 1)

    # `mode='wrap'` treats the signal as periodic, which it will be
    # once it is looped
    y_filtered = savgol_filter(y_sample, window_length, polyorder, mode='wrap')

    # squeeze the whole profile into a single period of the desired tune
    sampling_t = np.linspace(0,1/tune,samples_per_period)
    y_period = interp1d(x_sample/tune, y_filtered, kind='cubic')(sampling_t)

    # only scale down signals that exceed [-1,1], never amplify
    ymax = np.max(np.abs(y_period))
    ymax = np.max([1.,ymax])

    # convert to 16-bit data
    raw_audio = (y_period * 32767 / ymax).astype(np.int16)

    if approximate_length_in_seconds > 0:
        # repeat the loop until the desired length is covered
        necessary_samples = sampling_rate * approximate_length_in_seconds
        copies = int(np.ceil(necessary_samples/len(raw_audio)))
        audio = np.concatenate([raw_audio]*copies)
    else:
        audio = raw_audio

    return audio, sampling_rate
def convert_distance_and_elevation_to_profile_audio(
                                            distance,
                                            elevation,
                                            max_elevation_difference=0,
                                            tune='C',
                                            sampling_rate=44100,
                                            approximate_length_in_seconds=1,
                                            ):
    """
    Convert a distance/elevation profile to an audio signal
    whose pitch mimics the elevation profile.

    The audio is assembled from single loops. Each loop is rendered
    at the base tune multiplied with ``2**y``, evaluated at the
    position of this loop along the profile.

    Parameters
    ==========
    distance : numpy.ndarray
        Contains the covered 2D distance in meters.
    elevation : numpy.ndarray
        Contains the corresponding elevation profile in meters
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal.
        If this value is ``<= 0``, the audio level will always be
        maximized. If given a positive value, this value will
        represent the maximum scale of the audio signal.
        If the elevation profile's elevation difference is larger
        than this value, the signal will simply be maximized.
        A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz. Can also be a
        frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds.
        If equal to zero, will return a single loop.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """
    x, y = convert_distance_and_elevation_to_signal(
        distance,
        elevation,
        max_elevation_difference=max_elevation_difference,
    )

    # render one loop and the full-length signal at the base tune;
    # only their lengths are needed to find the number of loops
    one_loop, _ = convert_signal_to_audio(
        x,
        y,
        tune=tune,
        sampling_rate=sampling_rate,
        approximate_length_in_seconds=0,
    )
    all_loops, _ = convert_signal_to_audio(
        x,
        y,
        tune=tune,
        sampling_rate=sampling_rate,
        approximate_length_in_seconds=approximate_length_in_seconds,
    )
    n_loops = int(np.ceil(len(all_loops)/len(one_loop)))

    # stretch the profile over all loops such that the i-th loop
    # can look up its own frequency multiplier at position `i`
    pitch_factor = interp1d(x * n_loops, 2**y, kind='cubic')
    base_frequency = get_tune(tune)

    loops = [
        convert_signal_to_audio(
            x,
            y,
            tune=base_frequency*pitch_factor(loop_index),
            sampling_rate=sampling_rate,
            approximate_length_in_seconds=0,
        )[0]
        for loop_index in range(n_loops)
    ]

    return np.concatenate(loops).astype(np.int16), sampling_rate
def convert_tour_to_audio(tour,
                          max_elevation_difference=0,
                          tune='C',
                          sampling_rate=44100,
                          approximate_length_in_seconds=1,
                          set_tune_to_follow_tour_profile=False,
                          ):
    """
    Convert a hiking tour to audio.

    Parameters
    ==========
    tour : dict
        A komoot tour item as provided by e.g.
        :func:`komoog.io.read_tours`.
    max_elevation_difference : float, default = 0
        Used to control the level of the audio signal.
        If this value is ``<= 0``, the audio level will always be
        maximized. If given a positive value, this value will
        represent the maximum scale of the audio signal.
        If the elevation profile's elevation difference is larger
        than this value, the signal will simply be maximized.
        A good value is ``max_elevation_difference = 2000``.
    tune : str or float
        Desired frequency of the sound. Can be any of

        .. code:: python

            [ 'C', 'C#', 'Db', 'D', 'D#', 'Eb', 'E', 'F', 'F#',
              'Gb', 'G', 'G#', 'Ab', 'A', 'A#', 'Bb', 'B' ]

        where ``'A'`` corresponds to 440Hz. Can also be a
        frequency in Hz.
    sampling_rate : int, default = 44100
        Sampling rate in Hz
    approximate_length_in_seconds : float, default = 1.
        The desired length of the audio signal in seconds.
        If equal to zero, will return a single loop.
    set_tune_to_follow_tour_profile : bool, default = False
        If set to ``True`` the tune of the returned audio signal
        will follow the tour profile.

    Returns
    =======
    audio : numpy.ndarray of numpy.int16
        The transformed audio signal
    sampling_rate : int
        The sampling rate of the audio signal.
    """
    tracks = convert_tour_to_gpx_tracks(tour)
    distance, elevation = convert_gpx_tracks_to_arrays(tracks)

    # both converters share one signature, so pick one and call it once
    if set_tune_to_follow_tour_profile:
        converter = convert_distance_and_elevation_to_profile_audio
    else:
        converter = convert_distance_and_elevation_to_audio

    audio, _ = converter(
        distance,
        elevation,
        max_elevation_difference=max_elevation_difference,
        tune=tune,
        sampling_rate=sampling_rate,
        approximate_length_in_seconds=approximate_length_in_seconds,
    )

    return audio, sampling_rate
if __name__=="__main__":

    # Demo: sonify a GPX file, play it, plot the waveform and then play
    # a second version whose pitch follows the elevation profile.

    import sys

    import gpxpy
    import matplotlib.pyplot as pl

    # the GPX file can be given as first command line argument; without
    # an argument the path this script was previously hard-wired to is used
    if len(sys.argv) > 1:
        fn = sys.argv[1]
    else:
        fn = '/Users/bfmaier/Downloads/Tour-5.gpx'

    with open(fn,'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    print(gpx.name)

    distance, elevation = convert_gpx_tracks_to_arrays(gpx.tracks)
    sampling_rate = 44100

    # constant-pitch version
    y, _ = convert_distance_and_elevation_to_audio(
        distance,
        elevation,
        max_elevation_difference=2000,
        sampling_rate=sampling_rate,
    )

    # blocks until playback has finished
    play_audio(y, sampling_rate)

    # waveform over time in seconds
    t = np.arange(len(y)) / sampling_rate
    pl.plot(t, y)
    pl.show()

    # version whose pitch follows the elevation profile
    y2, _ = convert_distance_and_elevation_to_profile_audio(
        distance,
        elevation,
        max_elevation_difference=2000,
        sampling_rate=sampling_rate,
        approximate_length_in_seconds=10,
    )
    play_audio(y2, sampling_rate)