Title: | Tools for Voice Analysis, Speaker Recognition and Mood Inference |
---|---|
Description: | Tools for voice analysis, speaker recognition and mood inference. |
Authors: | Zabala Filipe J. [cre, aut] |
Maintainer: | Zabala Filipe J. <[email protected]> |
License: | GPL-3 |
Version: | 0.4.25 |
Built: | 2024-11-06 05:12:45 UTC |
Source: | https://github.com/filipezabala/voice |
Returns the total time of audio files in seconds
audio_time(x, filesRange = NULL, recursive = FALSE)
audio_time(x, filesRange = NULL, recursive = FALSE)
x |
Either a WAV file or a directory containing WAV files. |
filesRange |
The desired range of directory files (default: |
recursive |
Logical. Should the listing recurse into directories? (default: `FALSE`) |
A tibble containing file name <chr> and audio time <dbl> in seconds.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) # Tibble containing file name and audio time (at <- voice::audio_time(unique(dirname(path2wav)))) str(at)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) # Tibble containing file name and audio time (at <- voice::audio_time(unique(dirname(path2wav)))) str(at)
Diarization of WAV audios.
diarize( fromWav, toRttm = NULL, autoDir = FALSE, pycall = "~/miniconda3/envs/pyvoice/bin/python", token = NULL )
diarize( fromWav, toRttm = NULL, autoDir = FALSE, pycall = "~/miniconda3/envs/pyvoice/bin/python", token = NULL )
fromWav |
Either a file or a directory containing WAV files. |
toRttm |
A directory to write RTTM files. If the default |
autoDir |
Logical. Should the directory tree be created? Default: `FALSE`. |
pycall |
Python call. See https://github.com/filipezabala/voice for details. |
token |
Access token needed to instantiate the pretrained speaker diarization pipeline from pyannote.audio. #1. Install pyannote.audio 3.1 with pip install pyannote.audio (already listed @ https://raw.githubusercontent.com/filipezabala/voice/master/requirements.txt). #2. Accept https://hf.co/pyannote/segmentation-3.0 user conditions. #3. Accept https://hf.co/pyannote/speaker-diarization-3.1 user conditions. #4. Create access token at https://hf.co/settings/tokens. |
When autoDir = TRUE
, the following directories are created: '../mp3'
,'../rttm'
, '../split'
and '../musicxml'
. Use getwd()
to find the parent directory '../'
.
RTTM files in NIST standard. See 'voice::read_rttm'.
## Not run: library(voice) wavDir <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) voice::diarize(fromWav = unique(dirname(wavDir)), toRttm = tempdir(), token = NULL) # Must enter a token! See documentation. (rttm <- dir(tempdir(), '.[Rr][Tt][Tt][Mm]$', full.names = TRUE)) file.info(rttm) ## End(Not run)
## Not run: library(voice) wavDir <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) voice::diarize(fromWav = unique(dirname(wavDir)), toRttm = tempdir(), token = NULL) # Must enter a token! See documentation. (rttm <- dir(tempdir(), '.[Rr][Tt][Tt][Mm]$', full.names = TRUE)) file.info(rttm) ## End(Not run)
Duration of sequences
duration(x, windowShift = 5)
duration(x, windowShift = 5)
x |
A vector containing symbols and |
windowShift |
Window shift to duration in ms (default: 5.0). |
A data frame with duration in number of lines/occurrences (dur_line
), milliseconds (dur_ms
) and proportional (dur_prop
).
library(voice) duration(letters) duration(c('a','a','a',letters,'z')) nts <- c('NA','NA','A3','A3','A3','A3','A#3','B3','B3','C4','C4','C4','C4', 'C4','C4','C#4','C4','C4','C4','B3','A#3','NA','NA','NA','NA','NA','NA','NA', 'NA','NA','NA','NA','NA','NA','NA','NA','NA','NA','NA','D4','D4','D4','C#4', 'C#4','C#4','C4','C4','B3','B3','A#3','A#3','A3','A3','G3','G#3','G3','F#3') duration(nts)
library(voice) duration(letters) duration(c('a','a','a',letters,'z')) nts <- c('NA','NA','A3','A3','A3','A3','A#3','B3','B3','C4','C4','C4','C4', 'C4','C4','C#4','C4','C4','C4','B3','A#3','NA','NA','NA','NA','NA','NA','NA', 'NA','NA','NA','NA','NA','NA','NA','NA','NA','NA','NA','D4','D4','D4','C#4', 'C#4','C#4','C4','C4','B3','B3','A#3','A#3','A3','A3','G3','G#3','G3','F#3') duration(nts)
Enrich Rich Transcription Time Marked (RTTM) files obtained from 'voice::read_rttm
'.
enrich_rttm(listRttm, silence.gap = 0.5, as.tibble = TRUE)
enrich_rttm(listRttm, silence.gap = 0.5, as.tibble = TRUE)
listRttm |
A list containing RTTM files. |
silence.gap |
The silence gap (in seconds) between adjacent words in a keyword. Rows with |
as.tibble |
Logical. Should it return a tibble? |
A list containing either data frames or tibbles obtained from standard RTTM files. See 'voice::read_rttm'.
https://www.nist.gov/system/files/documents/itl/iad/mig/KWS15-evalplan-v05.pdf
voice::read_rttm
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' destfile0 <- paste0(tempdir(), '/sherlock0.rttm') download.file(url0, destfile = destfile0) url1 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock1.rttm' destfile1 <- paste0(tempdir(), '/sherlock1.rttm') download.file(url0, destfile = destfile1) rttm <- voice::read_rttm(dirname(destfile0)) (er <- voice::enrich_rttm(rttm)) class(er) lapply(er, class)
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' destfile0 <- paste0(tempdir(), '/sherlock0.rttm') download.file(url0, destfile = destfile0) url1 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock1.rttm' destfile1 <- paste0(tempdir(), '/sherlock1.rttm') download.file(url0, destfile = destfile1) rttm <- voice::read_rttm(dirname(destfile0)) (er <- voice::enrich_rttm(rttm)) class(er) lapply(er, class)
Expand model given y
and x
variables.
expand_model(y, x, k)
expand_model(y, x, k)
y |
The Y variable. |
x |
The X variables. |
k |
Number of additive components. |
A char
vector containing the expanded models.
library(voice) expand_model('y', LETTERS[1:4], 1) expand_model('y', LETTERS[1:4], 2) expand_model('y', LETTERS[1:4], 3) expand_model('y', LETTERS[1:4], 4) # multiple models using apply functions nx <- 10 # number of X variables to be used models <- lapply(1:nx, expand_model, y = 'y', x = LETTERS[1:nx]) names(models) <- 1:nx models sum(sapply(models, length)) # total of models
library(voice) expand_model('y', LETTERS[1:4], 1) expand_model('y', LETTERS[1:4], 2) expand_model('y', LETTERS[1:4], 3) expand_model('y', LETTERS[1:4], 4) # multiple models using apply functions nx <- 10 # number of X variables to be used models <- lapply(1:nx, expand_model, y = 'y', x = LETTERS[1:nx]) names(models) <- 1:nx models sum(sapply(models, length)) # total of models
Extracts features from WAV audio files.
extract_features( x, features = c("f0", "fmt", "rf", "rpf", "rcf", "rfc", "mfcc"), filesRange = NULL, sex = "u", windowShift = 10, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = NULL, verbose = FALSE, pycall = "~/miniconda3/envs/pyvoice/bin/python" )
extract_features( x, features = c("f0", "fmt", "rf", "rpf", "rcf", "rfc", "mfcc"), filesRange = NULL, sex = "u", windowShift = 10, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = NULL, verbose = FALSE, pycall = "~/miniconda3/envs/pyvoice/bin/python" )
x |
A vector containing either files or directories of audio files in WAV format. |
features |
Vector of features to be extracted. (Default: |
filesRange |
The desired range of directory files (Default: |
sex |
|
windowShift |
|
numFormants |
|
numcep |
Number of Mel-frequency cepstral coefficients (cepstra) to return (Default: |
dcttype |
Type of DCT used. |
fbtype |
Auditory frequency scale to use: |
resolution |
|
usecmp |
Logical. Apply equal-loudness weighting and cube-root compression (PLP instead of LPC) (Default: |
mc.cores |
Number of cores to be used in parallel processing. (Default: |
full.names |
Logical. If |
recursive |
Logical. Should the listing recurse into directories? (Default: `FALSE`) |
check.mono |
Logical. Check if the WAV file is mono. (Default: |
stereo2mono |
(Experimental) Logical. Should files be converted from stereo to mono? (Default: |
overwrite |
(Experimental) Logical. Should converted files be overwritten? If not, the file gets the suffix |
freq |
Frequency in Hz to write the converted files when |
round.to |
Number of decimal places to round to. (Default: |
verbose |
Logical. Should the running status be shown? (Default: `FALSE`) |
pycall |
Python call. See https://github.com/filipezabala/voice for details. |
The feature 'df' corresponds to 'formant dispersion' (df2:df8) by
Fitch (1997), 'pf' to 'formant position' (pf1:pf8) by Puts, Apicella & Cárdenas
(2012), 'rf' to 'formant removal' (rf1:rf8) by Zabala (2023), 'rcf' to
'formant cumulated removal' (rcf2:rcf8) by Zabala (2023) and 'rpf' to
'formant position removal' (rpf2:rpf8) by Zabala (2023).
The 'fmt_praat'
feature may take a long time to process. The following
features may contain a variable number of columns: 'cep'
, 'dft'
,
'css'
and 'lps'
.
A Media data frame containing the selected features.
Levinson N. (1946). The Wiener (root mean square) error criterion in filter design and prediction. Journal of Mathematics and Physics, 25(1-4), 261–278. (doi:10.1002/SAPM1946251261)
Durbin J. (1960). “The fitting of time-series models.” Revue de l’Institut International de Statistique, pp. 233–244. (https://www.jstor.org/stable/1401322)
Cooley J.W., Tukey J.W. (1965). “An algorithm for the machine calculation of complex Fourier series.” Mathematics of computation, 19(90), 297–301. (https://www.ams.org/journals/mcom/1965-19-090/S0025-5718-1965-0178586-1/)
Wasson D., Donaldson R. (1975). “Speech amplitude and zero crossings for automated identification of human speakers.” IEEE Transactions on Acoustics, Speech, and Signal Processing, 23(4), 390–392. (https://ieeexplore.ieee.org/document/1162690)
Allen J. (1977). “Short term spectral analysis, synthesis, and modification by discrete Fourier transform.” IEEE Transactions on Acoustics, Speech, and Signal Processing, 25(3), 235–238. (https://ieeexplore.ieee.org/document/1162950)
Schäfer-Vincent K. (1982). "Significant points: Pitch period detection as a problem of segmentation." Phonetica, 39(4-5), 241–253. (doi:10.1159/000261665)
Schäfer-Vincent K. (1983). "Pitch period detection and chaining: Method and evaluation." Phonetica, 40(3), 177–202. (doi:10.1159/000261691)
Ephraim Y., Malah D. (1984). “Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator.” IEEE Transactions on acoustics, speech, and signal processing, 32(6), 1109–1121. (https://ieeexplore.ieee.org/document/1164453)
Delsarte P., Genin Y. (1986). “The split Levinson algorithm.” IEEE transactions on acoustics, speech, and signal processing, 34(3), 470–478. (https://ieeexplore.ieee.org/document/1164830)
Jackson J.C. (1995). "The Harmonic Sieve: A Novel Application of Fourier Analysis to Machine Learning Theory and Practice." Technical report, Carnegie-Mellon University Pittsburgh PA School of Computer Science. (https://apps.dtic.mil/sti/pdfs/ADA303368.pdf)
Fitch, W.T. (1997) "Vocal tract length and formant frequency dispersion correlate with body size in rhesus macaques." J. Acoust. Soc. Am. 102, 1213 – 1222. (doi:10.1121/1.421048)
Boersma P., van Heuven V. (2001). Praat, a system for doing phonetics by computer. Glot. Int., 5(9/10), 341–347. (https://www.fon.hum.uva.nl/paul/papers/speakUnspeakPraat_glot2001.pdf)
Ellis DPW (2005). “PLP and RASTA (and MFCC, and inversion) in Matlab.” Online web resource. (https://www.ee.columbia.edu/~dpwe/resources/matlab/rastamat/)
Puts, D.A., Apicella, C.L., Cardenas, R.A. (2012) "Masculine voices signal men's threat potential in forager and industrial societies." Proc. R. Soc. B Biol. Sci. 279, 601–609. (doi:10.1098/rspb.2011.0829)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # minimal usage M1 <- extract_features(path2wav) M2 <- extract_features(dirname(path2wav)) identical(M1,M2) table(basename(M1$wav_path)) # limiting filesRange M3 <- extract_features(path2wav, filesRange = 3:6) table(basename(M3$wav_path))
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # minimal usage M1 <- extract_features(path2wav) M2 <- extract_features(dirname(path2wav)) identical(M1,M2) table(basename(M1$wav_path)) # limiting filesRange M3 <- extract_features(path2wav, filesRange = 3:6) table(basename(M3$wav_path))
Returns summary measures of 'voice::extract_features'.
feat_summary( x, groupBy = "wav_path", wavPath = unique(x$wav_path), wavPathName = "wav_path", features = "f0", filesRange = NULL, sex = "u", windowShift = 10, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = 4, verbose = FALSE )
feat_summary( x, groupBy = "wav_path", wavPath = unique(x$wav_path), wavPathName = "wav_path", features = "f0", filesRange = NULL, sex = "u", windowShift = 10, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = 4, verbose = FALSE )
x |
An Extended data frame to be tagged with media information. |
groupBy |
A variable to group the summary measures. The argument must be a character vector. (Default: |
wavPath |
A vector containing the path(s) to WAV files. May be both as |
wavPathName |
A string containing the WAV path name. (Default: |
features |
Vector of features to be extracted. (Default: |
filesRange |
The desired range of directory files (default: |
sex |
|
windowShift |
|
numFormants |
|
numcep |
Number of Mel-frequency cepstral coefficients (cepstra) to return (Default: |
dcttype |
Type of DCT used. |
fbtype |
Auditory frequency scale to use: |
resolution |
|
usecmp |
Logical. Apply equal-loudness weighting and cube-root compression (PLP instead of LPC) (Default: |
mc.cores |
Number of cores to be used in parallel processing. (Default: |
full.names |
Logical. If |
recursive |
Logical. Should the listing recurse into directories? (Default: `FALSE`) |
check.mono |
Logical. Check if the WAV file is mono. (Default: |
stereo2mono |
(Experimental) Logical. Should files be converted from stereo to mono? (Default: |
overwrite |
(Experimental) Logical. Should converted files be overwritten? If not, the file gets the suffix |
freq |
Frequency in Hz to write the converted files when |
round.to |
Number of decimal places to round to. (Default: |
verbose |
Logical. Should the running status be shown? (Default: `FALSE`) |
filesRange
should only be used when all the WAV files are in the same folder.
A tibble data frame containing summarized numeric columns using (1) mean, (2) standard deviation, (3) variation coefficient, (4) median, (5) interquartile range and (6) median absolute deviation.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # creating Extended synthetic data E <- dplyr::tibble(subject_id = c(1,1,1,2,2,2,3,3,3), wav_path = path2wav) # minimal usage feat_summary(E) # canonical data feat_summary(E, groupBy = 'subject_id')
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # creating Extended synthetic data E <- dplyr::tibble(subject_id = c(1,1,1,2,2,2,3,3,3), wav_path = path2wav) # minimal usage feat_summary(E) # canonical data feat_summary(E, groupBy = 'subject_id')
Get bit rate from WAV file.
get_bit(x)
get_bit(x)
x |
Wave object from 'tuneR::readWave'. |
Integer indicating the bit rate from a WAV file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_bit(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_bit)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_bit(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_bit)
Get time duration from WAV file.
get_dur(x)
get_dur(x)
x |
Wave object from 'tuneR::readWave'. |
Numeric indicating the time duration in seconds from a WAV file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_dur(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_dur)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_dur(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_dur)
Get left channel from WAV file.
get_left(x)
get_left(x)
x |
Wave object from 'tuneR::readWave'. |
Numeric vector indicating the left channel from a WAV file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) l <- voice::get_left(rw) head(l) length(l)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) l <- voice::get_left(rw) head(l) length(l)
Get right channel from WAV file.
get_right(x)
get_right(x)
x |
Wave object from 'tuneR::readWave'. |
Numeric vector indicating the right channel from a WAV file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) r <- voice::get_right(rw) head(r) length(r)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) r <- voice::get_right(rw) head(r) length(r)
Get sample rate from WAV file.
get_samp.rate(x)
get_samp.rate(x)
x |
Wave object from 'tuneR::readWave'. |
Integer indicating the sample rate from a WAV file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_samp.rate(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_samp.rate)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern <- glob2rx('*.wav'), full.names = TRUE) rw <- tuneR::readWave(path2wav[1]) voice::get_samp.rate(rw) rwl <- lapply(path2wav, tuneR::readWave) sapply(rwl, voice::get_samp.rate)
Get time beginning from a data frame in RTTM standard.
get_tbeg(x)
get_tbeg(x)
x |
A data frame in RTTM standard. See 'voice::read_rttm'. |
Numeric vector containing the time beginning in seconds.
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) rttm <- voice::read_rttm(tempdir()) (gtb <- voice::get_tbeg(rttm$sherlock0.rttm)) class(gtb)
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) rttm <- voice::read_rttm(tempdir()) (gtb <- voice::get_tbeg(rttm$sherlock0.rttm)) class(gtb)
Get time duration from a data frame in RTTM standard.
get_tdur(x)
get_tdur(x)
x |
A data frame in RTTM standard. See 'voice::read_rttm'. |
Numeric vector containing the time duration in seconds.
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) rttm <- voice::read_rttm(tempdir()) (gtd <- voice::get_tdur(rttm$sherlock0.rttm)) class(gtd)
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) rttm <- voice::read_rttm(tempdir()) (gtd <- voice::get_tdur(rttm$sherlock0.rttm)) class(gtd)
Interpolate vectors, compressing to compact.to
fraction. May remove zeros.
interp( y, compact.to, drop.zeros = FALSE, to.data.frame = FALSE, round.off = NULL, weight = NULL )
interp( y, compact.to, drop.zeros = FALSE, to.data.frame = FALSE, round.off = NULL, weight = NULL )
y |
A vector or time series. |
compact.to |
Proportion of remaining points after compaction, between 0 and 1 (inclusive). If equal to 1 and drop.zeros = FALSE, the original vector is presented. |
drop.zeros |
Logical. Drop repeated zeros? Default: |
to.data.frame |
Logical. Convert to data frame? Default: |
round.off |
Number of decimal places of the interpolated |
weight |
Vector of weights with same length of |
A list of interpolated x
and y
values with length near to compact.to*length(y)
.
rm0
, interp_mc
, interp_df
library(voice) v1 <- 1:100 (c1 <- interp(v1, compact.to = 0.2)) length(c1$y) plot(1:100, type = 'l') points(c1$x, c1$y, col='red') # with weight (c2 <- interp(v1, compact.to = 0.2, weight = rev(v1))) plot(c1$y) points(c2$y, col = 'red') (v2 <- c(1:5, rep(0,10), 1:10, rep(0,5), 10:20, rep(0,10))) length(v2) interp(v2, 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp(v2, 0.1, drop.zeros = TRUE, to.data.frame = TRUE) interp(v2, 0.2, drop.zeros = TRUE) interp(v2, 0.2, drop.zeros = FALSE) (v3 <- c(rep(0,10), 1:20, rep(0,3))) (c3 <- interp(v3, 1/3, drop.zeros = FALSE, to.data.frame = FALSE)) lapply(c3, length) plot(v3, type = 'l') points(c3$x, c3$y, col = 'red') (v4 <- c(rnorm(1:100))) (c4 <- interp(v4, 1/4, round.off = 3))
library(voice) v1 <- 1:100 (c1 <- interp(v1, compact.to = 0.2)) length(c1$y) plot(1:100, type = 'l') points(c1$x, c1$y, col='red') # with weight (c2 <- interp(v1, compact.to = 0.2, weight = rev(v1))) plot(c1$y) points(c2$y, col = 'red') (v2 <- c(1:5, rep(0,10), 1:10, rep(0,5), 10:20, rep(0,10))) length(v2) interp(v2, 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp(v2, 0.1, drop.zeros = TRUE, to.data.frame = TRUE) interp(v2, 0.2, drop.zeros = TRUE) interp(v2, 0.2, drop.zeros = FALSE) (v3 <- c(rep(0,10), 1:20, rep(0,3))) (c3 <- interp(v3, 1/3, drop.zeros = FALSE, to.data.frame = FALSE)) lapply(c3, length) plot(v3, type = 'l') points(c3$x, c3$y, col = 'red') (v4 <- c(rnorm(1:100))) (c4 <- interp(v4, 1/4, round.off = 3))
Interpolate data frames using multicore, compressing to compact.to
fraction. May remove zeros.
interp_df( x, compact.to, id = colnames(x)[1], colnum = NULL, drop.x = TRUE, drop.zeros = FALSE, to.data.frame = TRUE, round.off = NULL, weight = NULL, mc.cores = 1 )
interp_df( x, compact.to, id = colnames(x)[1], colnum = NULL, drop.x = TRUE, drop.zeros = FALSE, to.data.frame = TRUE, round.off = NULL, weight = NULL, mc.cores = 1 )
x |
A data frame. |
compact.to |
Proportion of remaining points after interpolation. If equal to 1 and drop.zeros = FALSE, the original vector is presented. |
id |
The identification column. Default: |
colnum |
A |
drop.x |
Logical. Drop columns containing .x? Default: |
drop.zeros |
Logical. Drop repeated zeros or keep 1 zero per null set? Default: |
to.data.frame |
Logical. Should return a data frame? If |
round.off |
Number of decimal places of the interpolated |
weight |
Vector of weights with same length of |
mc.cores |
The number of cores to mclapply. Default: |
A data frame of interpolated values with nrow near to compact.to*length(x)
.
interp
, interp_mc
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # getting Media data frame via lean call M <- extract_features(dirname(path2wav), features = c('f0','fmt'), mc.cores = 1, verbose = FALSE) (cM.df <- interp_df(M[,-(1:2)], 0.1, mc.cores = 1)) (cM.df2 <- interp_df(M[,-(1:2)], 0.1, drop.x = FALSE, mc.cores = 1)) dim(M) dim(cM.df) dim(cM.df2) (cM.list <- interp_df(M[,-(1:2)], 0.1, to.data.frame = FALSE, mc.cores = 1))
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # getting Media data frame via lean call M <- extract_features(dirname(path2wav), features = c('f0','fmt'), mc.cores = 1, verbose = FALSE) (cM.df <- interp_df(M[,-(1:2)], 0.1, mc.cores = 1)) (cM.df2 <- interp_df(M[,-(1:2)], 0.1, drop.x = FALSE, mc.cores = 1)) dim(M) dim(cM.df) dim(cM.df2) (cM.list <- interp_df(M[,-(1:2)], 0.1, to.data.frame = FALSE, mc.cores = 1))
Interpolate vectors using multicore
interp_mc( y, compact.to, drop.zeros = FALSE, to.data.frame = FALSE, round.off = NULL, weight = NULL, mc.cores = 1 )
interp_mc( y, compact.to, drop.zeros = FALSE, to.data.frame = FALSE, round.off = NULL, weight = NULL, mc.cores = 1 )
y |
A numeric vector, matrix or data frame. |
compact.to |
Proportion of remaining points after compression. If equal to 1 and drop.zeros = FALSE, the original vector is presented. |
drop.zeros |
Logical. Drop repeated zeros? Default: |
to.data.frame |
Logical. Convert to data frame? Default: |
round.off |
Number of decimal places of the interpolated |
weight |
Vector of weights with same length of |
mc.cores |
The number of cores to mclapply. Default: |
A list of x and y convoluted values with length near to compact.to*length(y)
.
rm0
, interp
, interp_df
library(voice) # Same result of interp() function if x is a vector interp(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp_mc(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = TRUE) interp_mc(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = TRUE) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # getting Media data frame M <- voice::extract_features(dirname(path2wav), mc.cores = 1, verbose = FALSE) M.num <- M[,-(1:3)] nrow(M.num) cm1 <- interp_mc(M.num, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE, mc.cores = 1) names(cm1) lapply(cm1$f0, length)
library(voice) # Same result of interp() function if x is a vector interp(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp_mc(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE) interp(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = TRUE) interp_mc(1:100, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = TRUE) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # getting Media data frame M <- voice::extract_features(dirname(path2wav), mc.cores = 1, verbose = FALSE) M.num <- M[,-(1:3)] nrow(M.num) cm1 <- interp_mc(M.num, compact.to = 0.1, drop.zeros = TRUE, to.data.frame = FALSE, mc.cores = 1) names(cm1) lapply(cm1$f0, length)
Verify if an audio is mono
is_mono(x)
is_mono(x)
x |
Path to WAV audio file. |
Logical. 'TRUE' indicates a mono (one-channel) file. 'FALSE' indicates a non-mono (two-channel) file.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) is_mono(path2wav[1]) sapply(path2wav, is_mono)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) is_mono(path2wav[1]) sapply(path2wav, is_mono)
A dataset containing sample IDs and paths from Ardila et al (2019) 'Common voice: A massively-multilingual speech corpus', used in Zabala (2023) 'voice: new approaches to audio analysis'. The considered sample contains 34,425 rows associated with 838 IDs (p_s = 2.4%).
mozilla_id_path
mozilla_id_path
Ardila R, Branson M, Davis K, Henretty M, Kohler M, Meyer J, Morais R, Saunders L, Tyers FM, Weber G (2019). "Common voice: A massively-multilingual speech corpus." arXiv preprint arXiv:1912.06670. URL https://arxiv.org/abs/1912.06670.
library(voice) mozilla_id_path
library(voice) mozilla_id_path
Returns a vector of notes for equal-tempered scale, A4 = 440 Hz.
notes(x, method = "spn", moving.average = FALSE, k = 11)
notes(x, method = "spn", moving.average = FALSE, k = 11)
x |
Numeric vector of frequencies in Hz. |
method |
Method of specifying musical pitch. (Default: |
moving.average |
Logical. Should a moving average be applied? (Default: `FALSE`) |
k |
Integer width of the rolling window used if moving.average is TRUE. (Default: |
The symbol '#' represents a sharp note, i.e., a note raised in pitch by one semitone, in Scientific Pitch Notation (SPN).
A vector containing the notes for equal-tempered scale, A4 = 440 Hz. When method = 'spn' the vector is of class 'ordered factor'. When method = 'octave' the vector is of class 'factor'. When method = 'midi' the vector is of class 'integer'.
https://pages.mtu.edu/~suits/notefreqs.html
notes_freq
library(voice) notes(c(220,440,880)) notes(c(220,440,880), method = 'octave') notes(c(220,440,880), method = 'midi')
library(voice) notes(c(220,440,880)) notes(c(220,440,880), method = 'octave') notes(c(220,440,880), method = 'midi')
Returns a tibble of frequencies on Scientific Pitch Notation (SPN) for equal-tempered scale, A4 = 440 Hz.
notes_freq()
notes_freq()
The symbol '#' represents a sharp note, i.e., a note raised in pitch by one semitone. The SPN is also known as American Standard Pitch Notation (ASPN) or International Pitch Notation (IPN).
A tibble with frequencies for equal-tempered scale, A4 = 440 Hz.
https://pages.mtu.edu/~suits/notefreqs.html
notes
library(voice) notes_freq()
library(voice) notes_freq()
Read Rich Transcription Time Marked (RTTM) files in fromRttm
directory.
read_rttm(fromRttm)
read_rttm(fromRttm)
fromRttm |
A directory/folder containing RTTM files. |
The Rich Transcription Time Marked (RTTM) files are space-delimited text files containing one turn per line, as defined by NIST - National Institute of Standards and Technology. Each line contains ten fields:
type
Type: segment type; should always be SPEAKER.
file
File ID: file name; basename of the recording minus extension (e.g., rec1_a).
chnl
Channel ID: channel (1-indexed) that turn is on; should always be 1.
tbeg
Turn Onset – onset of turn in seconds from beginning of recording.
tdur
Turn Duration – duration of turn in seconds.
ortho
Orthography Field – should always be <NA>.
stype
Speaker Type – should always be <NA>.
name
Speaker Name – name of speaker of turn; should be unique within scope of each file.
conf
Confidence Score – system confidence (probability) that information is correct; should always be <NA>.
slat
Signal Lookahead Time – should always be <NA>.
A list containing data frames obtained from standard RTTM files. See 'Details'.
https://www.nist.gov/system/files/documents/itl/iad/mig/KWS15-evalplan-v05.pdf
voice::enrich_rttm
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) url1 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock1.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock1.rttm')) (rttm <- voice::read_rttm(tempdir())) class(rttm) lapply(rttm, class)
library(voice) url0 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock0.rttm')) url1 <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock1.rttm' download.file(url0, destfile = paste0(tempdir(), '/sherlock1.rttm')) (rttm <- voice::read_rttm(tempdir())) class(rttm) lapply(rttm, class)
Transforms n
sets of m>n
zeros (alternated with sets of non zeros) into n
sets of n
zeros.
rm0(y)
rm0(y)
y |
A vector or time series. |
Vector with n zeros.
library(voice) (v0 <- c(1:20,rep(0,10))) (r0 <- rm0(v0)) length(v0) length(r0) sum(v0 == 0) (v1 <- c(rep(0,10),1:20)) (r1 <- rm0(v1)) length(r1) (v2 <- rep(0,10)) (r2 <- rm0(v2)) length(r2) (v3 <- c(0:10)) (r3 <- rm0(v3)) length(r3) (v4 <- c(rep(0,10), 1:10, rep(0,5), 10:20, rep(0,10))) (r4 <- rm0(v4)) length(r4) sum(v4 == 0)
library(voice) (v0 <- c(1:20,rep(0,10))) (r0 <- rm0(v0)) length(v0) length(r0) sum(v0 == 0) (v1 <- c(rep(0,10),1:20)) (r1 <- rm0(v1)) length(r1) (v2 <- rep(0,10)) (r2 <- rm0(v2)) length(r2) (v3 <- c(0:10)) (r3 <- rm0(v3)) length(r3) (v4 <- c(rep(0,10), 1:10, rep(0,5), 10:20, rep(0,10))) (r4 <- rm0(v4)) length(r4) sum(v4 == 0)
Smooth numeric variables in a data frame
smooth_df(x, k = 11, id = colnames(x)[1], colnum = NULL, mc.cores = 1)
smooth_df(x, k = 11, id = colnames(x)[1], colnum = NULL, mc.cores = 1)
x |
A data frame. |
k |
Integer width of the rolling window. Default: |
id |
The identification column. Default: |
colnum |
A |
mc.cores |
The number of cores to mclapply. By default uses |
Vector of interpolated values with length near to compact.to*length(x)
.
extract_features
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # minimal usage M <- extract_features(path2wav, features = c('f0', 'fmt')) (Ms <- smooth_df(M[-(1:2)])) dim(M) dim(Ms)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # minimal usage M <- extract_features(path2wav, features = c('f0', 'fmt')) (Ms <- smooth_df(M[-(1:2)])) dim(M) dim(Ms)
Split WAV files either in fromWav
directory or using (same names) RTTM files/subdirectories as guidance.
splitw( fromWav, fromRttm = NULL, toSplit = NULL, autoDir = FALSE, subDir = FALSE, output = "wave", filesRange = NULL, full.names = TRUE, recursive = FALSE, silence.gap = 0.5 )
splitw( fromWav, fromRttm = NULL, toSplit = NULL, autoDir = FALSE, subDir = FALSE, output = "wave", filesRange = NULL, full.names = TRUE, recursive = FALSE, silence.gap = 0.5 )
fromWav |
Either WAV file or directory containing WAV files. |
fromRttm |
Either RTTM file or directory containing RTTM files. Default: |
toSplit |
A directory to write generated files. Default: |
autoDir |
Logical. Must the directory tree be created? Default: |
subDir |
Logical. Must the split files be placed in subdirectories? Default: |
output |
Character string, the class of the object to return, either 'wave' or 'list'. |
filesRange |
The desired range of directory files (default: |
full.names |
Logical. If |
recursive |
Logical. Should the listing recurse into directories? (default: |
silence.gap |
The silence gap (in seconds) between adjacent words in a keyword. Rows with |
When autoDir = TRUE
, the following directories are created: '../mp3'
,'../rttm'
, '../split'
and '../musicxml'
. Use getwd()
to find the parent directory '../'
.
Split audio files according to the corresponding RTTM file(s). See 'voice::diarize
'.
voice::diarize
## Not run: library(voice) urlWav <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/wav/sherlock0.wav' destWav <- paste0(tempdir(), '/sherlock0.wav') download.file(urlWav, destfile = destWav) urlRttm <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' destRttm <- paste0(tempdir(), '/sherlock0.rttm') download.file(urlRttm, destfile = destRttm) splitDir <- paste0(tempdir(), '/split') dir.create(splitDir) splitw(destWav, fromRttm = destRttm, toSplit = splitDir) dir(splitDir) ## End(Not run)
## Not run: library(voice) urlWav <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/wav/sherlock0.wav' destWav <- paste0(tempdir(), '/sherlock0.wav') download.file(urlWav, destfile = destWav) urlRttm <- 'https://raw.githubusercontent.com/filipezabala/voiceAudios/main/rttm/sherlock0.rttm' destRttm <- paste0(tempdir(), '/sherlock0.rttm') download.file(urlRttm, destfile = destRttm) splitDir <- paste0(tempdir(), '/split') dir.create(splitDir) splitw(destWav, fromRttm = destRttm, toSplit = splitDir) dir(splitDir) ## End(Not run)
Tag a data frame with media information
tag( x, groupBy = "wav_path", wavPath = unique(x$wav_path), wavPathName = "wav_path", tags = c("feat_summary"), sortByGroupBy = TRUE, filesRange = NULL, features = "f0", sex = "u", windowShift = 5, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = 4, verbose = FALSE )
tag( x, groupBy = "wav_path", wavPath = unique(x$wav_path), wavPathName = "wav_path", tags = c("feat_summary"), sortByGroupBy = TRUE, filesRange = NULL, features = "f0", sex = "u", windowShift = 5, numFormants = 8, numcep = 12, dcttype = c("t2", "t1", "t3", "t4"), fbtype = c("mel", "htkmel", "fcmel", "bark"), resolution = 40, usecmp = FALSE, mc.cores = 1, full.names = TRUE, recursive = FALSE, check.mono = FALSE, stereo2mono = FALSE, overwrite = FALSE, freq = 44100, round.to = 4, verbose = FALSE )
x |
An Extended data frame to be tagged with media information. See references. |
groupBy |
A variable to group the summary measures. The argument must be a character vector. (Default: |
wavPath |
A vector containing the path(s) to WAV files. May be both as |
wavPathName |
A string containing the WAV path name. (Default: |
tags |
Tags to be added to |
sortByGroupBy |
Logical. Should the function sort the Extended data frame |
filesRange |
The desired range of directory files. Should only be used when all the WAV files are in the same folder. (Default: |
features |
Vector of features to be extracted. (Default: |
sex |
|
windowShift |
|
numFormants |
|
numcep |
Number of Mel-frequency cepstral coefficients (cepstra) to return (Default: |
dcttype |
Type of DCT used. |
fbtype |
Auditory frequency scale to use: |
resolution |
|
usecmp |
Logical. Apply equal-loudness weighting and cube-root compression (PLP instead of LPC) (Default: |
mc.cores |
Number of cores to be used in parallel processing. (Default: |
full.names |
Logical. If |
recursive |
Logical. Should the listing recurse into directories? (Default: |
check.mono |
Logical. Check if the WAV file is mono. (Default: |
stereo2mono |
(Experimental) Logical. Should files be converted from stereo to mono? (Default: |
overwrite |
(Experimental) Logical. Should converted files be overwritten? If not, the file gets the suffix |
freq |
Frequency in Hz to write the converted files when |
round.to |
Number of decimal places to round to. (Default: |
verbose |
Logical. Should the running status be shown? (Default: |
filesRange
should only be used when all the WAV files are in the same folder.
A tibble data frame containing summarized numeric columns using (1) mean, (2) standard deviation, (3) variation coefficient, (4) median, (5) interquartile range and (6) median absolute deviation.
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # creating Extended synthetic data E <- dplyr::tibble(subject_id = c(1,1,1,2,2,2,3,3,3), wav_path = path2wav) E # minimal usage tag(E) # canonical data tag(E, groupBy = 'subject_id') # limiting filesRange tag(E, filesRange = 3:6) # more features Et <- tag(E, features = c('f0', 'fmt', 'rf', 'rcf', 'rpf', 'rfc', 'mfcc'), groupBy = 'subject_id') Et str(Et)
library(voice) # get path to audio file path2wav <- list.files(system.file('extdata', package = 'wrassp'), pattern = glob2rx('*.wav'), full.names = TRUE) # creating Extended synthetic data E <- dplyr::tibble(subject_id = c(1,1,1,2,2,2,3,3,3), wav_path = path2wav) E # minimal usage tag(E) # canonical data tag(E, groupBy = 'subject_id') # limiting filesRange tag(E, filesRange = 3:6) # more features Et <- tag(E, features = c('f0', 'fmt', 'rf', 'rcf', 'rpf', 'rfc', 'mfcc'), groupBy = 'subject_id') Et str(Et)
Writes a list to a path
write_list(x, path)
write_list(x, path)
x |
A list. |
path |
A full path to file. |
A file named 'list.txt' in 'path'.
## Not run: library(voice) pts <- list(x = cars[,1], y = cars[,2]) listFile <- paste0(tempdir(), '/list.txt') voice::write_list(pts, listFile) file.info(listFile) system(paste0('head ', listFile)) ## End(Not run)
## Not run: library(voice) pts <- list(x = cars[,1], y = cars[,2]) listFile <- paste0(tempdir(), '/list.txt') voice::write_list(pts, listFile) file.info(listFile) system(paste0('head ', listFile)) ## End(Not run)