# Read a sound file and break it up into individual words.
library(sound)

# Split a vector into consecutive segments of a fixed size.
#
# d:              the vector to split
# nb.el.per.seg:  number of elements per segment (a single positive number)
#
# Returns a list of vectors named "1", "2", ...; every segment holds
# nb.el.per.seg elements except possibly the last one, which holds the
# remainder.
mysplit <- function(d, nb.el.per.seg) {
  stopifnot(
    is.numeric(nb.el.per.seg),
    length(nb.el.per.seg) == 1,
    nb.el.per.seg > 0
  )
  # Element i belongs to group ceiling(i / size).
  split(d, ceiling(seq_along(d) / nb.el.per.seg))
}

file <- "psychology.wav"
snd <- loadSample(file)

# First row of the sample matrix (presumably the first channel -- confirm
# against the `sound` package documentation).
wave <- snd$sound[1, ]
# Work with magnitudes so that segment means measure volume.
wave <- abs(wave)
# str() prints by itself and returns NULL, so it must not be wrapped in
# print() (that would emit an extra "NULL" line).
str(wave)

thresh <- 0.001
# if vol > thresh, then "psychology" (speech)
# if vol < thresh, then silence
vol <- wave > thresh

# Split the wave into pieces of 1000 samples each.
# 1000 samples is about 0.05 seconds (1/20 sec), i.e. assuming a sampling
# rate near 20 kHz -- TODO confirm against the file's actual rate.
splits <- mysplit(wave, 1000)

# Calculate the mean volume of every segment. vapply() guarantees a plain
# numeric vector (one value per segment) instead of a list.
means <- vapply(splits, mean, numeric(1))

#-------------------------------
# TRUE for every segment whose mean volume is above the silence threshold.
above.silence <- means > thresh
print(above.silence)
# Number of segments above the threshold.
print(sum(above.silence))

# We have sound segments, each flagged TRUE/FALSE. We wish to combine
# successive TRUE segments.
# Approach: create a series of small functions.
# function 1: calculate the index of the next TRUE  : it
# function 2: calculate the index of the next FALSE : if
# function 3: combine splits[it] through splits[if] into a single sound segment.
# Repeat functions 1 through 3 until all the "splits" are recombined.
# NOTE: I am not concerned with efficiency (how fast this code executes) for now.

# Find the index of the next TRUE element.
#
# splits: a vector of TRUE/FALSE flags
# start:  index at which the search begins (inclusive)
#
# Returns the index of the first TRUE at or after `start`, or NA when `start`
# is NA, `start` lies past the end of `splits`, or no TRUE remains.
next.T <- function(splits, start) {
  # Without this guard start:length(splits) would count DOWNWARD when
  # start > length(splits) and index past the end of the vector.
  if (is.na(start) || start > length(splits)) {
    return(NA)
  }
  hits <- which(splits == TRUE, useNames = FALSE)
  hits <- hits[hits >= start]
  if (length(hits) == 0) {
    return(NA) # no more trues
  }
  hits[1]
}

# test code
# ssplits = c(TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)
# ssplits = c(TRUE, FALSE, FALSE)
# cat("next True", next.T(ssplits, 2), "\n")

# Once the function is tested, create a similar function for next.F
# (next false).

# Find the index of the next FALSE element.
#
# splits: a vector of TRUE/FALSE flags
# start:  index at which the search begins (inclusive)
#
# Returns the index of the first FALSE at or after `start`, or NA when `start`
# is NA, `start` lies past the end of `splits`, or no FALSE remains.
next.F <- function(splits, start) {
  if (is.na(start) || start > length(splits)) {
    return(NA)
  }
  hits <- which(splits == FALSE, useNames = FALSE)
  hits <- hits[hits >= start]
  if (length(hits) == 0) {
    return(NA) # no more falses
  }
  hits[1]
}

signal.start <- next.T(above.silence, 1)
if (is.na(signal.start)) {
  stop("no segment is above the silence threshold", call. = FALSE)
}
signal.end <- next.F(above.silence, signal.start)

# Combine segments splits[signal.start] to splits[signal.end-1] into a single
# sound wave.
print(signal.start)
print(signal.end)

# Last segment that still belongs to the signal: the one just before the next
# silent segment, or the final segment when no silence follows.
signal.last <- if (is.na(signal.end)) length(splits) else signal.end - 1

# Google search: "combine list of vectors into a vector" "r"
# first hit: http://stackoverflow.com/questions/15515390/r-combine-a-list-of-vectors-into-a-single-vector
# unlist(list) returns a vector
sig <- unlist(splits[signal.start:signal.last])
# Length should equal 1000 * (signal.end - signal.start) when every combined
# segment is a full 1000-sample segment.
print(length(sig))

# Worked. So create a new function:

# Concatenate the segments split.signal[start] .. split.signal[end] (both
# inclusive) into one vector.
#
# split.signal: list of segments
# start, end:   first and last segment index to include
#
# Returns the concatenated samples, or an empty numeric vector when
# end < start (start:end would otherwise count downward and reverse the
# segment order).
create.word <- function(split.signal, start, end) {
  if (end < start) {
    return(numeric(0))
  }
  unlist(split.signal[start:end])
}

sig <- create.word(splits, signal.start, signal.last)
# NOTE(review): the original run reported 32000 here while the slice still
# included the silent segment at signal.end; re-check the value.
print(length(sig))
# seems to work.
# So create a new function called next.word()

# Extract the next word (run of consecutive above-silence segments).
#
# splits:        list of sound segments (as produced by mysplit)
# above.silence: TRUE/FALSE flags, one per segment (TRUE = above threshold)
# start:         segment index at which the search begins
#
# Returns a list of two elements:
#   [[1]] the samples of the word, concatenated into one vector
#   [[2]] the index of the first segment after the word (the next FALSE
#         segment, or length(above.silence) + 1 when the word runs to the
#         end of the recording)
# When no further word exists the result is list(c(0), NA).
next.word <- function(splits, above.silence, start) {
  # cat("enter next.word, start= ", start, "\n")
  no.word <- list(c(0), NA)

  # An NA or past-the-end start means the previous call consumed everything.
  if (is.na(start) || start > length(above.silence)) {
    return(no.word)
  }

  signal.start <- next.T(above.silence, start)
  if (is.na(signal.start)) {
    return(no.word)
  }

  signal.end <- next.F(above.silence, signal.start)
  if (is.na(signal.end)) {
    # No silence follows: the word extends to the last segment. Previously
    # such a trailing word was silently dropped.
    signal.end <- length(above.silence) + 1
  }

  # print(signal.start)
  # print(signal.end)
  word <- create.word(splits, signal.start, signal.end - 1)
  cat("signal.end= ", signal.end, "\n")
  list(word, signal.end) # signal.end is next false
}

print("****")
ret <- next.word(splits, above.silence, start = 1)
print("after next.word")
word <- ret[[1]]
end <- ret[[2]]
# ret[2] is a list, cat does not work with lists. Use ret[[2]].
cat("end= ", end, "\n")
print("---")
ret <- next.word(splits, above.silence, start = end)
print("****")
word <- ret[[1]]
end <- ret[[2]]
cat("end= ", end, "\n")
# NOTE(review): this prints the length of `sig` from the earlier experiment,
# not of `word` -- confirm which one was intended.
print(length(sig))

# The above did sufficient testing, and testing produced correct answers.
# I checked above.silence manually based on the "end" and "start" points of
# the various words.

# Now extract all the words.
how.many.words <- 0
sig.start <- 1
words <- list() # store words in list
# Every word uses at least one segment, so the number of segments is a safe
# upper bound on the number of iterations (no arbitrary cap of 100 words).
for (i in seq_along(above.silence)) {
  ret <- next.word(splits, above.silence, start = sig.start)
  word <- ret[[1]]
  end <- ret[[2]]
  # print(word[1:10]) # works
  sig.start <- end
  print(is.na(end))
  if (is.na(end)) {
    break # exit loop
  }
  how.many.words <- how.many.words + 1
  words[[how.many.words]] <- word
}

cat("nb of words: ", how.many.words, "\n")
cat("nb of words: ", length(words), "\n")

# Compute volume averages.
# seq_along() yields no iterations when no word was found, whereas
# 1:length(words) would iterate over c(1, 0) and fail.
for (i in seq_along(words)) {
  # print(words[[1]][1:10]) # works
  # print(str(words))
  print(mean(words[[i]]))
}
#----------------------------------------------------------