# read sound file and break it up into individual words

library(sound)

# Cut a vector into consecutive chunks.
#   d              : the vector to cut up
#   nb.el.per.seg  : number of elements per chunk (the last chunk may be shorter)
# Returns a list of chunks, named "1", "2", ... in order of appearance.
mysplit = function(d,nb.el.per.seg) {
   positions = seq_along(d)
   # elements 1..n get chunk id 1, elements n+1..2n get chunk id 2, and so on
   chunk.id = ceiling(positions / nb.el.per.seg)
   split(d, chunk.id)
}


# Load the recording and keep the absolute value of the first row of snd$sound
# (presumably the first channel -- confirm against the sound package docs).
file="psychology.wav"
snd=loadSample(file)
wave = snd$sound[1,]
wave = abs(wave)
# str() prints its summary itself and returns NULL, so wrapping it in print()
# only added a stray "NULL" line to the output.
str(wave)

thresh = 0.001
# if vol > thresh ,then "psychology"
# if vol < thresh, then silence
vol = wave > thresh

# split the wave into pieces of 1000 samples each
# 1000 samples is about 0.05 seconds (1/20 sec).
# NOTE(review): that duration depends on the sampling rate of the file -- confirm.
splits = mysplit(wave,1000)

# calculate the mean of every segment (one number per segment).
# vapply() returns a plain named numeric vector instead of a list, so the
# comparison below no longer relies on implicit list coercion.
means = vapply(splits, mean, numeric(1))

#-------------------------------
# TRUE for segments whose mean absolute amplitude is above the threshold
above.silence  = means > thresh
print(above.silence)
# number of segments that are above the threshold
print(sum(above.silence))

# We have sound segments. Each with T/F. We wish to combine successive T segments

# approach: create series of small functions. 
# function 1: calculate index of next T : it
# function 2: calculate index of next F : if
# function 3: combine splits[it] through splits[if] into a single sound segment. 
# Repeat functions 1 through 3 until  all the "splits" are recombined. 

# NOTE: I am not concerned with efficiency (how fast this code executes, for now)

# splits: a vector of T/F
# Find the index of the next TRUE element at or after position `start`.
#   splits : a vector of TRUE/FALSE values
#   start  : index at which the search begins (inclusive)
# Returns the index of the first TRUE found, or NA when there is none.
# NA is also returned when `start` is NA, smaller than 1, or past the end of
# `splits` (start:length(splits) would otherwise count backwards and fail).
# NA elements of `splits` are treated as "not TRUE" and skipped.
next.T = function (splits, start) {
	nb.el = length(splits)
	if (is.na(start) || start < 1 || start > nb.el) {
		return(NA)    # nothing left to search
	}
	candidates = start:nb.el
	# which() ignores NA, so missing values never reach an if() condition
	hits = candidates[which(splits[candidates] == TRUE)]
	if (length(hits) == 0) {
		return(NA)    # no more trues
	}
	return(hits[1])
}

# test code
#ssplits = c(T,F,F,T,F,T,T)
#ssplits = c(T,F,F)
#cat("next True", next.T(ssplits, 2),"\n")

# once the function is tested, create a similar function for next.F (next false)
# Find the index of the next FALSE element at or after position `start`.
#   splits : a vector of TRUE/FALSE values
#   start  : index at which the search begins (inclusive)
# Returns the index of the first FALSE found, or NA when there is none.
# NA is also returned when `start` is NA, smaller than 1, or past the end of
# `splits` (start:length(splits) would otherwise count backwards and fail).
# NA elements of `splits` are treated as "not FALSE" and skipped.
next.F = function (splits, start) {
	nb.el = length(splits)
	if (is.na(start) || start < 1 || start > nb.el) {
		return(NA)    # nothing left to search
	}
	candidates = start:nb.el
	# which() ignores NA, so missing values never reach an if() condition
	hits = candidates[which(splits[candidates] == FALSE)]
	if (length(hits) == 0) {
		return(NA)    # no more falses
	}
	return(hits[1])
}

# Locate the first run of above-threshold segments:
# signal.start is its first segment, signal.end the first silent segment after it.
signal.start = next.T(above.silence, 1)
signal.end = next.F(above.silence, signal.start)

# combine segments splits[signal.start] to splits[signal.end-1] into a single sound wave
print(signal.start)
print(signal.end)

# Google search:  "combine list of vectors into a vector" "r"
# first hit: http://stackoverflow.com/questions/15515390/r-combine-a-list-of-vectors-into-a-single-vector
# unlist(list) returns a vector
# signal.end is itself a silent segment, so the run stops at signal.end-1
# (the parentheses matter: ":" binds tighter than "-").
sig = unlist(splits[signal.start:(signal.end-1)])

# length should equal 1000*(signal.end-signal.start), since each full segment
# holds 1000 samples
print(length(sig))

#worked. So create a new function: 

# Glue a run of consecutive segments back together.
#   split.signal : list of segments (vectors)
#   start, end   : first and last segment index to include (both inclusive)
# Returns one vector holding the selected segments laid end to end.
create.word = function(split.signal, start, end) {
	segment.idx = seq(from = start, to = end)
	pieces = split.signal[segment.idx]
	unlist(pieces)
}

# signal.end indexes the first silent segment after the signal, so the word
# itself stops one segment earlier (same convention as inside next.word).
sig = create.word(splits, signal.start, signal.end-1)
# expected: 1000*(signal.end-signal.start) when every segment is full
# (the earlier call that also included segment signal.end printed 32000)
print(length(sig))

# seems to work. So create a new function called next.word()

# Extract the next word (run of above-threshold segments) at or after `start`.
#   splits        : list of sound segments
#   above.silence : TRUE/FALSE per segment, TRUE when the segment is above the threshold
#   start         : segment index at which the search begins
# Returns list(word, next.start):
#   word       : the samples of the word as one vector (c(0) when no word is left)
#   next.start : index of the segment right after the word, to be passed as
#                `start` on the next call; NA when no word is left.
# A word that runs up to the very end of the recording (no trailing silent
# segment) is returned as well; next.start is then one past the last segment,
# and the following call reports "no word left".
next.word = function(splits, above.silence, start) {
	#cat("enter next.word, start= ", start,"\n")
	nb.segments = length(above.silence)
	# nothing left to scan: missing start, or start beyond the last segment
	if (is.na(start) || start > nb.segments) {
		return(list(c(0), NA))
	}
	signal.start = next.T(above.silence, start)
	if (is.na(signal.start)) {
		return(list(c(0), NA))
	}
	signal.end = next.F(above.silence, signal.start)
	if (is.na(signal.end)) {
		# no silent segment follows: the word lasts until the end of the recording
		signal.end = nb.segments + 1
	}
	#print(signal.start)
	#print(signal.end)
	word = create.word(splits, signal.start, signal.end-1)
	cat("signal.end= ", signal.end, "\n")
	return(list(word, signal.end))  # signal.end is the segment right after the word
}

# Manual check of next.word(): extract the first word, then the one after it.
print("****")
ret = next.word(splits, above.silence, start=1)
print("after next.word")
word = ret[[1]]
end = ret[[2]]
cat("end= ", end, "\n")  # ret[2] is a list, cat does not work with lists. use ret[[2]]
print("---")
# Only search again when the first call found a word: an NA start would make
# start:length(...) fail inside next.T.
if (!is.na(end)) {
	ret = next.word(splits, above.silence, start=end)
}
print("****")
word = ret[[1]]
end = ret[[2]]
cat("end= ", end, "\n")
# NOTE(review): sig still holds the signal built earlier in the script, not the
# word just extracted -- confirm whether length(word) was intended here.
print(length(sig))

# The tests above were sufficient and produced correct answers.
# I checked above.silence manually against the "start" and "end" points of the various words.

# Now extract all the words, i.e. drop the silent segments between them.

# Collect every word of the recording into the list `words`.
how.many.words = 0
sig.start = 1
words = list() # store words in list

# Each call to next.word() moves at least one segment forward, so the number of
# segments is a safe upper bound on the number of iterations. (A fixed limit of
# 100 would silently drop any word after the 100th.)
for (i in seq_along(above.silence)) {
   ret = next.word(splits, above.silence, start=sig.start)
   word = ret[[1]]
   end = ret[[2]]
   #print(word[1:10])  # works
   sig.start = end   # the next search starts where this word ended
   print(is.na(end))
   if (is.na(end)) {
   	 break  # exit loop: no word left
   }
   how.many.words = how.many.words + 1
   words[[how.many.words]] = word
}

# both counts must agree
cat("nb of words: ", how.many.words, "\n")
cat("nb of words: ", length(words), "\n")

#Compute volume averages. 

# Print the mean of every extracted word.
# seq_along() gives zero iterations when no word was found, whereas
# 1:length(words) would iterate over 1, 0 and fail on words[[1]].
for (i in seq_along(words)) {
	#print(words[[1]][1:10]) # works
	#print (str(words))
	print(mean(words[[i]]))
}

#----------------------------------------------------------