Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
######################################################################
# Code from Francomano et al. (IBIS 2023) #
######################################################################
# Data Coverage Metadata Script #
######################################################################
# Created by: Dante Francomano (dfrancomano@alumni.purdue.edu)
# Date: 20191125
######################################################################
######################################################################
# Load packages
.libPaths("E:/Dante/R_3_5_3_Library")
library(reshape2)
library(plyr)
library(data.table)
# List files for all sites
siteList<-c("Isla_Martillo_019","Isla_Martillo_Baliza","Isla_Martillo_P53","Isla_Martillo_Rata_Muerta","Isla_Martillo_Zona_Erosionada","Isla_Martillo_Zona_Nueva","Isla_Martillo_Zona_Turistica",
"Isla_de_los_Estados_PM_01","Isla_de_los_Estados_PM_02","Isla_de_los_Estados_PPA_02","Isla_de_los_Estados_PPA_03","Isla_de_los_Estados_PPA_05","Isla_de_los_Estados_PPA_06","Isla_de_los_Estados_PPA_07")
IM019FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/019",full.names=TRUE,recursive=TRUE)
IMBalizaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Baliza",full.names=TRUE,recursive=TRUE)
IMP53FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/P53",full.names=TRUE,recursive=TRUE)
IMRataMuertaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Rata_Muerta",full.names=TRUE,recursive=TRUE)
IMZonaErosionadaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Erosionada",full.names=TRUE,recursive=TRUE)
IMZonaNuevaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Nueva",full.names=TRUE,recursive=TRUE)
IMZonaTuristicaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Turistica",full.names=TRUE,recursive=TRUE)
IDLEPM01FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PM_01",full.names=TRUE,recursive=TRUE)
IDLEPM02FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PM_02",full.names=TRUE,recursive=TRUE)
IDLEPPA02FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_02",full.names=TRUE,recursive=TRUE)
IDLEPPA03FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_03",full.names=TRUE,recursive=TRUE)
IDLEPPA05FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_05",full.names=TRUE,recursive=TRUE)
IDLEPPA06FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_06",full.names=TRUE,recursive=TRUE)
IDLEPPA07FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_07",full.names=TRUE,recursive=TRUE)
listAllFilePaths<-list(IM019FilePaths,IMBalizaFilePaths,IMP53FilePaths,IMRataMuertaFilePaths,IMZonaErosionadaFilePaths,IMZonaNuevaFilePaths,IMZonaTuristicaFilePaths,
IDLEPM01FilePaths,IDLEPM02FilePaths,IDLEPPA02FilePaths,IDLEPPA03FilePaths,IDLEPPA05FilePaths,IDLEPPA06FilePaths,IDLEPPA07FilePaths)
# Create list to receive inter-site comparison data
allSiteTimeList<-as.list(rep(NA,14))
names(allSiteTimeList)<-siteList
# Run loop to output recording times at each site and create per-site figures
for(i in 1:length(listAllFilePaths))
{
# Convert file paths to recording times
fileList<-listAllFilePaths[[i]]
# Extract YYYYMMDD_HHMMSS and find total difference between start and end
if(i<8) rawDateTime<-sapply(fileList,function(x) substr(strsplit(x,"/S4A")[[1]][[3]],7,21))
if(i>7) rawDateTime<-sapply(fileList,function(x) substr(strsplit(x,"/S4A")[[1]][[2]],7,21))
dateTime<-strptime(rawDateTime,"%Y%m%d_%H%M%S",tz="UTC")
totalLength<-difftime(dateTime[length(dateTime)],dateTime[1],units="mins")
# Generate a vector of all possible recording times
possibleTimes<-NULL
for(h in 0:(unclass(totalLength)[1]/5))
{
possibleTimes[h+1]<-format(dateTime[1]+(h*300),"%Y%m%d_%H%M%S")
}
# Check for correspondance between possible and actual
dataTest<-apply(as.matrix(possibleTimes),1,function(x) isTRUE(grep(x,rawDateTime,fixed=TRUE)>0))
temporalTable<-as.data.frame(cbind(substr(possibleTimes,1,8),substr(possibleTimes,10,15),dataTest))
colnames(temporalTable)<-c("Date","Time","Test")
allSiteTimeList[[i]]<-temporalTable
temporalTableCast<-dcast(temporalTable,Date~Time,value.var="Test")
temporalTableCastBinary<-as.matrix(ifelse(is.na(temporalTableCast[,2:289])|temporalTableCast[,2:289]!=TRUE,0,1))
rownames(temporalTableCastBinary)<-temporalTableCast[,1]
# Determine percent missing data between start and stop times
nTrue<-dim(temporalTable[which((temporalTable[,3])==TRUE),])[1]
nFalse<-dim(temporalTable[which((temporalTable[,3])==FALSE),])[1]
if(length(nFalse)==0) nFalse<-0
proportionMissing<-round(nFalse/(nTrue+nFalse),2)
if(proportionMissing<0.01) proportionMissing<-"0.00"
# Write and plot results
fullTemporalTable<-melt(setDT(as.data.table(temporalTableCastBinary,keep.rownames=TRUE)),id.vars="rn",variable.factor=TRUE)
fullTemporalTable<-as.data.frame(apply(fullTemporalTable,2,as.factor))
colnames(fullTemporalTable)<-c("Date","Time","Test")
write.csv(arrange(fullTemporalTable,Date,Time,Test),paste("E:/Dante/Penguins/Data_Coverage_Results/",siteList[i],".csv",sep=""),quote=TRUE,row.names=FALSE)
pdf(paste("E:/Dante/Penguins/Data_Coverage_Results/",siteList[i],".pdf",sep=""),width=7.25,height=(length(unique(temporalTable$Date))/4)+1,pointsize=6)
par(mai=c(0.5,0.75,0.5,0.5),xpd=FALSE)
image(seq(0:287),seq(1:length(unique(temporalTable$Date))),t(apply(temporalTableCastBinary,2,rev)),xlab="",ylab="",xaxt="n",yaxt="n",col=c("white","grey80"))
abline(v=seq(12.5,276.5,12),col="grey60")
abline(h=seq(0.5,length(unique(temporalTable$Date))-0.5,1),col="grey60")
title(main=gsub("_"," ",siteList[i]),adj=0,cex.main=2,col.main="grey20")
title(xlab="hour of day",cex.lab=1.5,col.lab="grey30")
mtext("date",side=2,line=5.5,cex=1.5,col="grey30")
axis(1,at=seq(0.5,287,12),labels=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23),cex.axis=1,col.axis="grey30",col.ticks="grey30")
axis(2,at=seq(0:(length(unique(temporalTable$Date))-1)),labels=rev(rownames(temporalTableCastBinary)),las=2.5,cex.axis=1,col.axis="grey30",col.ticks="grey30")
legend(x=150.6,y=length(unique(temporalTable$Date))+2.2,legend=c("recorded","missing",paste("number of missing files =",nFalse),paste("proportion of missing files =",proportionMissing)),
fill=c("grey80","white",NA,NA),border=c("grey30","grey30",NA,NA),ncol=2,text.col="grey30",box.col="grey30",xpd=TRUE)
box(col="grey30")
dev.off()
}
# Generate start and end times over multiple sites
IMOverallStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IMOverallEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IMPart1StartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(5:7)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IMPart1EndTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:4)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IMPart2StartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:4)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IMPart2EndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEOverallStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:14)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEOverallEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:14)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEPMStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:9)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEPMEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:9)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEPPAStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(10:14)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
IDLEPPAEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(10:14)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC"))
# Set up data to create inter-site figures
interSiteCategories<-c("Isla_Martillo_Overall","Isla_Martillo_Part_1","Isla_Martillo_Part_2","Isla_de_los_Estados_Overall","Isla_de_los_Estados_Magellanic","Isla_de_los_Estados_Rockhopper")
IMOverallList<-list(IMOverallStartTime,IMOverallEndTime,allSiteTimeList[c(1:7)])
IMPart1List<-list(IMPart1StartTime,IMPart1EndTime,allSiteTimeList[c(5:7)])
IMPart2List<-list(IMPart2StartTime,IMPart2EndTime,allSiteTimeList[c(1:7)])
IDLEOverallList<-list(IDLEOverallStartTime,IDLEOverallEndTime,allSiteTimeList[c(8:14)])
IDLEPMList<-list(IDLEPMStartTime,IDLEPMEndTime,allSiteTimeList[c(8:9)])
IDLEPPAList<-list(IDLEPPAStartTime,IDLEPPAEndTime,allSiteTimeList[c(10:14)])
interSiteList<-list(IMOverallList,IMPart1List,IMPart2List,IDLEOverallList,IDLEPMList,IDLEPPAList)
names(interSiteList)<-interSiteCategories
# Run loop to create inter-site figures
for(i in 1:length(interSiteList))
{
# Find total difference between start and end
totalLength<-difftime(interSiteList[[i]][[2]],interSiteList[[i]][[1]],units="mins")
# Generate a vector of all possible recording times
possibleTimes<-NULL
for(h in 0:(unclass(totalLength)[1]/5))
{
possibleTimes[h+1]<-format(interSiteList[[i]][[1]]+(h*300),"%Y%m%d_%H%M%S")
}
# Convert all site date/times to a searchable character class
timesPerSite<-sapply(interSiteList[[i]][[3]],function(x) cbind(paste(x[,1],x[,2],sep="_"),as.logical(x[,3])))
trueTimesPerSite<-sapply(timesPerSite,function(x) x[which(x[,2]==TRUE),])
# Check for correspondance between possible and actual
perSiteDataTest<-ifelse(sapply(trueTimesPerSite,function(x) apply(as.matrix(possibleTimes),1,function(y) isTRUE(grep(y,x,fixed=TRUE)>0)))!=TRUE,0,1)
overallDataTest<-ifelse(rowSums(perSiteDataTest)==length(interSiteList[[i]][[3]]),TRUE,FALSE)
temporalTable<-as.data.frame(cbind(substr(possibleTimes,1,8),substr(possibleTimes,10,15),overallDataTest))
colnames(temporalTable)<-c("Date","Time","Test")
temporalTableCast<-dcast(temporalTable,Date~Time,value.var="Test")
temporalTableCastBinary<-as.matrix(ifelse(is.na(temporalTableCast[,2:289])|temporalTableCast[,2:289]!=TRUE,0,1))
rownames(temporalTableCastBinary)<-temporalTableCast[,1]
# Determine percent missing data between start and stop times
nTrue<-dim(temporalTable[which((temporalTable[,3])==TRUE),])[1]
nFalse<-dim(temporalTable[which((temporalTable[,3])==FALSE),])[1]
if(length(nFalse)==0) nFalse<-0
proportionMissing<-round(nFalse/(nTrue+nFalse),2)
if(proportionMissing<0.01) proportionMissing<-"0.00"
# Write and plot results
fullTemporalTable<-melt(setDT(as.data.table(temporalTableCastBinary,keep.rownames=TRUE)),id.vars="rn",variable.factor=TRUE)
fullTemporalTable<-as.data.frame(apply(fullTemporalTable,2,as.factor))
colnames(fullTemporalTable)<-c("Date","Time","Test")
write.csv(arrange(fullTemporalTable,Date,Time,Test),paste("E:/Dante/Penguins/Data_Coverage_Results/",interSiteCategories[i],".csv",sep=""),quote=TRUE,row.names=FALSE)
pdf(paste("E:/Dante/Penguins/Data_Coverage_Results/",interSiteCategories[i],".pdf",sep=""),width=7.25,height=(length(unique(temporalTable$Date))/4)+1,pointsize=6)
par(mai=c(0.5,0.75,0.5,0.5),xpd=FALSE)
image(seq(0:287),seq(1:length(unique(temporalTable$Date))),t(apply(temporalTableCastBinary,2,rev)),xlab="",ylab="",xaxt="n",yaxt="n",col=c("white","grey80"))
abline(v=seq(12.5,276.5,12),col="grey60")
abline(h=seq(0.5,length(unique(temporalTable$Date))-0.5,1),col="grey60")
title(main=gsub("_"," ",interSiteCategories[i]),adj=0,cex.main=2,col.main="grey20")
title(xlab="hour of day",cex.lab=1.5,col.lab="grey30")
mtext("date",side=2,line=5.5,cex=1.5,col="grey30")
axis(1,at=seq(0.5,287,12),labels=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23),cex.axis=1,col.axis="grey30",col.ticks="grey30")
axis(2,at=seq(0:(length(unique(temporalTable$Date))-1)),labels=rev(rownames(temporalTableCastBinary)),las=2.5,cex.axis=1,col.axis="grey30",col.ticks="grey30")
legend(x=150.6,y=length(unique(temporalTable$Date))+2.2,legend=c("recorded","missing",paste("number of missing files =",nFalse),paste("proportion of missing files =",proportionMissing)),
fill=c("grey80","white",NA,NA),border=c("grey30","grey30",NA,NA),ncol=2,text.col="grey30",box.col="grey30",xpd=TRUE)
box(col="grey30")
dev.off()
}