Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Francomano_et_al_2023_PAM_for_Penguins/Data_Coverage_Metadata_Script.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
181 lines (157 sloc)
13.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################### | |
# Code from Francomano et al. (IBIS 2023) # | |
###################################################################### | |
# Data Coverage Metadata Script # | |
###################################################################### | |
# Created by: Dante Francomano (dfrancomano@alumni.purdue.edu) | |
# Date: 20191125 | |
###################################################################### | |
###################################################################### | |
# Load packages | |
.libPaths("E:/Dante/R_3_5_3_Library") | |
library(reshape2) | |
library(plyr) | |
library(data.table) | |
# List files for all sites | |
siteList<-c("Isla_Martillo_019","Isla_Martillo_Baliza","Isla_Martillo_P53","Isla_Martillo_Rata_Muerta","Isla_Martillo_Zona_Erosionada","Isla_Martillo_Zona_Nueva","Isla_Martillo_Zona_Turistica", | |
"Isla_de_los_Estados_PM_01","Isla_de_los_Estados_PM_02","Isla_de_los_Estados_PPA_02","Isla_de_los_Estados_PPA_03","Isla_de_los_Estados_PPA_05","Isla_de_los_Estados_PPA_06","Isla_de_los_Estados_PPA_07") | |
IM019FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/019",full.names=TRUE,recursive=TRUE) | |
IMBalizaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Baliza",full.names=TRUE,recursive=TRUE) | |
IMP53FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/P53",full.names=TRUE,recursive=TRUE) | |
IMRataMuertaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Rata_Muerta",full.names=TRUE,recursive=TRUE) | |
IMZonaErosionadaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Erosionada",full.names=TRUE,recursive=TRUE) | |
IMZonaNuevaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Nueva",full.names=TRUE,recursive=TRUE) | |
IMZonaTuristicaFilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_Martillo/Zona_Turistica",full.names=TRUE,recursive=TRUE) | |
IDLEPM01FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PM_01",full.names=TRUE,recursive=TRUE) | |
IDLEPM02FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PM_02",full.names=TRUE,recursive=TRUE) | |
IDLEPPA02FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_02",full.names=TRUE,recursive=TRUE) | |
IDLEPPA03FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_03",full.names=TRUE,recursive=TRUE) | |
IDLEPPA05FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_05",full.names=TRUE,recursive=TRUE) | |
IDLEPPA06FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_06",full.names=TRUE,recursive=TRUE) | |
IDLEPPA07FilePaths<-list.files("J:/Dante/Pinguinos/Datos_Acusticos/Isla_de_los_Estados/Trimmed_Data/PPA_07",full.names=TRUE,recursive=TRUE) | |
listAllFilePaths<-list(IM019FilePaths,IMBalizaFilePaths,IMP53FilePaths,IMRataMuertaFilePaths,IMZonaErosionadaFilePaths,IMZonaNuevaFilePaths,IMZonaTuristicaFilePaths, | |
IDLEPM01FilePaths,IDLEPM02FilePaths,IDLEPPA02FilePaths,IDLEPPA03FilePaths,IDLEPPA05FilePaths,IDLEPPA06FilePaths,IDLEPPA07FilePaths) | |
# Create list to receive inter-site comparison data | |
allSiteTimeList<-as.list(rep(NA,14)) | |
names(allSiteTimeList)<-siteList | |
# Run loop to output recording times at each site and create per-site figures | |
for(i in 1:length(listAllFilePaths)) | |
{ | |
# Convert file paths to recording times | |
fileList<-listAllFilePaths[[i]] | |
# Extract YYYYMMDD_HHMMSS and find total difference between start and end | |
if(i<8) rawDateTime<-sapply(fileList,function(x) substr(strsplit(x,"/S4A")[[1]][[3]],7,21)) | |
if(i>7) rawDateTime<-sapply(fileList,function(x) substr(strsplit(x,"/S4A")[[1]][[2]],7,21)) | |
dateTime<-strptime(rawDateTime,"%Y%m%d_%H%M%S",tz="UTC") | |
totalLength<-difftime(dateTime[length(dateTime)],dateTime[1],units="mins") | |
# Generate a vector of all possible recording times | |
possibleTimes<-NULL | |
for(h in 0:(unclass(totalLength)[1]/5)) | |
{ | |
possibleTimes[h+1]<-format(dateTime[1]+(h*300),"%Y%m%d_%H%M%S") | |
} | |
# Check for correspondance between possible and actual | |
dataTest<-apply(as.matrix(possibleTimes),1,function(x) isTRUE(grep(x,rawDateTime,fixed=TRUE)>0)) | |
temporalTable<-as.data.frame(cbind(substr(possibleTimes,1,8),substr(possibleTimes,10,15),dataTest)) | |
colnames(temporalTable)<-c("Date","Time","Test") | |
allSiteTimeList[[i]]<-temporalTable | |
temporalTableCast<-dcast(temporalTable,Date~Time,value.var="Test") | |
temporalTableCastBinary<-as.matrix(ifelse(is.na(temporalTableCast[,2:289])|temporalTableCast[,2:289]!=TRUE,0,1)) | |
rownames(temporalTableCastBinary)<-temporalTableCast[,1] | |
# Determine percent missing data between start and stop times | |
nTrue<-dim(temporalTable[which((temporalTable[,3])==TRUE),])[1] | |
nFalse<-dim(temporalTable[which((temporalTable[,3])==FALSE),])[1] | |
if(length(nFalse)==0) nFalse<-0 | |
proportionMissing<-round(nFalse/(nTrue+nFalse),2) | |
if(proportionMissing<0.01) proportionMissing<-"0.00" | |
# Write and plot results | |
fullTemporalTable<-melt(setDT(as.data.table(temporalTableCastBinary,keep.rownames=TRUE)),id.vars="rn",variable.factor=TRUE) | |
fullTemporalTable<-as.data.frame(apply(fullTemporalTable,2,as.factor)) | |
colnames(fullTemporalTable)<-c("Date","Time","Test") | |
write.csv(arrange(fullTemporalTable,Date,Time,Test),paste("E:/Dante/Penguins/Data_Coverage_Results/",siteList[i],".csv",sep=""),quote=TRUE,row.names=FALSE) | |
pdf(paste("E:/Dante/Penguins/Data_Coverage_Results/",siteList[i],".pdf",sep=""),width=7.25,height=(length(unique(temporalTable$Date))/4)+1,pointsize=6) | |
par(mai=c(0.5,0.75,0.5,0.5),xpd=FALSE) | |
image(seq(0:287),seq(1:length(unique(temporalTable$Date))),t(apply(temporalTableCastBinary,2,rev)),xlab="",ylab="",xaxt="n",yaxt="n",col=c("white","grey80")) | |
abline(v=seq(12.5,276.5,12),col="grey60") | |
abline(h=seq(0.5,length(unique(temporalTable$Date))-0.5,1),col="grey60") | |
title(main=gsub("_"," ",siteList[i]),adj=0,cex.main=2,col.main="grey20") | |
title(xlab="hour of day",cex.lab=1.5,col.lab="grey30") | |
mtext("date",side=2,line=5.5,cex=1.5,col="grey30") | |
axis(1,at=seq(0.5,287,12),labels=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23),cex.axis=1,col.axis="grey30",col.ticks="grey30") | |
axis(2,at=seq(0:(length(unique(temporalTable$Date))-1)),labels=rev(rownames(temporalTableCastBinary)),las=2.5,cex.axis=1,col.axis="grey30",col.ticks="grey30") | |
legend(x=150.6,y=length(unique(temporalTable$Date))+2.2,legend=c("recorded","missing",paste("number of missing files =",nFalse),paste("proportion of missing files =",proportionMissing)), | |
fill=c("grey80","white",NA,NA),border=c("grey30","grey30",NA,NA),ncol=2,text.col="grey30",box.col="grey30",xpd=TRUE) | |
box(col="grey30") | |
dev.off() | |
} | |
# Generate start and end times over multiple sites | |
IMOverallStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IMOverallEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IMPart1StartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(5:7)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IMPart1EndTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:4)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IMPart2StartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:4)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IMPart2EndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(1:7)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEOverallStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:14)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEOverallEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:14)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEPMStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:9)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEPMEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(8:9)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEPPAStartTime<-min(as.POSIXct(unlist(lapply(allSiteTimeList[c(10:14)],function(x) min(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
IDLEPPAEndTime<-max(as.POSIXct(unlist(lapply(allSiteTimeList[c(10:14)],function(x) max(as.POSIXct(strptime(paste(x[,1],x[,2],sep="_"),"%Y%m%d_%H%M%S",tz="UTC"))))),origin="1970-01-01 00:00.00 UTC",tz="UTC")) | |
# Set up data to create inter-site figures | |
interSiteCategories<-c("Isla_Martillo_Overall","Isla_Martillo_Part_1","Isla_Martillo_Part_2","Isla_de_los_Estados_Overall","Isla_de_los_Estados_Magellanic","Isla_de_los_Estados_Rockhopper") | |
IMOverallList<-list(IMOverallStartTime,IMOverallEndTime,allSiteTimeList[c(1:7)]) | |
IMPart1List<-list(IMPart1StartTime,IMPart1EndTime,allSiteTimeList[c(5:7)]) | |
IMPart2List<-list(IMPart2StartTime,IMPart2EndTime,allSiteTimeList[c(1:7)]) | |
IDLEOverallList<-list(IDLEOverallStartTime,IDLEOverallEndTime,allSiteTimeList[c(8:14)]) | |
IDLEPMList<-list(IDLEPMStartTime,IDLEPMEndTime,allSiteTimeList[c(8:9)]) | |
IDLEPPAList<-list(IDLEPPAStartTime,IDLEPPAEndTime,allSiteTimeList[c(10:14)]) | |
interSiteList<-list(IMOverallList,IMPart1List,IMPart2List,IDLEOverallList,IDLEPMList,IDLEPPAList) | |
names(interSiteList)<-interSiteCategories | |
# Run loop to create inter-site figures | |
for(i in 1:length(interSiteList)) | |
{ | |
# Find total difference between start and end | |
totalLength<-difftime(interSiteList[[i]][[2]],interSiteList[[i]][[1]],units="mins") | |
# Generate a vector of all possible recording times | |
possibleTimes<-NULL | |
for(h in 0:(unclass(totalLength)[1]/5)) | |
{ | |
possibleTimes[h+1]<-format(interSiteList[[i]][[1]]+(h*300),"%Y%m%d_%H%M%S") | |
} | |
# Convert all site date/times to a searchable character class | |
timesPerSite<-sapply(interSiteList[[i]][[3]],function(x) cbind(paste(x[,1],x[,2],sep="_"),as.logical(x[,3]))) | |
trueTimesPerSite<-sapply(timesPerSite,function(x) x[which(x[,2]==TRUE),]) | |
# Check for correspondance between possible and actual | |
perSiteDataTest<-ifelse(sapply(trueTimesPerSite,function(x) apply(as.matrix(possibleTimes),1,function(y) isTRUE(grep(y,x,fixed=TRUE)>0)))!=TRUE,0,1) | |
overallDataTest<-ifelse(rowSums(perSiteDataTest)==length(interSiteList[[i]][[3]]),TRUE,FALSE) | |
temporalTable<-as.data.frame(cbind(substr(possibleTimes,1,8),substr(possibleTimes,10,15),overallDataTest)) | |
colnames(temporalTable)<-c("Date","Time","Test") | |
temporalTableCast<-dcast(temporalTable,Date~Time,value.var="Test") | |
temporalTableCastBinary<-as.matrix(ifelse(is.na(temporalTableCast[,2:289])|temporalTableCast[,2:289]!=TRUE,0,1)) | |
rownames(temporalTableCastBinary)<-temporalTableCast[,1] | |
# Determine percent missing data between start and stop times | |
nTrue<-dim(temporalTable[which((temporalTable[,3])==TRUE),])[1] | |
nFalse<-dim(temporalTable[which((temporalTable[,3])==FALSE),])[1] | |
if(length(nFalse)==0) nFalse<-0 | |
proportionMissing<-round(nFalse/(nTrue+nFalse),2) | |
if(proportionMissing<0.01) proportionMissing<-"0.00" | |
# Write and plot results | |
fullTemporalTable<-melt(setDT(as.data.table(temporalTableCastBinary,keep.rownames=TRUE)),id.vars="rn",variable.factor=TRUE) | |
fullTemporalTable<-as.data.frame(apply(fullTemporalTable,2,as.factor)) | |
colnames(fullTemporalTable)<-c("Date","Time","Test") | |
write.csv(arrange(fullTemporalTable,Date,Time,Test),paste("E:/Dante/Penguins/Data_Coverage_Results/",interSiteCategories[i],".csv",sep=""),quote=TRUE,row.names=FALSE) | |
pdf(paste("E:/Dante/Penguins/Data_Coverage_Results/",interSiteCategories[i],".pdf",sep=""),width=7.25,height=(length(unique(temporalTable$Date))/4)+1,pointsize=6) | |
par(mai=c(0.5,0.75,0.5,0.5),xpd=FALSE) | |
image(seq(0:287),seq(1:length(unique(temporalTable$Date))),t(apply(temporalTableCastBinary,2,rev)),xlab="",ylab="",xaxt="n",yaxt="n",col=c("white","grey80")) | |
abline(v=seq(12.5,276.5,12),col="grey60") | |
abline(h=seq(0.5,length(unique(temporalTable$Date))-0.5,1),col="grey60") | |
title(main=gsub("_"," ",interSiteCategories[i]),adj=0,cex.main=2,col.main="grey20") | |
title(xlab="hour of day",cex.lab=1.5,col.lab="grey30") | |
mtext("date",side=2,line=5.5,cex=1.5,col="grey30") | |
axis(1,at=seq(0.5,287,12),labels=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23),cex.axis=1,col.axis="grey30",col.ticks="grey30") | |
axis(2,at=seq(0:(length(unique(temporalTable$Date))-1)),labels=rev(rownames(temporalTableCastBinary)),las=2.5,cex.axis=1,col.axis="grey30",col.ticks="grey30") | |
legend(x=150.6,y=length(unique(temporalTable$Date))+2.2,legend=c("recorded","missing",paste("number of missing files =",nFalse),paste("proportion of missing files =",proportionMissing)), | |
fill=c("grey80","white",NA,NA),border=c("grey30","grey30",NA,NA),ncol=2,text.col="grey30",box.col="grey30",xpd=TRUE) | |
box(col="grey30") | |
dev.off() | |
} | |