library(tidyverse)
library(velociraptr)
library(fossil)
library(RColorBrewer)
library(ggthemes)


Complete.pollen.data <- read_csv("__") # pollen data as a tibble

Pollen.genus.data <- read_csv("__") # pollen genus names and codes as a tibble 

No.Moss.Pollen.data <- read_csv("__") # pollen data without moss

Leaf.data <- read_csv("__")

Leaf.genus.data <- read_csv("__")


      # Making the formation.information dataframe
          pollen.formations <- Complete.pollen.data %>%
            distinct(Formation, `Paleolat (76 Ma)`, `Paleolon (76 Ma)`, `Paleolat (70.5 Ma)`, `Paleolon (70.5 Ma)`, Lat, Lon, `Quantitative Age`) %>%
            group_by(Formation) %>%
            summarise_all(mean)
          
          leaf.formations <- Leaf.data %>%
            distinct(Formation, `Paleolat (76 Ma)`, `Paleolon (76 Ma)`, `Paleolat (70.5 Ma)`, `Paleolon (70.5 Ma)`, Lat, Lon, `Quantitative Age`) %>%
            group_by(Formation) %>%
            summarise_all(mean) %>%
            filter(Formation != "unknown")
          
          formation.information <- full_join(pollen.formations, leaf.formations)
          formation.information <- formation.information %>%
            distinct() %>%
            group_by(Formation) %>%
            summarise_all(mean)
          
          write.csv(formation.information, "file")


formation.information <- read.csv("file") # This file is purposely read in as a regular dataframe instead of a tibble


###  Make presence-absence tables of all plants ####


plant.pres.ab.matrices <- function(primary.plant.data, plant.genus.data, filename) { # Opens plant.pres.ab.matrices function
   # the first two files to include are tibbles of distribution data. filename is a character string with .csv ending for the final PA matrix. 
  
#This makes a genus column by matching the genus code from the genus list in another sheet
primary.plant.data <- primary.plant.data %>%
  mutate('Genus' = plant.genus.data$Genus[ match(`Genus Code`, plant.genus.data$`Genus Code`)])

primary.plant.data <- as.data.frame(primary.plant.data)
    

      # Make the plant presence-absence matrix from the available formation and taxon list
      partial.plant.matrix1 <- presenceMatrix(primary.plant.data, "Formation", "Genus")
      partial.plant.matrix2 <- presenceMatrix(primary.plant.data, "Formation", "Genus/Species")
      
      
      plant.pMatrix <- cbind(partial.plant.matrix1, partial.plant.matrix2)
      write.csv(plant.pMatrix, filename)
      return(plant.pMatrix)
} #Closes plant.pres.ab.matrices function


      ## Make the PA Matrices
pollen.pMatrix <- plant.pres.ab.matrices(Complete.pollen.data, Pollen.genus.data, filename ="Pollen PA Matrix.csv")
no.moss.pollen.pMatrix <- plant.pres.ab.matrices(No.Moss.Pollen.data ,Pollen.genus.data, filename ="No Moss Pollen PA Matrix.csv")
leaf.pMatrix <-plant.pres.ab.matrices(Leaf.data, Leaf.genus.data, filename = "Leaf PA Matrix.csv")


            # A little clean up
            leaf.pMatrix <- leaf.pMatrix[ ,-1]
            write.csv(leaf.pMatrix, "Leaf PA Matrix.csv")
            
            
      pollen.pMatrix <- read.csv("Pollen PA Matrix.csv", header = T, row.names = 1) # as a regular dataframe
      no.moss.pollen.pMatrix <- read.csv("No Moss Pollen PA Matrix.csv", header = T, row.names = 1)
      leaf.pMatrix <- read.csv("Leaf PA Matrix.csv", header = T, row.names = 1)

#####################     Boundary Latitude Vector    #####################################

Hypothesized.Boundaries <- seq(40, 60 ,1)

###########################################################################################


################### Analysis ######################

## Function to compute the individual similarities for plants.

Taxon.Similarity.Func <- function(Pres.matrix, Taxon, Boundary.Lat, formation.information){    # Pres.matrix is the presence-absence matrix; Taxon is a character string name of the general taxonomic group being analyzed
  # Boundary.Lat is a vector of the hypothesized boundaries between two biogeographical provinces
  # time.period is either Campanian or Maastrichtian, depending on which paleolatitudes you want to use
 
  
  p <- nrow(Pres.matrix) # defining with a single object keeps code clean
  
  
  # Make a final results matrix. This needs to have a unique combination of formations. It also needs to have four columns: Formation 1, Formation2, distance between, and similarity between
  
  Sim.Dist.Results.Matrix.Names <- c("Formation 1", "Formation 2", "Boundary Latitude", "Campanian Provincial Comparison", "Maastrichtian Provincial Comparison", "Line Distance", "Mean Age Formation 1", "Mean Age Formation 2", "Time Gap", "Similarity", "Taxon")
  
  Sim.Dist.Results.Matrix <- matrix(ncol=length(Sim.Dist.Results.Matrix.Names), nrow=(p*(p-1)/2*length(Boundary.Lat))) 
  
  colnames(Sim.Dist.Results.Matrix) <- Sim.Dist.Results.Matrix.Names
  
  
  # Three loops here. The largest loop, Boundary.Loop, controls the ciruclation of the two inner loops across all proposed latitudinal boundaries.
  #  Inner Loop One One will control the Formation 1 column and the Inner Loop Two will control Formation 2
  # loop calculates the physical distance and similarity between all formations
  
  
  rowNumber <- 0  # this sets the initial row number to 0. Everytime it goes through the loop the next row number is added
  
  for (Boundary.Loop in 1:length(Boundary.Lat)) {  # Opens Boundary.Loop        
    
    print(Boundary.Loop)
    
    for (Form1 in 1:(p-1)){ # The end value is one away from the end of the vector, you cant compare the last formation to itself
      print(Form1)
      Formation_1 <- row.names(Pres.matrix)[Form1] # Defining the Formation
      Formation_1_row <- which ( formation.information$`Formation`== Formation_1 ) # finds the row number for Formation_1
      
      for (Form2 in (Form1+1):p){ # The beginning value chooses the adjacent formation to the Form 1 formation
        print(Form2)
        Formation_2 <- row.names(Pres.matrix)[Form2]
        Formation_2_row<- which ( formation.information$`Formation`== Formation_2 )  # Finds the row number for Formation_2
        
        
        rowNumber <- rowNumber+1  # Starts the results matrix row progression
        
        
        # Input names and abbreviations into the results matrix
        Sim.Dist.Results.Matrix [rowNumber,'Formation 1'] <-  Formation_1
        #Form_1_Abbrev <- as.character(formation.information [ Formation_1_row, "Formation.Abbreviation"])
       # Sim.Dist.Results.Matrix [rowNumber, "Formation 1 Abbreviation"] <- Form_1_Abbrev
        # The above code identifies the abbreviation for Formation_1 by locating the row of that designated formation in the
        # formation.information dataframe, then choosing the abbreviation from that row.
        
        
        Sim.Dist.Results.Matrix [rowNumber, 'Formation 2'] <- Formation_2
       # Form_2_Abbrev <- as.character(formation.information [ Formation_2_row, "Formation.Abbreviation"])
       # Sim.Dist.Results.Matrix [rowNumber, "Formation 2 Abbreviation"] <- Form_2_Abbrev
        
        
        ## Below is the Boundary Latitude for the current comparison
        Sim.Dist.Results.Matrix [rowNumber, "Boundary Latitude"] <- Boundary.Lat[Boundary.Loop]
        
        
        ## Below are the Provincial comparison columns                   
        Sim.Dist.Results.Matrix [rowNumber, "Campanian Provincial Comparison"] <- if (formation.information [Formation_1_row, "Paleolat..76.Ma."] >= Boundary.Lat[Boundary.Loop] & formation.information [Formation_2_row , "Paleolat..76.Ma."] >= Boundary.Lat[Boundary.Loop]) { "North-North" } else {
          if (formation.information [Formation_1_row, "Paleolat..76.Ma."] < Boundary.Lat[Boundary.Loop] & formation.information [Formation_2_row , "Paleolat..76.Ma."] < Boundary.Lat[Boundary.Loop]) { "South-South"} else { 
            "North-South"} } 
        
        
        Sim.Dist.Results.Matrix [rowNumber, "Maastrichtian Provincial Comparison"] <- if (formation.information [Formation_1_row, "Paleolat..70.5.Ma."] >= Boundary.Lat[Boundary.Loop] & formation.information [Formation_2_row , "Paleolat..70.5.Ma."] >= Boundary.Lat[Boundary.Loop]) { "North-North" } else {
              if (formation.information [Formation_1_row, "Paleolat..70.5.Ma."] < Boundary.Lat[Boundary.Loop] & formation.information [Formation_2_row , "Paleolat..70.5.Ma."] < Boundary.Lat[Boundary.Loop]) { "South-South"} else { 
                "North-South"} } 
        
        
        ## below is the distance formula (in km) between formations...this is from the 'fossil' package.
        Sim.Dist.Results.Matrix [rowNumber, "Line Distance"] <- as.numeric( round(deg.dist(formation.information[Formation_1_row, "Lat"], formation.information[Formation_1_row, "Lon"], formation.information[Formation_2_row, "Lat"], formation.information[Formation_2_row, "Lon"]), 2))
        
        ## below is the Mean Age for Formation 1
        Sim.Dist.Results.Matrix [rowNumber, "Mean Age Formation 1"] <- as.numeric(formation.information$`Quantitative.Age`[Formation_1_row]) 
        
        Sim.Dist.Results.Matrix [rowNumber, "Mean Age Formation 2"] <- as.numeric(formation.information$`Quantitative.Age`[Formation_2_row])
        
        ## below is the difference in time between the two formations being compared.
        Sim.Dist.Results.Matrix [rowNumber, "Time Gap"] <- as.numeric( abs(formation.information [ Formation_1_row, "Quantitative.Age"] - formation.information [ Formation_2_row, "Quantitative.Age"]))
        
        
        # The Dice-Sorenson similarity coefficient was used in this study because it was found to be one of the 
        # best similarity measures to use across all levels of data saturation by Hurbalek (1982), Archer and Maples (1987), and Maples and Archer (1988).
        # Shi (1993), found strong support or the Jaccard index but the previous authors did not consistently recomment 
        # its use. Additionally, we use the Chao estimator for the Sorenson index (chao et al., 2005) to increase accuracy  
        
        
        Sim.Dist.Results.Matrix [rowNumber, "Similarity"] <- as.numeric(round(sorenson(Pres.matrix[Form1, ], Pres.matrix[Form2, ]), 2) ) # the notation between the brackets designates the entire row of the matrix
        
        
        Sim.Dist.Results.Matrix [rowNumber, "Taxon"] <- Taxon
        
        
      } # closes Form2 loop, good practice to label which loops close what so you don't forget
    } # closes Form1 loop
    
  }  # closes Boundary.Loop
  
  Sim.Dist.Results.Matrix.Tib <- as_tibble(Sim.Dist.Results.Matrix)
  #Sim.Dist.Results.Matrix.Tib <- unite (Sim.Dist.Results.Matrix.Tib, 'Comparison Code', c("Formation 1 Abbreviation", "Formation 2 Abbreviation"), sep="-", remove=F) # This is combining the formation abbreviations for use in plots below
  
  Sim.Dist.Results.Matrix.Tib$`Line Distance` <- as.numeric(Sim.Dist.Results.Matrix.Tib$`Line Distance`)
  Sim.Dist.Results.Matrix.Tib <- mutate(Sim.Dist.Results.Matrix.Tib, "Relative Line Distance" = Sim.Dist.Results.Matrix.Tib$"Line Distance"/max(Sim.Dist.Results.Matrix.Tib$"Line Distance"))
  
  Sim.Dist.Results.Matrix.Tib$`Similarity` <- as.numeric(Sim.Dist.Results.Matrix.Tib$`Similarity`)
  Sim.Dist.Results.Matrix.Tib$`Mean Age Formation 1` <- as.numeric(Sim.Dist.Results.Matrix.Tib$`Mean Age Formation 1`)
  Sim.Dist.Results.Matrix.Tib$`Time Gap` <- as.numeric(Sim.Dist.Results.Matrix.Tib$`Time Gap`)
  return(Sim.Dist.Results.Matrix.Tib)
  
}  # Closes Taxon.Similarity.Func function


#######  Compiling the individual simialrity results  ###########

## Applying the functions on plant data

      Pollen.Biogeo.Results <- Taxon.Similarity.Func ( Pres.matrix = pollen.pMatrix, Taxon = "Complete pollen", Boundary.Lat = Hypothesized.Boundaries, formation.information = formation.information)
      No.Moss.Pollen.Biogeo.Results <- Taxon.Similarity.Func ( Pres.matrix = no.moss.pollen.pMatrix, Taxon = "No moss pollen", Boundary.Lat = Hypothesized.Boundaries, formation.information = formation.information)
      Leaf.Biogeo.Results <- Taxon.Similarity.Func ( Pres.matrix = leaf.pMatrix, Taxon = "Leaves", Boundary.Lat = Hypothesized.Boundaries, formation.information = formation.information)
      

      Pollen.Biogeo.Results <- as_tibble(Pollen.Biogeo.Results)
      No.Moss.Pollen.Biogeo.Results <- as_tibble(No.Moss.Pollen.Biogeo.Results)
      Leaf.Biogeo.Results <- as_tibble(Leaf.Biogeo.Results)


######### Save data to file
############################  Save the data to a csv file

write.csv(Pollen.Biogeo.Results, "Pollen Biogeography Similarity Results.csv")
write.csv(No.Moss.Pollen.Biogeo.Results, "No Moss Pollen Biogeography Similarity Results.csv")
write.csv(Leaf.Biogeo.Results, "Leaf Biogeography Similarity Results.csv")


############################################     Hypothesized Boundary Similarity Variance     ###################
###############################################                                              #####################

      ##  The below function calculates the varaince in similarity values between formations north and south of a proposed boundary latitude
         #    Further, the function automatically produces graphics showing all of the data and a plot showing Loess smoothed results
          #  Note that this function is written to be run by a CSV file that is read into R as a base dataframe...i.e., with all spaces in column names replaced with periods

Similarity.Variance <- function (Similarity.Results.Tibble, Hypothesized.Boundaries, Num.Random.Samples, Time.Period, plant.type) { # Opens Similarity Variance function 
  Bound.Var.Matrix.Names <- Hypothesized.Boundaries
  
  Var.Results.Subsample.Names <- c("Latitude", "North", "South", "Variance Difference", "Boundary Ratio")
  Var.Results.Subsample <- matrix (ncol=length(Var.Results.Subsample.Names), nrow=1)
  colnames(Var.Results.Subsample) <- Var.Results.Subsample.Names
  
  
  Var.Results.Raw.Names <- c("Latitude", "North", "South", "Cross-Boundary Variance", "Subsample Mean")
  Var.Results.Raw <- matrix (ncol=length(Var.Results.Raw.Names), nrow=length(Bound.Var.Matrix.Names))
  colnames(Var.Results.Raw) <- Var.Results.Raw.Names
  
  
  for (test.bnd in 1:length(Hypothesized.Boundaries) ) { # opens test.bnd FORLOOP 
    
    
    temp.data <- Similarity.Results.Tibble %>%
      filter(`Boundary.Latitude` == Hypothesized.Boundaries[test.bnd]) 
    
    
    Var.Results.Raw[test.bnd, "Latitude"] <- Hypothesized.Boundaries[test.bnd]
    
    if (Time.Period == "Campanian") {
    
    temp.data.North <- temp.data %>%
      filter(`Campanian.Provincial.Comparison` == "North-North")
    Var.Results.Raw[test.bnd, "North"] <- var(temp.data.North$`Similarity`)
    
    
    temp.data.South  <- temp.data %>%
      filter(`Campanian.Provincial.Comparison` == "South-South")
    Var.Results.Raw[test.bnd, "South"] <- var(temp.data.South$`Similarity`)
    
    
    temp.data.NS <- temp.data %>%
      filter(`Campanian.Provincial.Comparison` == "North-South" & temp.data$`Line.Distance` < 500) # This sets a maximum distance (in km) to the cross-boundary comparisons
    
  } else {
    
    temp.data.North <- temp.data %>%
      filter(`Maastrichtian.Provincial.Comparison` == "North-North")
    Var.Results.Raw[test.bnd, "North"] <- var(temp.data.North$`Similarity`)
    
    
    temp.data.South  <- temp.data %>%
      filter(`Maastrichtian.Provincial.Comparison` == "South-South")
    Var.Results.Raw[test.bnd, "South"] <- var(temp.data.South$`Similarity`)
    
    
    temp.data.NS <- temp.data %>%
      filter(`Maastrichtian.Provincial.Comparison` == "North-South" & temp.data$`Line.Distance` < 500) # This sets a maximum distance (in km) to the cross-boundary comparisons
  }
    
    
    Var.Results.Raw[test.bnd, "Cross-Boundary Variance"] <- var(temp.data.NS$`Similarity`)
    
    
    if (length (temp.data.North$`Similarity`) > length (temp.data.South$`Similarity`)) { 
      
      North.Larger <- 1
      larger.sample <- temp.data.North
      smaller.sample <- temp.data.South} else {
        North.Larger <- 0
        larger.sample <- temp.data.South
        smaller.sample <- temp.data.North
        
      } # closes length (temp.data.North$Similarity IFELSE
    
    
    mean.results.vector <- vector(length=Num.Random.Samples)
    
    for (e in 1:Num.Random.Samples) { #Opens e FORLOOP
      
      temp.results.vector <- vector(length=length(Var.Results.Subsample.Names))
      names(temp.results.vector) <- Var.Results.Subsample.Names
      
      
      subsample.vector <- sample (x = seq(1, length(larger.sample$Similarity)), size = length(smaller.sample$Similarity))
      
      
      var.larger.sample <- var(larger.sample$Similarity[subsample.vector])  # This finds the variance of randomly chosen Similarity values from the larger sample equal to the number of samples in the smaller sample 
      
      var.smaller.sample <- var(smaller.sample$Similarity) # This finds the variance of the entire smaller sample
      
      temp.results.vector["Latitude"] <- Hypothesized.Boundaries[test.bnd]
      temp.results.vector["North"] <-  if (North.Larger == 1) { var.larger.sample } else { var.smaller.sample }
      temp.results.vector["South"] <- if (North.Larger == 0) { var.larger.sample} else { var.smaller.sample }
      
      temp.results.vector["Variance Difference"] <- abs( var.larger.sample - var.smaller.sample )
      
      temp.results.vector["Boundary Ratio"] <- Var.Results.Raw[test.bnd, "Cross-Boundary Variance"] / temp.results.vector["Variance Difference"]
      
      mean.results.vector[e] <-   temp.results.vector["Variance Difference"] # This is storing the Variance Difference from the subsampled similarities
      
      Var.Results.Subsample <-   rbind (Var.Results.Subsample, temp.results.vector)
      
    } # Closes e FORLOOP
    
    
    Var.Results.Raw[test.bnd, "Subsample Mean"] <- mean(mean.results.vector)
    
  } # opens test.bnd FORLOOP
  
  
  Var.Results.Raw <- as_tibble(Var.Results.Raw)
  Var.Results.Raw <- Var.Results.Raw %>%
    mutate("Raw Difference" = abs(North - South)) 
  
  
  Var.Results.Subsample <- as_tibble(Var.Results.Subsample)
  Var.Results.Subsample$Latitude <- as.numeric(Var.Results.Subsample$Latitude)
  
  
  Var.Results.Raw <-   Var.Results.Raw %>%
    filter(`Cross-Boundary Variance` > 0) 
  
  
  ## LOESS analysis of subsampled variance
  Var.Diff.Loess <- loess(Var.Results.Subsample$`Variance Difference` ~ Var.Results.Subsample$Latitude, span = 0.75, method = "loess" )

  
  Camp.Subsamp <- Var.Results.Subsample %>%
    filter(North >= 0) %>%
    mutate("LOESS Var Diff" = Var.Diff.Loess$fitted) %>%
    distinct(Latitude, `LOESS Var Diff`)
  
  
  Var.Results.Raw <- Var.Results.Raw %>%
    mutate("LOESS Var Diff" = Camp.Subsamp$`LOESS Var Diff` [match(Latitude, Camp.Subsamp$Latitude)]) %>%
    mutate("LOESS Difference" = `LOESS Var Diff` - `Cross-Boundary Variance`)
  
  
  ###  Plots of Data
  
  Plot.Subsample <- drop_na(Var.Results.Subsample)
  
  
  Plot.Raw <- Var.Results.Raw %>%
    drop_na()
  
  
  Variance.Plot.Raw <- Plot.Raw %>%  
    select(Latitude, North, South) %>%
    gather(North, South, key="Region", value = "Raw Variance")
  
  
  plot.title <- paste(plant.type, "Variance", Time.Period, sep = " ")
  plot.file.title.SuppMat <- paste(Time.Period, plant.type, "Similarity Variance Supp Material.pdf", sep=" ") 
  plot.file.title <- paste(Time.Period, plant.type, "Similarity Variance.pdf", sep = " ")
  
  Total_Plot <- ggplot( ) +  # Figure for Supplementary Material
    
    geom_col(data = Variance.Plot.Raw, mapping = aes( x= `Latitude`, y=`Raw Variance`, fill = `Region`), position = "dodge") + # Variance columns
    scale_fill_brewer(palette = "GnBu") + # setting the color scale
    theme_classic() + # plain white background
    geom_point(data = Plot.Subsample, mapping = aes( x = Latitude, y =`Variance Difference`), position = "jitter", color = "dark green", shape = 16, size= 1, alpha = 0.5) + # Subsampled points
    geom_smooth(data = Plot.Raw, mapping = aes( x = Latitude, y =`Subsample Mean`, weight = 0.5), color = "royal blue", linetype = 2, alpha = 0.1) + 
    geom_point(data = Plot.Raw, mapping = aes(x = Latitude, y = `Raw Difference`), color = "black", size = 2, alpha = 0.5) + 
    geom_line(data = Plot.Raw, mapping = aes(x = Latitude, y = `Raw Difference`), color = "black", linetype = 1, alpha = 0.5) + 
    geom_line(data = Plot.Raw, mapping = aes(x = Latitude, y = `Cross-Boundary Variance`) , color = "orange", linetype = 1, alpha = .5) + # Line of Cross-Boundary variance
    geom_point(data = Plot.Raw, mapping = aes(x = Latitude, y = `Cross-Boundary Variance`) , color = "orange", size = 2, shape = 16, alpha = .5) + # Points of Cross-Boundary variance
    labs(title = plot.title, x = "Latitude", y = "Similarity Variance") +
    ggsave(plot.file.title.SuppMat, device="pdf", width = 30, height = 30, units="cm")
  
  
  Simple_Plot <- ggplot( ) +  # Figure for Paper
    scale_fill_brewer(palette = "GnBu") + # setting the color scale
    theme_classic() + # plain white background
    stat_smooth(data = Plot.Raw, mapping = aes( x = Latitude, y =`Subsample Mean`, weight = 0.5), color = "royal blue", linetype = 2, alpha = 0.1) + 
    stat_smooth(data = Plot.Raw, mapping = aes( x = Latitude, y =`Cross-Boundary Variance`, weight = 0.5), color = "orange", linetype = 2, alpha = 0.1) + 
    #geom_line(data = Plot.Raw, mapping = aes(x = Latitude, y = `Cross-Boundary Variance`) , color = "orange", linetype = 1, alpha = .5) + # Line of Cross-Boundary variance
    labs(title = plot.title, x = "Latitude", y = "Similarity Variance") +
    ggsave(plot.file.title, device="pdf", width = 30, height = 30, units="cm")
  
  
  Raw.File.Name <- paste(plant.type, " Raw Variance Results ", Time.Period, ".csv", sep="")
  write.csv(Var.Results.Raw, Raw.File.Name)
  
  Subsample.Var.File.Name <- paste(plant.type, " Subsample Variance Results ", Time.Period, ".csv", sep="")
  write.csv(Var.Results.Subsample, Subsample.Var.File.Name)
  
  Var.Results.List <- list(Var.Results.Raw = Var.Results.Raw, Var.Results.Subsample = Var.Results.Subsample, Simple.Plot = Simple_Plot, Total.Plot = Total_Plot)
  
  return (Var.Results.List)
  
} ## closes Similarity Variance function


             ##  Filter the data into appropriate bins for analysis
Pollen.Biogeo.Results$`Mean Age Formation 2` <- as.numeric(Pollen.Biogeo.Results$`Mean Age Formation 2`)
No.Moss.Pollen.Biogeo.Results$`Mean Age Formation 2` <- as.numeric(No.Moss.Pollen.Biogeo.Results$`Mean Age Formation 2`)
Leaf.Biogeo.Results$`Mean Age Formation 2` <- as.numeric(Leaf.Biogeo.Results$`Mean Age Formation 2`)


Plant.Results.Filtered.Time <- function(Plant.Biogeo.Results, Camp.Young.Age, Camp.Old.Age, EM.Young.Age, EM.Old.Age, LM.Young.Age, LM.Old.Age, Time.Gap) { # opens Plant.Results.Filtered.Time FUNCTION

  Plant.Results.Camp <- Plant.Biogeo.Results %>%   # Campanian bin, between 83 - 72 Ma
  filter( between(`Mean.Age.Formation.1`, left = Camp.Young.Age, right = Camp.Old.Age) & between(`Mean.Age.Formation.2`, Camp.Young.Age, Camp.Old.Age) & `Time.Gap` <= Time.Gap)

Plant.Results.EMaastricht <- Plant.Biogeo.Results %>%  # E Maastrichtian bin, betwen 80 and 69 Ma
  filter( between(`Mean.Age.Formation.1`, left = EM.Young.Age, right = EM.Old.Age) & between(`Mean.Age.Formation.2`, EM.Young.Age, EM.Old.Age) & `Time.Gap` <= Time.Gap)

Plant.Results.LMaastricht <- Plant.Biogeo.Results %>%  # L. Maastrichtian bin, between 77 and 66 Ma
  filter( between(`Mean.Age.Formation.1`, left = LM.Young.Age, right = LM.Old.Age) & between(`Mean.Age.Formation.2`, LM.Young.Age, LM.Old.Age) & `Time.Gap` <= Time.Gap)

Return.List <- list("Campanian Data" = Plant.Results.Camp, "Early Maastrichtian Data" = Plant.Results.EMaastricht, "Late Maastrichtian Data" = Plant.Results.LMaastricht)

return(Return.List)

} # closes Plant.Results.Filtered.Time FUNCTION


Comp.Pollen.Results.Filtered <- Plant.Results.Filtered.Time(Pollen.Biogeo.Results, Camp.Young.Age = 72, Camp.Old.Age = 80, EM.Young.Age = 69, EM.Old.Age = 78, LM.Young.Age = 66, LM.Old.Age = 74, Time.Gap = 3)
No.Moss.Pollen.Results.Filtered <- Plant.Results.Filtered.Time(No.Moss.Pollen.Biogeo.Results, Camp.Young.Age = 72, Camp.Old.Age = 80, EM.Young.Age = 69, EM.Old.Age = 78, LM.Young.Age = 66, LM.Old.Age = 74, Time.Gap = 3)
Leaf.Results.Filtered <- Plant.Results.Filtered.Time(Leaf.Biogeo.Results, Camp.Young.Age = 72, Camp.Old.Age = 80, EM.Young.Age = 69, EM.Old.Age = 78, LM.Young.Age = 66, LM.Old.Age = 74, Time.Gap = 3)


      #####  Run the Similarity.variance function for Pollen  ####
          Var.Results.Pollen.Camp <- Similarity.Variance(Comp.Pollen.Results.Filtered[[1]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "Campanian", plant.type = "Pollen")

          Var.Results.Pollen.EMaastricht <- Similarity.Variance(Comp.Pollen.Results.Filtered[[2]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "E. Maastrichtian", plant.type = "Pollen")

          Var.Results.Pollen.LMaastricht <- Similarity.Variance(Comp.Pollen.Results.Filtered[[3]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "L. Maastrichtian", plant.type = "Pollen")      

          Pollen.Data.Plots <- ggarrange(Var.Results.Pollen.Camp$Simple.Plot, Var.Results.Pollen.EMaastricht$Simple.Plot, Var.Results.Pollen.LMaastricht$Simple.Plot, Var.Results.Pollen.Camp$Total.Plot, Var.Results.Pollen.EMaastricht$Total.Plot, Var.Results.Pollen.LMaastricht$Total.Plot, nrow = 2, ncol = 3, common.legend = T, legend = "right")
          ggsave("Pollen Data Plots.pdf", plot = Pollen.Data.Plots, width = 40, height = 40, units = "cm")
          

      ##  Run the Similarity.variance function for No Moss Pollen
          Var.Results.No.Moss.Camp <- Similarity.Variance(No.Moss.Pollen.Results.Filtered[[1]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "Campanian", plant.type = "No Moss Pollen")

          Var.Results.No.Moss.EMaastricht <- Similarity.Variance(No.Moss.Pollen.Results.Filtered[[2]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "E. Maastrichtian", plant.type = "No Moss Pollen")

          Var.Results.No.Moss.LMaastricht <- Similarity.Variance(No.Moss.Pollen.Results.Filtered[[3]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "L. Maastrichtian", plant.type = "No Moss Pollen")      

          No.Moss.Data.Plot <- ggarrange(Var.Results.No.Moss.Camp$Simple.Plot, Var.Results.No.Moss.EMaastricht$Simple.Plot, Var.Results.No.Moss.LMaastricht$Simple.Plot, Var.Results.No.Moss.Camp$Total.Plot, Var.Results.No.Moss.EMaastricht$Total.Plot, Var.Results.No.Moss.LMaastricht$Total.Plot, nrow = 2, ncol = 3)
          ggsave("No Moss Data Plots.pdf", plot = No.Moss.Data.Plot, width = 40, height = 40, units = "cm")

      ##  Run the Similarity.variance function for Leaves
          Var.Results.Leaves.Camp <- Similarity.Variance(Leaf.Results.Filtered[[1]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "Campanian", plant.type = "Leaf")

          Var.Results.Leaves.EMaastricht <- Similarity.Variance(Leaf.Results.Filtered[[2]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "E. Maastrichtian", plant.type = "Leaf")

          Var.Results.Leaves.LMaastricht <- Similarity.Variance(Leaf.Results.Filtered[[3]], Hypothesized.Boundaries = Hypothesized.Boundaries, Num.Random.Samples = 100, Time.Period = "L. Maastrichtian", plant.type = "Leaf")      

          Leaf.Data.Plot <- ggarrange(Var.Results.Leaves.Camp$Simple.Plot , Var.Results.Leaves.Camp$Total.Plot,  nrow = 2, ncol = 2)

          ggsave("Leaf Variance Data Plots.pdf", plot = Leaf.Data.Plot, width = 20, height = 20, units = "cm")
##########

line.distance.plot <- function(biogeo.results, time.gap, plant.type, high.color, med.color, low.color) { # this function plots the relative distance between formations versus their similarity 
 
  plotfilename <- paste(plant.type, "Simiarlity v Distance Plot.pdf", sep = " ")
 
  
  Camp.plot.data <- biogeo.results %>%
    filter(between(`Mean.Age.Formation.1`, left = 72, right = 83) & between(`Mean.Age.Formation.2`, 72, 83) & `Time.Gap` <= 3)
  
  Maas.plot.data <- biogeo.results %>%
    filter(between(`Mean.Age.Formation.1`, left = 66, right = 72) & between(`Mean.Age.Formation.2`, 66, 72) & `Time.Gap` <= 3)
    
    
  Camp.scat <- ggplot(Camp.plot.data) +
    
    geom_jitter( aes(x = `Line.Distance`, y = Similarity, fill = `Mean.Age.Formation.1`, size = `Time.Gap`), shape = 21, color = med.color, alpha = 1/10)  +
    scale_fill_gradient(high = high.color , low = med.color) +
    scale_size(trans = "reverse")+
    theme_dark() +
    guides(size = F, alpha = F) +
    xlab("Distance (km)") +
    labs(fill = "Age (Ma)") +
    ggtitle("Campanian")
    ggsave(plotfilename, width = 20, height = 20, units = "cm")
  
  Maas.scat <- ggplot(Maas.plot.data) +
    
    geom_jitter( aes(x = `Line.Distance`, y = Similarity, fill = `Mean.Age.Formation.1`, size = `Time.Gap`), shape = 21, color = med.color, alpha = 1/10)  +
    scale_fill_gradient(high = med.color, low = low.color) +
    scale_size(trans="reverse") +
    theme_dark() +
    guides(size = F, alpha = F) +
    xlab("Distance (km)") +
    labs(fill = "Age (Ma)") +
    ggtitle("Maastrichtian")
    ggsave(plotfilename, width = 20, height = 20, units = "cm")
  
  combinedplotname <- paste(plant.type, "Camp Maas Similarity Plots.pdf", sep = " ")
  Camp.Maas.scat <- ggarrange(plots = list(Camp.scat, Maas.scat), align = "h" )
  ggsave(combinedplotname, Camp.Maas.scat, width = 20, height = 20, units = "cm", bg = "transparent")
  
  
} # closes line.distance.plot function
          Pollen.Biogeo.Results <- read.csv("Pollen Biogeography Similarity Results.csv")      
          No.Moss.Pollen.Biogeo.Results <- read.csv("No Moss Pollen Biogeography Similarity Results.csv")
          Leaf.Biogeo.Results <- read.csv("Leaf Biogeography Similarity Results.csv")
          
          Pollen.Raw.Variances.Camp <- read.csv("Pollen Raw Variance Results Campanian.csv")
          Pollen.Raw.Variances.EMass <- read.csv("Pollen Raw Variance Results E. Maastrichtian.csv")
          Pollen.Raw.Variances.LMass <- read.csv("Pollen Raw Variance Results L. Maastrichtian.csv")
          Leaf.Raw.Variances <- read.csv("Leaf Raw Variance Results Campanian.csv")
          
 
          line.distance.plot(Pollen.Biogeo.Results, 3, plant.type = "Pollen", high.color = "goldenrod3", med.color = "gold", low.color = "lightgoldenrodyellow") #pollen plot
          line.distance.plot(No.Moss.Pollen.Biogeo.Results, 3, plant.type = "No moss pollen") # no moss pollen plot
          line.distance.plot(Leaf.Biogeo.Results, 3, plant.type = "Leaf", high.color = "darkslategray", med.color = "olivedrab4", low.color = "olivedrab1") # leaf plot
          
          Complete.plot.data.pollen <- Pollen.Biogeo.Results %>%
            filter(`Time.Gap` <= 3)
          Complete.plot.data.leaves <- Leaf.Biogeo.Results %>%
            filter(`Time.Gap` <= 3)
          
          Pollen.scat.comp <- ggplot(Complete.plot.data.pollen) +
            
            geom_jitter( aes(x = `Line.Distance`, y = Similarity, fill = `Mean.Age.Formation.1`, size = `Time.Gap`), shape = 21, color = "gold")  +
            scale_fill_gradient(high = "goldenrod3" , low = "lightgoldenrodyellow") +
            scale_size(trans = "reverse")+
            theme_dark() +
            guides(size = F) +
            xlab("Distance (km)") +
            labs(fill = "Age (Ma)") +
            ggtitle("Pollen") +
            theme(plot.title = element_text(size = rel(1.5), hjust = 20))
          ggsave("Pollen Similarity Distribution.pdf", width = 20, height = 20, units = "cm")
          
          Leaf.scat.comp <- ggplot(Complete.plot.data.leaves) +
            
            geom_jitter( aes(x = `Line.Distance`, y = Similarity, fill = `Mean.Age.Formation.1`, size = `Time.Gap`), shape = 21, color = "olivedrab4")  +
            scale_fill_gradient(high = "forestgreen", low = "olivedrab1") +
            scale_size(trans="reverse") +
            theme_dark() +
            guides(size = F, alpha = F) +
            xlab("Distance (km)") +
            labs(fill = "Age (Ma)", title = "Leaves") +
            ggtitle("Leaves") +
            theme(plot.title = element_text(size = rel(1.5), hjust = 20))
          ggsave("Leaf Similarity Distribution.pdf", width = 20, height = 20, units = "cm")
          
          
          Pollen.Leaf.scat <- ggarrange(plots = list(Pollen.scat.comp, Leaf.scat.comp), align = "h" )
          ggsave("Combined Pollen Leaf Similarity Distributions.ps", Pollen.Leaf.scat, width = 15, height = 15, units = "cm", bg = "transparent")
          
          
          ## Plotting Latitude and Standardized Variance Difference
          
          Stand.Var.Diff.Camp <- tibble(Latitude = Pollen.Raw.Variances.Camp$Latitude, "Standard Variance Difference" = Pollen.Raw.Variances.Camp$Standardized.LOESS.Difference, Time = "Campanian")
          
          Stand.Var.Diff.EM <- tibble(Latitude = Pollen.Raw.Variances.EMass$Latitude, "Standard Variance Difference" = Pollen.Raw.Variances.EMass$Standardized.LOESS.Difference, Time = "Early Maastrichtian")
         
          Stand.Var.Diff.LM <- tibble(Latitude = Pollen.Raw.Variances.LMass$Latitude, "Standard Variance Difference" = Pollen.Raw.Variances.LMass$Standardized.LOESS.Difference, Time = "Late Maastrichtian")
          
          Stand.Var.Combined <- full_join(Stand.Var.Diff.Camp, Stand.Var.Diff.EM)
          Stand.Var.Combined <- full_join(Stand.Var.Combined, Stand.Var.Diff.LM)
          Stand.Var.Combined <- filter(Stand.Var.Combined, `Standard Variance Difference` >= 0)
          levels(Stand.Var.Combined$Time)
          
          ggplot(data = Stand.Var.Combined, aes(y=`Standard Variance Difference`, x = Latitude)) +
            geom_smooth(se = F, aes(color = Time)) +
            theme_dark() +
            ylab("Standardized Variance Difference") +
            scale_color_manual(values = c( "aliceblue", "darkslategray1", "deepskyblue"))+
            guides(color=guide_legend(title = NULL)) 
            
            ggsave("Boundary Variance Late Cretaceous.ps", bg="transparent", width = 20, height = 20, units = "cm")