===== Land Use and Land Cover Change =====

<code rsplus exercise4_land_cover_change_05112018.R>

####################################   Land Use and Land Cover Change   #######################################
############################   Analyze Land Cover Change in Houston   #######################################
#This script performs analyses for a land cover change model using NLCD aggregated values.
#The goal is to assess land cover change using two land cover maps of the Houston area.
#Additional datasets are provided for the land cover change modeling. A model is built for Harris County.
#
#AUTHORS: Benoit Parmentier
#DATE CREATED: 03/16/2018
#DATE MODIFIED: 05/11/2018
#Version: 1
#PROJECT: SESYNC and AAG 2018 Geospatial Short Course, Geo-Computation and Environmental Analysis, Yale.
#TO DO:
#
#COMMIT: fixing error
#
#################################################################################################

### Loading R libraries and packages

library(sp) # spatial/geographic objects and functions
library(rgdal) # GDAL/OGR bindings for R
library(spdep) # spatial analysis operations and functions
library(gtools) # contains mixedsort and other useful functions
library(maptools) # tools to manipulate spatial data
library(parallel) # parallel computation, part of base R
library(rasterVis) # raster visualization operations
library(raster) # raster functionalities
library(forecast) # ARIMA forecasting
library(xts) # extension for time series objects and analyses
library(zoo) # time series objects and analysis
library(lubridate) # date functionality
library(colorRamps) # contains the matlab.like color palette
library(rgeos) # contains topological operations
library(sphet) # contains spreg, spatial regression modeling
library(BMS) # contains hex2bin and bin2hex, Bayesian methods
library(bitops) # functions for bitwise operations
library(foreign) # import datasets from SAS, SPSS, Stata and other sources
library(gdata) # read xls, dbf etc.; not recently updated but useful
library(classInt) # methods to generate class limits
library(plyr) # data wrangling: various operations for splitting and combining data
#library(gstat) # spatial interpolation and kriging methods
library(readxl) # functionality to read in Excel-type data
library(psych) # PCA/eigenvector decomposition functionality
library(sf) # spatial objects and functionality
library(plotrix) # various graphic functions, e.g. draw.circle
library(TOC) # TOC and ROC for raster images
library(ROCR) # ROC analysis for data.frame inputs

###### Functions used in this script

create_dir_fun <- function(outDir, out_suffix=NULL){
  #if out_suffix is not null then append the out_suffix string
  if(!is.null(out_suffix)){
    out_name <- paste("output_", out_suffix, sep="")
    outDir <- file.path(outDir, out_name)
  }
  #create the directory if it does not exist
  if(!file.exists(outDir)){
    dir.create(outDir)
  }
  return(outDir)
}
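
## Example usage (a sketch with hypothetical paths, not part of the exercise data):
## create_dir_fun("/tmp", "test") would return "/tmp/output_test" and create that
## directory if it does not already exist.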

#####  Parameters and argument set up ###########

#Separate inputs and outputs directories
in_dir_var <- "/home/user/ost4sem/exercise/Exercise_4/data/"
out_dir <- "/home/user/ost4sem/exercise/Exercise_4/outputs"

### General parameters

#NLCD coordinate reference system: we will use this projection rather than TX.
CRS_reg <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"
file_format <- ".tif" #raster output format
NA_flag_val <- -9999 # NA value assigned to output rasters
out_suffix <- "exercise4_05102018" #output suffix for the files and output folder #PARAM 8
create_out_dir_param <- TRUE # if TRUE, an output directory using the output suffix will be created
method_proj_val <- "bilinear" # method used for reprojection and resampling
gdal_installed <- TRUE #if TRUE, GDAL is used to generate the distance files

### Input data files
rastername_county_harris <- "harris_county_mask.tif" #Region of interest: extent of Harris County
elevation_fname <- "srtm_Houston_area_90m.tif" #SRTM elevation
roads_fname <- "r_roads_Harris.tif" #Road count for Harris County

### Aggregated NLCD input files
infile_land_cover_date1 <- "agg_3_r_nlcd2001_Houston.tif"
infile_land_cover_date2 <- "agg_3_r_nlcd2006_Houston.tif"
infile_land_cover_date3 <- "agg_3_r_nlcd2011_Houston.tif"

infile_name_nlcd_legend <- "nlcd_legend.txt"
infile_name_nlcd_classification_system <- "classification_system_nlcd_legend.xlsx"

######################### START SCRIPT ###############################

## First create an output directory to separate inputs and outputs

if(is.null(out_dir)){
  out_dir <- dirname(in_dir_var) #output will be created in the input dir
}

out_suffix_s <- out_suffix #can modify the name of the output suffix
if(create_out_dir_param==TRUE){
  out_dir <- create_dir_fun(out_dir, out_suffix_s)
  setwd(out_dir)
}else{
  setwd(out_dir) #use previously defined directory
}

###########################################
### PART I: READ AND VISUALIZE DATA #######

r_lc_date1 <- raster(file.path(in_dir_var, infile_land_cover_date1)) #NLCD 2001
r_lc_date2 <- raster(file.path(in_dir_var, infile_land_cover_date2)) #NLCD 2006
r_lc_date3 <- raster(file.path(in_dir_var, infile_land_cover_date3)) #NLCD 2011

lc_legend_df <- read.table(file.path(in_dir_var, infile_name_nlcd_legend),
                           stringsAsFactors = F,
                           sep=",")

head(lc_legend_df) # Inspect data

plot(r_lc_date2) # View NLCD 2006; we still need to add the legend and use the appropriate palette!

### Let's add the legend and examine the existing land cover categories

freq_tb_date2 <- freq(r_lc_date2)
head(freq_tb_date2) #view the first rows; note this is a matrix object.

### Let's generate a palette from the NLCD legend information to view the existing land cover for 2006.
names(lc_legend_df)
dim(lc_legend_df) #contains a lot of empty rows

lc_legend_df <- subset(lc_legend_df, COUNT>0) #subset the data to remove unused (empty) rows
### Generate a color palette from the input Red, Green and Blue information using RGB encoding:


lc_legend_df$rgb <- paste(lc_legend_df$Red, lc_legend_df$Green, lc_legend_df$Blue, sep=",") #combine

### row 2 corresponds to the "open water" category
color_val_water <- rgb(lc_legend_df$Red[2], lc_legend_df$Green[2], lc_legend_df$Blue[2], maxColorValue = 255)
color_val_developed_high <- rgb(lc_legend_df$Red[7], lc_legend_df$Green[7], lc_legend_df$Blue[7], maxColorValue = 255)

lc_col_palette <- c(color_val_water, color_val_developed_high)

barplot(c(1,1),
        col=lc_col_palette,
        main="Visualization of color palette for NLCD land cover",
        names.arg=c("Open water", "Developed, High Intensity"), las=1)

### Let's generate a color for all the land cover categories using lapply and a function
n_cat <- nrow(lc_legend_df)
lc_col_palette <- lapply(1:n_cat,
                 FUN=function(i){rgb(lc_legend_df$Red[i], lc_legend_df$Green[i], lc_legend_df$Blue[i], maxColorValue = 255)})
lc_col_palette <- unlist(lc_col_palette)
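
## Note: rgb() is vectorized, so an equivalent one-liner (same palette as the lapply call above) would be:
## lc_col_palette <- rgb(lc_legend_df$Red, lc_legend_df$Green, lc_legend_df$Blue, maxColorValue = 255)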

lc_legend_df$palette <- lc_col_palette

r_lc_date2 <- ratify(r_lc_date2) # create a raster layer with categorical information
rat <- levels(r_lc_date2)[[1]] #This is a data.frame with the categories present in the raster

lc_legend_df_date2 <- subset(lc_legend_df, lc_legend_df$ID %in% rat[,1]) #find the land cover types present in date 2 (2006)
rat$legend <- lc_legend_df_date2$NLCD.2006.Land.Cover.Class #assign the labels back in case they are missing
levels(r_lc_date2) <- rat #add the information to the raster layer

### Now generate a plot of land cover with the NLCD legend and palette
levelplot(r_lc_date2,
          col.regions = lc_legend_df_date2$palette,
          scales=list(draw=FALSE),
          main = "NLCD 2006")

################################################
###  PART II : Analyze change and transitions

## As the plot shows for 2006, we have 15 land cover types. Analyzing such complex categories in terms of decrease (loss), increase (gain) and
# persistence in land cover will generate a large number of transitions (potentially up to 15*15=225 transitions in this case!).
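## Quick check of the combinatorics mentioned above:
nrow(lc_legend_df_date2)^2 # 15 classes present in 2006 -> 225 potential transitions, including persistence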

## To generalize the information, let's aggregate classes by leveraging the hierarchical nature of the NLCD Anderson classification system.

lc_system_nlcd_df <- read_xlsx(file.path(in_dir_var, infile_name_nlcd_classification_system))
head(lc_system_nlcd_df) #inspect data

### Let's identify the existing cover types and compute change:
r_stack_nlcd <- stack(r_lc_date1, r_lc_date2)
freq_tb_nlcd <- as.data.frame(freq(r_stack_nlcd, merge=T))
head(freq_tb_nlcd)

dim(lc_system_nlcd_df) # We have categories that are not relevant to the study area and time period.
lc_system_nlcd_df <- subset(lc_system_nlcd_df, id_l2 %in% freq_tb_nlcd$value)
dim(lc_system_nlcd_df) # Now 15 land categories instead of 20.

### Select the relevant columns for the reclassification
rec_df <- lc_system_nlcd_df[,c(2,1)]
r_date1_rec <- subs(r_lc_date1, rec_df, by="id_l2", which="id_l1")
r_date2_rec <- subs(r_lc_date2, rec_df, by="id_l2", which="id_l1")
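
## rec_df is a two-column lookup table mapping each NLCD level-II code (id_l2)
## to its aggregated level-I code (id_l1); subs() uses it to reclassify the rasters.
head(rec_df) # inspect the reclassification table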

plot(r_date1_rec)

rec_xtab_df <- crosstab(r_date1_rec, r_date2_rec, long=T)
names(rec_xtab_df) <- c("2001","2006","freq")

head(rec_xtab_df)
dim(rec_xtab_df) #9*9 possible transitions if we include NA values
print(rec_xtab_df) # View the full table

which.max(rec_xtab_df$freq)
rec_xtab_df[11,] # Note that the most important transition is persistence!!

### Let's rank the transitions:
class(rec_xtab_df)
any(is.na(rec_xtab_df$freq)) # check for NA frequencies
rec_xtab_df_ranked <- rec_xtab_df[order(rec_xtab_df$freq, decreasing=T), ]
head(rec_xtab_df_ranked) # Unsurprisingly, the top transitions are persistence categories
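
## Optional check (a sketch): what share of the landscape persisted vs. changed?
persistence_freq <- sum(rec_xtab_df$freq[as.character(rec_xtab_df[[1]]) == as.character(rec_xtab_df[[2]])], na.rm=TRUE)
total_freq <- sum(rec_xtab_df$freq, na.rm=TRUE)
persistence_freq / total_freq # proportion of pixels that kept the same aggregated class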

### Let's examine the overall change in categories rather than transitions

label_legend_df <- data.frame(ID=lc_system_nlcd_df$id_l1, name=lc_system_nlcd_df$name_l1)
r_stack <- stack(r_date1_rec, r_date2_rec)

lc_df <- freq(r_stack, merge=T)
names(lc_df) <- c("value","date1","date2")
lc_df$diff <- lc_df$date2 - lc_df$date1 #difference for each land cover category over the 2001-2006 period
head(lc_df) # Quickly examine the output

### Add the relevant category labels
lc_df <- merge(lc_df, label_legend_df, by.x="value", by.y="ID", all.y=F)
lc_df <- lc_df[!duplicated(lc_df),] #remove duplicates
head(lc_df) # Note the overall change

#### Now visualize the overall land cover changes
barplot(lc_df$diff, names.arg=lc_df$name, las=2)
total_val <- sum(lc_df$date1)
lc_df$perc_change <- 100*lc_df$diff/total_val
barplot(lc_df$perc_change, names.arg=lc_df$name, las=2)

### Create a change image to map all pixels that transitioned to the developed category:

r_cat2 <- r_date2_rec==2 # developed at date 2
r_not_cat2 <- r_date1_rec!=2 #remove areas that were already developed at date 1; we do not want persistence

r_change <- r_cat2 * r_not_cat2 #mask
plot(r_change, main="Land transitions to developed over 2001-2006")
change_tb <- freq(r_change) #Find out how many pixels transitioned to developed

#####################################
############# PART III: Process and prepare variables for land change modeling ##############

## y = 1 if the pixel changed to urban over 2001-2006
### Explanatory variables:
#var1: distance to existing urban land in 2001
#var2: distance to roads in 2001
#var3: slope (derived from elevation); low slope is better for new development
#var4: past land cover state, which may influence future land change

## 1) Generate var1 and var2: distance to developed land and distance to roads

### Distance to existing development in 2001: prepare the input
r_cat2 <- r_date1_rec==2 #developed in 2001
plot(r_cat2)
cat_bool_fname <- "developped_2001.tif" #input for the distance-to-developed computation
writeRaster(r_cat2, filename = cat_bool_fname, overwrite=T)

### Read in the road count data
r_roads <- raster(file.path(in_dir_var, roads_fname))
plot(r_roads, colNA="black")
res(r_roads)

#### Aggregate to match the NLCD data resolution
r_roads_90m <- aggregate(r_roads,
                         fact=3, #factor of aggregation in x and y
                         fun=mean) #function used to aggregate values
plot(r_roads_90m)

r_roads_bool <- r_roads_90m > 0
plot(r_roads_bool)

roads_bool_fname <- "roads_bool.tif" #input for the distance-to-roads computation
writeRaster(r_roads_bool, filename = roads_bool_fname, overwrite=T)

### This part could be turned into a function, but we keep it inline for clarity and learning:
if(gdal_installed==TRUE){
  
  ## Distance from developed land in 2001
  srcfile <- cat_bool_fname
  dstfile_developped <- file.path(out_dir, paste("developped_distance_", out_suffix, file_format, sep=""))
  n_values <- "1"
  
  ### Prepare the GDAL command: note that gdal_proximity.py can fail when the path is too long
  cmd_developped_str <- paste("gdal_proximity.py",
                              basename(srcfile),
                              basename(dstfile_developped),
                              "-values", n_values, sep=" ")

  ### Distance from roads
  srcfile <- roads_bool_fname
  dstfile_roads <- file.path(out_dir, paste("roads_bool_distance_", out_suffix, file_format, sep=""))
  n_values <- "1"
  
  ### Prepare the GDAL command: note that gdal_proximity.py can fail when the path is too long
  cmd_roads_str <- paste("gdal_proximity.py", basename(srcfile),
                         basename(dstfile_roads),
                         "-values", n_values, sep=" ")

  sys_os <- as.list(Sys.info())$sysname #Find which OS is in use.
  
  if(sys_os=="Windows"){
    shell(cmd_developped_str)
    shell(cmd_roads_str)
  }else{
    system(cmd_developped_str)
    system(cmd_roads_str)
  }
  r_roads_distance <- raster(dstfile_roads)
  r_developped_distance <- raster(dstfile_developped)
  
}else{
  r_developped_distance <- raster(file.path(in_dir_var, paste("developped_distance", file_format, sep="")))
  r_roads_distance <- raster(file.path(in_dir_var, paste("roads_bool_distance", file_format, sep="")))
}
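
## If GDAL is not available, a slower raster-only alternative (a sketch) is raster::distance(),
## which computes, for every NA cell, the distance to the nearest non-NA cell:
# r_tmp <- r_roads_bool
# r_tmp[r_tmp == 0] <- NA                 # keep only road cells as non-NA
# r_roads_distance <- distance(r_tmp)     # distance to the nearest road cell (in map units)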

plot(r_developped_distance) #This is at 90m.
plot(r_roads_distance) #This is at 90m.

#Now rescale the distances...
min_val <- cellStats(r_roads_distance, min)
max_val <- cellStats(r_roads_distance, max)
r_roads_dist <- (max_val - r_roads_distance) / (max_val - min_val) #high values close to 1 for areas close to roads


min_val <- cellStats(r_developped_distance, min)
max_val <- cellStats(r_developped_distance, max)
r_developped_dist <- (max_val - r_developped_distance) / (max_val - min_val)

## 2) Generate var3: slope
r_elevation <- raster(file.path(in_dir_var, elevation_fname))

projection(r_elevation) # This is not in the same projection as the study area.
r_elevation_reg <- projectRaster(from= r_elevation, #input raster to reproject
                                 to= r_date1_rec, #raster with the desired extent, resolution and projection system
                                 method= method_proj_val) #method used in the reprojection

r_slope <- terrain(r_elevation_reg, unit="degrees")
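
## Note: terrain() computes slope by default; if aspect is also of interest, both can be
## requested in one call (an optional variant, not needed for the model below):
# r_terrain <- terrain(r_elevation_reg, opt=c("slope","aspect"), unit="degrees")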

## 3) Generate var4: past land cover state

### Reclassify the land cover
r_mask <- r_date1_rec==2 #Remove developed land from the 2001 land cover
r_date1_rec_masked <- mask(r_date1_rec, r_mask, maskvalue=1)
plot(r_date1_rec_masked)

#####################################
############# PART IV: Run model and perform assessment ##############

##############
###### Step 1: Consistent masking: generate a mask removing water (1) and developed (2) in 2001

#r_mask <- (r_date1_rec!=2)*(r_date1_rec!=1)*r_county_harris
r_mask <- (r_date1_rec!=2)*(r_date1_rec!=1)
plot(r_mask)
NAvalue(r_mask) <- 0

### Read in the focus area for the modeling:
r_county_harris <- raster(file.path(in_dir_var, rastername_county_harris))
### Screen for the area of interest
r_mask <- r_mask * r_county_harris
r_mask[r_mask==0] <- NA
plot(r_mask)

### Check the number of NA and valid pixels in the study area:
tb_study_area <- freq(r_mask)
print(tb_study_area)

## Generate the dataset for Harris County
r_variables <- stack(r_change, r_date1_rec_masked, r_slope, r_roads_dist, r_developped_dist)
r_variables <- mask(r_variables, mask=r_mask) # mask to keep the relevant area
names(r_variables) <- c("change","land_cover","slope","roads_dist","developped_dist")
NAvalue(r_variables) <- NA_flag_val

## Examine all the variables
plot(r_variables)

### Check for consistency in the mask:
NA_freq_tb <- freq(r_variables, value=NA, merge=T)
### Notice that the number of NA values is not consistent across layers.
### Let's recombine all NA values for consistency:
plot(r_variables)
r_NA <- r_variables > -1 #There are no negative values in the raster stack

r_valid_pixels <- overlay(r_NA, fun=sum)
plot(r_valid_pixels)
dim(r_NA)
freq(r_valid_pixels)

r_mask <- r_valid_pixels > 0
r_variables <- mask(r_variables, r_mask)
#r_variables <- freq(r_test2,value=NA,merge=T)
names(r_variables) <- c("change","land_cover","slope","roads_dist","developped_dist")

###############
###### Step 2: Fit a glm model and generate predictions

variables_df <- na.omit(as.data.frame(r_variables))
dim(variables_df)
variables_df$land_cover <- as.factor(variables_df$land_cover)
variables_df$change <- as.factor(variables_df$change)

names(variables_df)
#names(variables_df) <- c("change","land_cover","elevation","roads_dist","developped_dist")

mod_glm <- glm(change ~ land_cover + slope + roads_dist + developped_dist,
               data=variables_df, family=binomial())

print(mod_glm)
summary(mod_glm)
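
## The coefficients are on the logit scale; a common way to interpret them (a sketch,
## using base R only) is to look at odds ratios with Wald confidence intervals:
exp(cbind(odds_ratio = coef(mod_glm), confint.default(mod_glm)))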

r_p <- predict(r_variables, mod_glm, type="response")
plot(r_p)
histogram(r_p)
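
## The map r_p contains predicted probabilities of change. As an illustration (the 0.5 cutoff is
## arbitrary; a threshold would normally be chosen from the ROC/TOC analysis below), a binary
## prediction map can be produced by thresholding:
# r_p_bin <- r_p >= 0.5
# plot(r_p_bin, main="Predicted change to developed (p >= 0.5)")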

###############
###### Step 3: Model assessment with ROC

## We use the TOC package since it allows the use of raster layers.

r_change_harris <- subset(r_variables, "change")

## These are the inputs for the assessment
plot(r_change_harris) # boolean reference variable
plot(r_mask) # mask for the relevant observations
plot(r_p) # index variable to assess

roc_rast <- ROC(index=r_p,
                boolean=r_change_harris,
                mask=r_mask,
                nthres=100)

plot(roc_rast)
slot(roc_rast, "AUC") #this is the AUC from the ROC for the logistic model

toc_rast <- TOC(index=r_p,
                boolean=r_change_harris,
                mask=r_mask,
                nthres=100)

plot(toc_rast)
slot(toc_rast, "AUC") #this is the AUC from the TOC for the logistic model
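
## Optional cross-check with the ROCR package (loaded above but not otherwise used): a sketch
## computing the AUC from the model's fitted probabilities on the data.frame used to fit the glm.
pred_obj <- prediction(fitted(mod_glm), variables_df$change)
auc_rocr <- performance(pred_obj, measure="auc")
auc_rocr@y.values[[1]] # should be close to the raster-based AUC above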

###############################  End of script  #####################################

</code>