#Electronic Appendix S2 - python script for running 3D MDE # --------------------------------------------------------------------------- # MDE python script # last revised on: 16-07-2007 # Created by: Jeremy VanDerWal # email: jjvanderwal@gmail.com # --------------------------------------------------------------------------- # #Description: #creates Mid domain effect predictions in 3-dimensions #use with python version 2.5 or later... tested and runs on winXP # #Currently set up for use on North America. Some constants with respect to projections #need to be changed for use elsewhere. from __future__ import with_statement import win32com.client, os, time, sys, math, operator, shutil from random import randint #need to get the DEM dem_file = raw_input('\n' + 'enter the location of the DEM in ascii raster format \n') if dem_file == '': dem_file = 'c:/working/MDE/Base_Data/dem.asc'#sys.exit('you must enter a dem file') #need to get the output save folder savefolder = raw_input('\n' + 'enter the location to save output files \n') if savefolder == '': savefolder = 'C:/Working/MDE/Outputs/16_07_07_run/mamm'#['C:/Working/MDE/Outputs/16_07_07_run/amph','C:/Working/MDE/Outputs/16_07_07_run/bird','C:/Working/MDE/Outputs/16_07_07_run/mamm','C:/Working/MDE/Outputs/16_07_07_run/tree'] #sys.exit('you must enter a save folder') #need to get the species list print '\n' + 'NOTE: species grids must be in ASCII grid format with cells within range being = 1!!!' spp_folder = raw_input('enter the location of the species specific ascii files \n') if spp_folder == '': spp_folder = 'c:/working/MDE/Base_Data/spp/ASCII/mamm'#['c:/working/MDE/Base_Data/spp/ASCII/amph','c:/working/MDE/Base_Data/spp/ASCII/bird','c:/working/MDE/Base_Data/spp/ASCII/mamm','c:/working/MDE/Base_Data/spp/ASCII/tree',] #sys.exit('you must enter the species folder') #read in a theoretical spp print '\n\ntheoretical species list take a long time to create. If you have \npreviously created on and want to use it, enter a location of the file' theor_spp_file = raw_input('however to create the species list, hit return without entering anything. \n') if theor_spp_file == '': create_theor_spp = True else: create_theor_spp = False #set some constants num_theor_spp = 1000000 reps = 100 def main(): #setup the start time and log for each taxa curtime = str(time.strftime('%X %x')) #get the current time print '\n','process started at',curtime,'\n' t1 = time.clock() start_log() #start the log file write_log('\nprocess started at' + str(curtime) + '\n') #add the starting time to the log file #read the DEM fin = open(dem_file,'r') # open the file header,data = read_asc(fin) #read in the header adn data from the asc file # data variable is -- #0-alt,1-tcount,2-lon,3-lat,4-area,5-xband,6-yband,7-zband del fin #free up memory as is no longer needed #clean up the DEM data by removing cells that have no neighbors remove_no_neighbors(data,header) #extract specific data and store it in xyz [id_num,x,y,z,area,lat,lon,yband,xband,zband] xyz = process_data(data,header) xband = [] #will store xband information [band number, max x, area] yband = [] #will store xband information [band number, max y, area] zband = [] #will store xband information [band number, max z, area] create_bands(xyz,xband,yband,zband) # create the x y and z bands #get a dictionary of the area for each comination of bands x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data = setup_data_storage(xyz) write_xyz_data(xyz) #write out the xyz data del xyz #free up memory as basic data is no longer needed write_band_info(xband,yband,zband)#output the banding information apply_bands(data,xband,yband,zband,header) #apply the banding information to the basic DEM data del xband del yband del zband #write out the band ascii files write_asc(header,data,'x_band.asc',5) write_asc(header,data,'y_band.asc',6) write_asc(header,data,'z_band.asc',7) #start processing the species files spp_info = process_spp(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data) #process the spp asc files to get the spp specific info write_spp_info(spp_info) #write out the spp info write_richness_maps(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data,'empirical_richness','empirical') #write out the empirical richness maps #need to either read in the theoretical species list or create it if create_theor_spp == True: #now create theoretical spp theor_spp = create_theoretical_spp(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data) #create theoretical spp distributions write_theor_spp_info(theor_spp,'All_theor_spp.csv') else: #read in the theoretical species theor_spp = read_in_theor_spp() #now sort the list and continue processing the species theor_spp.sort(key=operator.itemgetter('xyz_area')) #sort the distributions based on xyz area theor_spp_list = create_theoretical_spp_subsets(theor_spp,spp_info)#get a list of spp for each rep of the model run MDE_spp = process_theor_spp(data,header,theor_spp,theor_spp_list,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data) write_theor_spp_info(MDE_spp,'MDE_theor_spp.csv') write_richness_maps(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data,'mean_theor_richness','MDE_mean') #write out the mean richness maps from models write_data_for_analysis(x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data) #finishing touches convert_asc_ESRI() curtime = str(time.strftime('%X %x')) elapsedtime = (time.clock() - t1) / 60 print '\n','completed task at',curtime,'- some',elapsedtime,'minutes later\n' write_log('\ncompleted task at ' + str(curtime) + ' - some ' + str(elapsedtime) + ' minutes later\n') def start_log(): fout = open(savefolder + '/README.TXT','w') tstr = 'MDE.py Script\nCreated by Jeremy VanDerWal (jjvanderwal@gmail.com) revised 16/07/2007\n' tstr += '\nCreates 3D MDE predictions based on the range size frequency distribution of an empirical group of species...\n' tstr += '\nExplanation of output files:' tstr += '\n1. ascii folder ? outputs / visualizations of data in Ascii grid format' tstr += '\n\ta. x_band, y_band, z_band ? maps of banding patterns in each single dimension' tstr += '\n\tb. empirical_* - maps of empirical species richness for each of the seven models' tstr += '\n\tc. MDE_mean_* - maps of mean MDE predicted species richness for each of the seven models' tstr += '\n\td. MDE_se_* - maps of se associated with MDE predicted species richness for each of the seven models' tstr += '\n2. Esri_grid folder ? same as ascii folder but only in esri grid format for visualization in ArcGIS' tstr += '\n3. xyz_data.csv ? basic information about the domain such that for each unique location, area of grid cell (modified by elevation of neighboring cells) and banding information is given' tstr += '\n4. xband.csv, yband.csv,zband.csv ? gives for each of x,y and z, the band number, area of the associated band and maximum value associated with the band (e.g., max longitude for the longitudinal band)' tstr += '\n5. spp_info.csv ? information about the empirical spp? information includes' tstr += '\n\ta. spp name' tstr += '\n\tb. extents of range (xband_max, xband_min, yband_max, yband_min, zband_max, zband_min)' tstr += '\n\tc. area based on empirical range (empirical_area) and based on extents (x_area, y_area, z_area, xy_area, xz_area, yz_area, xyz_area)' tstr += '\n6. theor_spp_info.csv ? same as empirical spp_info but for theoretical spp' tstr += '\n7. *_model.csv - stored information which includes, for each model, band info, area of the band, empirical richness, mean and SE of the MDE model predicted richness' tstr += '\n8. *.gal ? neighborhood files to be used in R for Spatial Autoregressive Models. One for each of the 7 MDE models created.' tstr += '\n\nNOTE: * represents a wildcard here...' fout.write(tstr + '\n') del fout def write_log(tstr): fout = open(savefolder + '/README.TXT','a') fout.write(tstr + '\n') del fout def read_asc(fin): #returns the 6 line header as a dict header = {} line = fin.readline().split() header['ncol'] = int(line[1]) line = fin.readline().split() header['nrow'] = int(line[1]) line = fin.readline().split() header['xllcorner'] = float(line[1]) line = fin.readline().split() header['yllcorner'] = float(line[1]) line = fin.readline().split() header['cellsize'] = float(line[1]) line = fin.readline().split() header['NODATA_value'] = int(line[1]) #read in the data as an vector of vectors data = [] for row in range(header['nrow']): tt = fin.readline().split() ttt = [] for i in range(len(tt)): try: ttt.append([int(tt[i])]) except: print tt[i] ttt.append([0]) data.append(ttt) return(header,data) def remove_no_neighbors(data,header): #if a value as no neighbors, remove it from the possible locations for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: #if it is not a nodata value t = 0 try: if data[i-1][j][0] != header['NODATA_value']: t = 1 except: pass try: if data[i+1][j][0] != header['NODATA_value']: t = 1 except: pass try: if data[i][j-1][0] != header['NODATA_value']: t = 1 except: pass try: if data[i][j+1][0] != header['NODATA_value']: t = 1 except: pass try: if data[i-1][j+1][0] != header['NODATA_value']: t = 1 except: pass try: if data[i-1][j-1][0] != header['NODATA_value']: t = 1 except: pass try: if data[i+1][j-1][0] != header['NODATA_value']: t = 1 except: pass try: if data[i+1][j+1][0] != header['NODATA_value']: t = 1 except: pass if t == 0: data[i][j][0] = header['NODATA_value'] def process_data(data,header): print '\nProcessing the DEM data' tcount = 0 xyz = [] #cycle through each cell... for i in range(len(data)): if i % 50 == 0: print '.', for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: y = (header['cellsize'] * 0.5) + header['yllcorner'] + ((header['nrow'] - 1 -i) * header['cellsize']) x = (header['cellsize'] * 0.5) + header['xllcorner'] + (j * header['cellsize']) z = int(data[i][j][0]) #caculate area #area estimated based onJenness, J.S. 2004. Wildlife Society Bulletin 32:829-839 #the algorithm uses elevation of the surrounding 8 cells to extimate the cell of interests area #t0 - elevation at center #t1 - u, t2 - ur, t3 - r, t4 - lr, t5 - l, t6 - ll, t7 - l, t8 - ul t0 = z #cell of interest elevation try: t1 = data[i-1][j][0] #get the value immediately to the north (up) of the cell of interest if t1 == header['NODATA_value']: t1 = t0 #if the value is nodata, then set it equal to the cell of interests value except: t1 = t0 #if cannot get value of upper cell (e.g., top of map), set it equalt to the cell of interest value try: t2 = data[i-1][j+1][0] if t2 == header['NODATA_value']: t2 = t0 except: t2 = t0 try: t3 = data[i][j+1][0] if t3 == header['NODATA_value']: t3 = t0 except: t3 = t0 try: t4 = data[i+1][j+1][0] if t4 == header['NODATA_value']: t4 = t0 except: t4 = t0 try: t5 = data[i+1][j][0] if t5 == header['NODATA_value']: t5 = t0 except: t5 = t0 try: t6 = data[i+1][j-1][0] if t6 == header['NODATA_value']: t6 = t0 except: t6 = t0 try: t7 = data[i][j-1][0] if t7 == header['NODATA_value']: t7 = t0 except: t7 = t0 try: t8 = data[i-1][j-1][0] if t8 == header['NODATA_value']: t8 = t0 except: t8 = t0 tlength = header['cellsize'] #estimate areas based on elevation a = 0.5 * math.sqrt(tlength**2 + (t0-t1)**2) b = 0.5 * math.sqrt(tlength**2 + (t1-t2)**2) c = 0.5 * math.sqrt(tlength**2 + (t2-t0)**2) s = (a+b+c)/2 area = math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t2)**2) b = 0.5 * math.sqrt(tlength**2 + (t2-t3)**2) c = 0.5 * math.sqrt(tlength**2 + (t3-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t3)**2) b = 0.5 * math.sqrt(tlength**2 + (t3-t4)**2) c = 0.5 * math.sqrt(tlength**2 + (t4-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t4)**2) b = 0.5 * math.sqrt(tlength**2 + (t4-t5)**2) c = 0.5 * math.sqrt(tlength**2 + (t5-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t5)**2) b = 0.5 * math.sqrt(tlength**2 + (t5-t6)**2) c = 0.5 * math.sqrt(tlength**2 + (t6-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t6)**2) b = 0.5 * math.sqrt(tlength**2 + (t6-t7)**2) c = 0.5 * math.sqrt(tlength**2 + (t7-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t7)**2) b = 0.5 * math.sqrt(tlength**2 + (t7-t8)**2) c = 0.5 * math.sqrt(tlength**2 + (t8-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) a = 0.5 * math.sqrt(tlength**2 + (t0-t8)**2) b = 0.5 * math.sqrt(tlength**2 + (t8-t1)**2) c = 0.5 * math.sqrt(tlength**2 + (t1-t0)**2) s = (a+b+c)/2 area += math.sqrt(s*(s-a)*(s-b)*(s-c)) #work out lat and long for the cells location (lat,lon) = Albers_to_latlong(x,y) #if j % 1000 == 0: print x,' - ',y,' -- ',lat,' - ',lon #store the data data[i][j].append(tcount) data[i][j].append(lon) data[i][j].append(lat) data[i][j].append(area) xyz.append([tcount,i,j,z,area,lat,lon]) tcount = tcount + 1 print '\n' return(xyz) def Albers_to_latlong(x,y): #default basics for NA Albers equal area projection E = x #easting N = y #northing Ef = 0 #false easting Nf = 0 #false northing cm = math.radians(-96) #central meridian sp0 = math.radians(40) #latitude of origen sp1 = math.radians(20) #standard parallel 1 sp2 = math.radians(60) #standard parallel 2 #WGS84 ellipsoid info a = 6378137 # radius at equator f = 1 / 298.257223563 # flattening (1/(1/f)) e_2 = ((2 * f) - (f ** 2)) e_1 = math.sqrt(e_2) # known as e #start the calculations m2 = math.cos(sp2) / ((1 - (e_2 * math.sin(sp2)**2))**0.5) m1 = math.cos(sp1) / ((1 - (e_2 * math.sin(sp1)**2))**0.5) q2 = (1 - e_2) * ((math.sin(sp2) / (1 - (e_2 * (math.sin(sp2)**2)))) - ((1 / (2 * e_1)) * math.log((1 - (e_1 * math.sin(sp2))) / (1 + (e_1 * math.sin(sp2)))))) q1 = (1 - e_2) * ((math.sin(sp1) / (1 - (e_2 * (math.sin(sp1)**2)))) - ((1 / (2 * e_1)) * math.log((1 - (e_1 * math.sin(sp1))) / (1 + (e_1 * math.sin(sp1)))))) q0 = (1 - e_2) * ((math.sin(sp0) / (1 - (e_2 * (math.sin(sp0)**2)))) - ((1 / (2 * e_1)) * math.log((1 - (e_1 * math.sin(sp0))) / (1 + (e_1 * math.sin(sp0)))))) n = (m1**2 - m2**2) / (q2 - q1) C = m1**2 + (n * q1) p0 = (a * (C - (n * q0))**0.5) / n theta = math.atan((E-Ef) / (p0 - (N-Nf))) p = ((E - Ef)**2 + (p0 - (N-Nf))**2)**0.5 q = (C - ((p**2 * n**2) / a**2)) / n # q is also known as alpha prime lon = math.degrees(cm + theta / n) lat2 = sp0 lat1 = 1-sp0 while abs(lat1 - lat2) > 0.0000001: lat1 = lat2 tt1 = ((1 - (e_2 * (math.sin(lat1)**2)))**2) / (2 * math.cos(lat1)) tt2 = q / (1 - e_2) tt3 = math.sin(lat1) / (1 - (e_2 * (math.sin(lat1)**2))) tt4 = 1 / (2 * e_1) tt5 = math.log((1 - (e_1 * math.sin(lat1))) / (1 + (e_1 * math.sin(lat1)))) lat2 = lat1 + (tt1 * (tt2 - tt3 + (tt4 * tt5))) lat = math.degrees(lat2) return(lat,lon) def create_bands(xyz,xband,yband,zband): print '\nProcessing banding patterns' #now create banding patterns total_area = 0 for i in xyz: total_area += i[4] band_area = total_area / 50 #get some 50 equal area bands... #calculate lat band (y band information) xyz.sort(key=operator.itemgetter(5)) #sort the xyz data by latitude tarea1 = 0 tarea2 = 0 band = 0 for i in range(len(xyz)): #cycle through the latitudes working out bands if i % 10000 == 0: print '.', tarea1 = tarea2 tarea2 += xyz[i][4] if tarea1 < band_area and tarea2 > band_area: #add latitudes to a specific band until the target area is reached if abs(tarea1 - band_area) > abs(tarea2 - band_area): yband.append([band,xyz[i][5],tarea2]) xyz[i].append(band) band += 1 tarea2 = 0 else: yband.append([band,xyz[i-1][5],tarea1]) band += 1 xyz[i].append(band) tarea2 = xyz[i][4] else: xyz[i].append(band) if i == len(xyz) - 1: yband.append([band,xyz[i][5],tarea2]) #calculate lon band (xband) xyz.sort(key=operator.itemgetter(6)) #repeat of latitude but with longitude tarea1 = 0 tarea2 = 0 band = 0 for i in range(len(xyz)): if i % 10000 == 0: print '.', tarea1 = tarea2 tarea2 += xyz[i][4] if tarea1 < band_area and tarea2 > band_area: if abs(tarea1 - band_area) > abs(tarea2 - band_area): xband.append([band,xyz[i][6],tarea2]) xyz[i].append(band) band += 1 tarea2 =0 else: xband.append([band,xyz[i-1][6],tarea1]) band += 1 xyz[i].append(band) tarea2 = xyz[i][4] else: xyz[i].append(band) if i == len(xyz) - 1: xband.append([band,xyz[i][6],tarea2]) #calculate elevational band (zband) xyz.sort(key=operator.itemgetter(3)) talt = [] # get all the possible altitudes alt_max = xyz[len(xyz)-1][3] for i in range(alt_max + 1): talt.append([0]) #get the area of each elevational band for i in xyz: talt[i[3]][0] += i[4] #cycle through and assign band to each elevation tarea1 = 0 tarea2 = 0 band = 0 for i in range(len(talt)): if i % 10000 == 0: print '.', tarea1 = tarea2 tarea2 += talt[i][0] if tarea1 < band_area and tarea2 > band_area: if abs(tarea1 - band_area) > abs(tarea2 - band_area): talt[i].append(band) band += 1 tarea2 = 0 else: band += 1 talt[i].append(band) tarea2 = talt[i][0] else: talt[i].append(band) #now go through and get the max of each band and append it to the bands list tband = 0 tarea = 0 for i in range(len(talt)): if i % 10000 == 0: print '.', if talt[i][1] == tband + 1: zband.append([tband,i-1,tarea]) tarea = 0 tband += 1 tarea += talt[i][0] if i == len(talt) - 1 : zband.append([tband,i,tarea]) #apply this zband info to the xyz data for i in range(len(xyz)): if i % 10000 == 0: print '.', tt = 0 for j in range(len(zband)): if j == 0: if xyz[i][3] <= zband[j][1]: xyz[i].append(zband[j][0]) tt = 1 if j != len(zband) - 1 and tt == 0: if xyz[i][3] > zband[j][1] and xyz[i][3] <= zband[j+1][1]: xyz[i].append(zband[j+1][0]) tt = 1 else: if tt == 0: xyz[i].append(zband[j][0]) print '\n' def setup_data_storage(xyz): #xyz setup is id_num,x,y,z,area,lat,lon,yband,xband,zband x_data = {} y_data = {} z_data = {} xy_data = {} xz_data = {} yz_data = {} xyz_data = {} #get area for all unique values of x,y and z in all combinations for row in xyz: try: x_data[(row[8])]['area'] += row[4] except: x_data[(row[8])] = {} x_data[(row[8])]['area'] = row[4] x_data[(row[8])]['empirical_richness'] = 0.0 x_data[(row[8])]['mean_theor_richness'] = 0.0 try: y_data[(row[7])]['area'] += row[4] except: y_data[(row[7])] = {} y_data[(row[7])]['area'] = row[4] y_data[(row[7])]['empirical_richness'] = 0.0 y_data[(row[7])]['mean_theor_richness'] = 0.0 try: z_data[(row[9])]['area'] += row[4] except: z_data[(row[9])] = {} z_data[(row[9])]['area'] = row[4] z_data[(row[9])]['empirical_richness'] = 0.0 z_data[(row[9])]['mean_theor_richness'] = 0.0 try: xy_data[(row[8],row[7])]['area'] += row[4] except: xy_data[(row[8],row[7])] = {} xy_data[(row[8],row[7])]['area'] = row[4] xy_data[(row[8],row[7])]['empirical_richness'] = 0.0 xy_data[(row[8],row[7])]['mean_theor_richness'] = 0.0 try: xz_data[(row[8],row[9])]['area'] += row[4] except: xz_data[(row[8],row[9])] = {} xz_data[(row[8],row[9])]['area'] = row[4] xz_data[(row[8],row[9])]['empirical_richness'] = 0.0 xz_data[(row[8],row[9])]['mean_theor_richness'] = 0.0 try: yz_data[(row[7],row[9])]['area'] += row[4] except: yz_data[(row[7],row[9])] = {} yz_data[(row[7],row[9])]['area'] = row[4] yz_data[(row[7],row[9])]['empirical_richness'] = 0.0 yz_data[(row[7],row[9])]['mean_theor_richness'] = 0.0 try: xyz_data[(row[8],row[7],row[9])]['area'] += row[4] except: xyz_data[(row[8],row[7],row[9])] = {} xyz_data[(row[8],row[7],row[9])]['area'] = row[4] xyz_data[(row[8],row[7],row[9])]['empirical_richness'] = 0.0 xyz_data[(row[8],row[7],row[9])]['mean_theor_richness'] = 0.0 return(x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data) def write_asc(header,data,filename,list_num): #create the folder for ascii files try: os.mkdir(savefolder + '/ascii') except: pass print '\n' + 'writing file ' + filename toutfile = savefolder + '/ascii' + '/' + filename #open the file outfile = file(toutfile,'w') theader = '' #write out the header information theader = theader + "ncols " + str(header['ncol']) + "\n" theader = theader + "nrows " + str(header['nrow']) + "\n" theader = theader + "xllcorner " + str(header['xllcorner']) + "\n" theader = theader + "yllcorner " + str(header['yllcorner']) + "\n" theader = theader + "cellsize " + str(header['cellsize']) + "\n" theader = theader + "NODATA_value " + str(-9) + "\n" outfile.write(theader) #write out the data for row in range(len(data)): if row % 50 == 0: print '.', tstr = '' for col in range(len(data[row])): if data[row][col][0] != header['NODATA_value']: tstr += ' ' + str(data[row][col][list_num]) else: tstr += ' ' + str(-9) outfile.write(tstr + '\n') #close the file del outfile print '\n' def write_band_info(xband,yband,zband): #output the xband print '\n' + 'writing file xband.csv' toutfile = savefolder + '/xband.csv' #open the file outfile = file(toutfile,'w') theader = 'band_num,max_long,band_area\n' outfile.write(theader) for band in xband: outfile.write(str(band[0]) + ',' + str(band[1]) + ',' + str(int(band[2]/1000000)) + '\n') del outfile #output the yband print '\n' + 'writing file yband.csv' toutfile = savefolder + '/yband.csv' #open the file outfile = file(toutfile,'w') theader = 'band_num,max_lat,band_area\n' outfile.write(theader) for band in yband: outfile.write(str(band[0]) + ',' + str(band[1]) + ',' + str(int(band[2]/1000000)) + '\n') del outfile #output the zband print '\n' + 'writing file zband.csv' toutfile = savefolder + '/zband.csv' #open the file outfile = file(toutfile,'w') theader = 'band_num,max_elevation,band_area\n' outfile.write(theader) for band in zband: outfile.write(str(band[0]) + ',' + str(band[1]) + ',' + str(int(band[2]/1000000)) + '\n') del outfile print '\n' def apply_bands(data,xband,yband,zband,header): print '\nApplying band info to base dem' num_xband = len(xband) num_yband = len(yband) num_zband = len(zband) for i in range(len(data)): if i % 50 == 0: print '.', for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: #if not nodata #assign band info x = data[i][j][2] y = data[i][j][3] z = data[i][j][0] #xband for k in range(num_xband): if k != 0: if x <= xband[k][1] and x > xband[k - 1][1]: data[i][j].append(k) break else: if x <= xband[0][1]: data[i][j].append(k) #yband for k in range(num_yband): if k != 0: if y <= yband[k][1] and y > yband[k - 1][1]: data[i][j].append(k) break else: if y <= yband[0][1]: data[i][j].append(k) #zband for k in range(num_zband): if k != 0: if z <= zband[k][1] and z > zband[k - 1][1]: data[i][j].append(k) break else: if z <= zband[0][1]: data[i][j].append(k) print '\n' def process_spp(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data): print '\nProcessing the species files -- patience is a virtue here...' #create the folder for excluding spp try: os.mkdir(spp_folder + '/excluded') except: pass #get the spp list filelist = os.listdir(spp_folder) #setup spp_data ...list will hold spp_info =[] tcount = 0 print 'working on spp ' #start cycling through spp for tfile in filelist: if '.asc' in tfile: if tcount % 10 == 0: print str(tcount),'...', ## print ' ' + tfile spp_fin = open(spp_folder + '/' + tfile,'r') # open the file spp_header,spp_data = read_asc(spp_fin) #read in the header/data del spp_fin #free up memory as is no longer needed #create a temp_data variable same size as dem a = [] for i in range(len(data)): tmp_var = [] for j in range(len(data[i])): tmp_var.append([0]) a.append(tmp_var) del tmp_var #cycle through the spp_data to find out if at least 80% of spp range in domain num_in = 0 num_out = 0 for i in range(len(spp_data)): for j in range(len(spp_data[i])): if spp_data[i][j][0] == 1: #get the UTM coordinates y = (spp_header['cellsize'] * 0.5) + spp_header['yllcorner'] + ((spp_header['nrow'] - 1 -i) * spp_header['cellsize']) x = (spp_header['cellsize'] * 0.5) + spp_header['xllcorner'] + (j * spp_header['cellsize']) #check if these are within the DEM extents if y >= header['yllcorner'] and x >= header['xllcorner']: #check if point is within the domain tx = int((x - (header['cellsize'] * 0.5) - header['xllcorner']) / header['cellsize']) ty = int(header['nrow'] - 1 - ((y - (header['cellsize'] * 0.5) - header['yllcorner']) / header['cellsize'])) try: if data[ty][tx][0] != header['NODATA_value']: num_in += 1 a[i][j][0] = 1 #append long, lat, alt (x,y,z) bands spp_data[i][j].append(data[ty][tx][5]) spp_data[i][j].append(data[ty][tx][6]) spp_data[i][j].append(data[ty][tx][7]) else: num_out += 1 except: num_out += 1 else: num_out += 1 #if less than 80% of range in domain.. move the file to the excluded folder try: tnum = float(num_in) / float((num_in + num_out)) except: tnum = 0 if tnum <= 0.80: shutil.copyfile(spp_folder + '/' + tfile,spp_folder + '/excluded/' + tfile) os.remove(spp_folder + '/' + tfile) #if greater than 80% of range in domain... continue processing else: spp_info.append({}) spp_info[tcount]['spp'] = tfile.replace('.asc','') spp_info[tcount]['xband_max'] = 0 spp_info[tcount]['xband_min'] = 199 spp_info[tcount]['yband_max'] = 0 spp_info[tcount]['yband_min'] = 199 spp_info[tcount]['zband_max'] = 0 spp_info[tcount]['zband_min'] = 199 spp_info[tcount]['empirical_area'] = 0 spp_info[tcount]['p_domain'] = tnum #now cycle through to get band information for i in range(len(spp_data)): for j in range(len(spp_data[i])): if spp_data[i][j][0] == 1: #get the UTM coordinates y = (spp_header['cellsize'] * 0.5) + spp_header['yllcorner'] + ((spp_header['nrow'] - 1 -i) * spp_header['cellsize']) x = (spp_header['cellsize'] * 0.5) + spp_header['xllcorner'] + (j * spp_header['cellsize']) #check if these are within the DEM extents if y >= header['yllcorner'] and x >= header['xllcorner']: #check if point is within the domain tx = int((x - (header['cellsize'] * 0.5) - header['xllcorner']) / header['cellsize']) ty = int(header['nrow'] - 1 - ((y - (header['cellsize'] * 0.5) - header['yllcorner']) / header['cellsize'])) try: if data[ty][tx][0] != header['NODATA_value']: #get the band extent info xband = spp_data[i][j][1] yband = spp_data[i][j][2] zband = spp_data[i][j][3] if xband >= spp_info[tcount]['xband_max']: spp_info[tcount]['xband_max'] = xband if xband <= spp_info[tcount]['xband_min']: spp_info[tcount]['xband_min'] = xband if yband >= spp_info[tcount]['yband_max']: spp_info[tcount]['yband_max'] = yband if yband <= spp_info[tcount]['yband_min']: spp_info[tcount]['yband_min'] = yband if zband >= spp_info[tcount]['zband_max']: spp_info[tcount]['zband_max'] = zband if zband <= spp_info[tcount]['zband_min']: spp_info[tcount]['zband_min'] = zband #add on empirical area spp_info[tcount]['empirical_area'] += data[ty][tx][4] except: pass #get the number of patches and average area of the empirical areas #first need neighbor list of the empirical data nb = {} for i in range(len(a)): for j in range(len(a[i])): if a[i][j][0] == 1: nb[(i,j)] = [] try: if a[i-1][j][0] == 1: nb[(i,j)].append((i-1,j)) except: pass try: if a[i+1][j][0] == 1: nb[(i,j)].append((i+1,j)) except: pass try: if a[i][j-1][0] == 1: nb[(i,j)].append((i,j-1)) except: pass try: if a[i][j+1][0] == 1: nb[(i,j)].append((i,j+1)) except: pass try: if a[i-1][j-1][0] == 1: nb[(i,j)].append((i-1,j-1)) except: pass try: if a[i-1][j+1][0] == 1: nb[(i,j)].append((i-1,j+1)) except: pass try: if a[i+1][j-1][0] == 1: nb[(i,j)].append((i+1,j-1)) except: pass try: if a[i+1][j+1][0] == 1: nb[(i,j)].append((i+1,j+1)) except: pass spp_info[tcount]['emp_mean_patch_size'],spp_info[tcount]['emp_patch_variance'],spp_info[tcount]['emp_num_patches'] = get_num_patches(nb) #now cycle through to get the band areas and empirical richness for x, y, z, xy, xz, yz, xyz spp_info[tcount]['x_area'] = 0 spp_info[tcount]['y_area'] = 0 spp_info[tcount]['z_area'] = 0 spp_info[tcount]['xy_area'] = 0 spp_info[tcount]['xz_area'] = 0 spp_info[tcount]['yz_area'] = 0 spp_info[tcount]['xyz_area'] = 0 for tx in range(spp_info[tcount]['xband_min'],spp_info[tcount]['xband_max']+1): try: spp_info[tcount]['x_area'] += x_data[(tx)]['area'] #add all the x band area info x_data[(tx)]['empirical_richness'] += 1 except: pass for ty in range(spp_info[tcount]['yband_min'],spp_info[tcount]['yband_max']+1): try: spp_info[tcount]['xy_area'] += xy_data[(tx,ty)]['area'] #add all the xyband info xy_data[(tx,ty)]['empirical_richness'] += 1 except: pass for tz in range(spp_info[tcount]['zband_min'],spp_info[tcount]['zband_max']+1): try: spp_info[tcount]['xyz_area'] += xyz_data[(tx,ty,tz)]['area'] #add all the xyz band info xyz_data[(tx,ty,tz)]['empirical_richness'] += 1 except: pass for tz in range(spp_info[tcount]['zband_min'],spp_info[tcount]['zband_max']+1): try: spp_info[tcount]['xz_area'] += xz_data[(tx,tz)]['area'] #add all the xzband info xz_data[(tx,tz)]['empirical_richness'] += 1 except: pass for ty in range(spp_info[tcount]['yband_min'],spp_info[tcount]['yband_max']+1): try: spp_info[tcount]['y_area'] += y_data[(ty)]['area'] #add all the yband info y_data[(ty)]['empirical_richness'] += 1 except: pass for tz in range(spp_info[tcount]['zband_min'],spp_info[tcount]['zband_max']+1): try: spp_info[tcount]['yz_area'] += yz_data[(ty,tz)]['area'] #add all the yz band info yz_data[(ty,tz)]['empirical_richness'] += 1 except: pass for tz in range(spp_info[tcount]['zband_min'],spp_info[tcount]['zband_max']+1): try: spp_info[tcount]['z_area'] += z_data[(tz)]['area'] #add all the z band info z_data[(tz)]['empirical_richness'] += 1 except: pass #get the number of patches and average area for the area defined by extents #first need neighbor list of the data for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: if data[i][j][5] >= spp_info[tcount]['xband_min'] and data[i][j][5] <= spp_info[tcount]['xband_max']: if data[i][j][6] >= spp_info[tcount]['yband_min'] and data[i][j][6] <= spp_info[tcount]['yband_max']: if data[i][j][7] >= spp_info[tcount]['zband_min'] and data[i][j][7] <= spp_info[tcount]['zband_max']: a[i][j][0] = 1 nb = {} for i in range(len(a)): for j in range(len(a[i])): if a[i][j][0] == 1: nb[(i,j)] = [] try: if a[i-1][j][0] == 1: nb[(i,j)].append((i-1,j)) except: pass try: if a[i+1][j][0] == 1: nb[(i,j)].append((i+1,j)) except: pass try: if a[i][j-1][0] == 1: nb[(i,j)].append((i,j-1)) except: pass try: if a[i][j+1][0] == 1: nb[(i,j)].append((i,j+1)) except: pass try: if a[i-1][j-1][0] == 1: nb[(i,j)].append((i-1,j-1)) except: pass try: if a[i-1][j+1][0] == 1: nb[(i,j)].append((i-1,j+1)) except: pass try: if a[i+1][j-1][0] == 1: nb[(i,j)].append((i+1,j-1)) except: pass try: if a[i+1][j+1][0] == 1: nb[(i,j)].append((i+1,j+1)) except: pass spp_info[tcount]['mean_patch_size'],spp_info[tcount]['patch_variance'],spp_info[tcount]['num_patches'] = get_num_patches(nb) tcount += 1 #standardize the empirical richness to a proportion of the species info n = len(spp_info) for key in x_data.keys(): x_data[key]['empirical_richness'] /= n for key in y_data.keys(): y_data[key]['empirical_richness'] /= n for key in z_data.keys(): z_data[key]['empirical_richness'] /= n for key in xy_data.keys(): xy_data[key]['empirical_richness'] /= n for key in xz_data.keys(): xz_data[key]['empirical_richness'] /= n for key in yz_data.keys(): yz_data[key]['empirical_richness'] /= n for key in xyz_data.keys(): xyz_data[key]['empirical_richness'] /= n print '\n' return(spp_info) def get_num_patches(nb): #supply and neighbors list and this will define the number of patches #get the values of the keys of the neighbors tkeys = nb.keys() #this is variable that will store patch information patches = [] used_keys = set([]) #cycle through each of the cells within the distribution(keys) for key in tkeys: #check if key has already been identified as part of a patch skip_key = False if key in used_keys: skip_key = True #if the key is not already part of a patch if skip_key == False: if len(nb[key])==0: #if the patch consist of a single cell... identify it as a single patch patches.append(set([key])) used_keys.update(set([key])) else: #if the patch is > a single cell.. identify all connected neighbors tpatch = set([]) #new patch set tpatch.add(key) #append the original key tpatch.update(set(nb[key])) tpatch2 = set([]) tpatch2.update(tpatch) new_added = True #default for neighbors of neighbors while new_added == True: #while new nieghbors are being added to the new patch vector new_added = False tnew = set([]) #hold keys of new neighbors to check for tp in tpatch2: #cycle through the new neighbors tnew.update(set(nb[tp])) tpatch2 = tnew.difference(tpatch) if len(tpatch2) > 0: new_added = True tpatch.update(tnew) patches.append(tpatch) used_keys.update(tpatch) #get the mean size, variance and n of the patches data4stats = [] for i in range(len(patches)): data4stats.append(len(patches[i])) tmean, tvar, n = stats(data4stats) return(tmean,tvar,n) def stats(data): #get the mean, variance and n of a dataset n = float(len(data)) sum = 0.0 for value in data: sum += value mean = sum/n sum = 0.0 for value in data: sum += (value - mean)**2 try: variance = sum/(n - 1) except: variance = 0 return (mean, variance,n) def write_spp_info(spp_info): print '\n' + 'writing file spp_info.csv' toutfile = savefolder + '/spp_info.csv' #open the file outfile = file(toutfile,'w') theader = 'spp,empirical_area,prop_domain,emp_mean_patch_size,emp_patch_variance,emp_num_patches,mean_patch_size,patch_variance,num_patches,xband_max,xband_min,yband_max,yband_min,zband_max,zband_min,x_area,y_area,z_area,xy_area,xz_area,yz_area,xyz_area\n' outfile.write(theader) for data in spp_info: tstr = data['spp'] + ',' tstr += str(int(data['empirical_area']/1000000)) + ',' tstr += str(data['p_domain']) + ',' tstr += str(data['emp_mean_patch_size']) + ',' tstr += str(data['emp_patch_variance']) + ',' tstr += str(data['emp_num_patches']) + ',' tstr += str(data['mean_patch_size']) + ',' tstr += str(data['patch_variance']) + ',' tstr += str(data['num_patches']) + ',' tstr += str(data['xband_max']) + ',' tstr += str(data['xband_min']) + ',' tstr += str(data['yband_max']) + ',' tstr += str(data['yband_min']) + ',' tstr += str(data['zband_max']) + ',' tstr += str(data['zband_min']) + ',' tstr += str(int(data['x_area']/1000000)) + ',' tstr += str(int(data['y_area']/1000000)) + ',' tstr += str(int(data['z_area']/1000000)) + ',' tstr += str(int(data['xy_area']/1000000)) + ',' tstr += str(int(data['xz_area']/1000000)) + ',' tstr += str(int(data['yz_area']/1000000)) + ',' tstr += str(int(data['xyz_area']/1000000)) + '\n' outfile.write(tstr) del outfile print '\n' def write_xyz_data(xyz): #write out basic xyz data print '\nWriting basic xyz data tp xyz_data.csv' toutfile = savefolder + '/xyz_data.csv' outfile = file(toutfile,'w') theader = 'tcount,i,j,DEM,area,lat,lon,yband,xband,zband\n' outfile.write(theader) for info in xyz: tstr = '' for i in range(len(info)): if i != len(info)-1: tstr += str(info[i]) + ',' else: tstr += str(info[i]) + '\n' outfile.write(tstr) del outfile print '\n' def write_richness_maps(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data,tstr,filename): #check to see if there is an eighth variable on data... if so set it to 0 ... if not append a 0 for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: try: data[i][j][8] = 0 except: data[i][j].append(0) #process the x model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(x_data[(data[i][j][5])][tstr],3) write_asc(header,data,filename + '_x.asc',8) #process the y model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(y_data[(data[i][j][6])][tstr],3) write_asc(header,data,filename + '_y.asc',8) #process the z model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(z_data[(data[i][j][7])][tstr],3) write_asc(header,data,filename + '_z.asc',8) #process the xy model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(xy_data[(data[i][j][5],data[i][j][6])][tstr],3) write_asc(header,data,filename + '_xy.asc',8) #process the xz model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(xz_data[(data[i][j][5],data[i][j][7])][tstr],3) write_asc(header,data,filename + '_xz.asc',8) #process the yz model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(yz_data[(data[i][j][6],data[i][j][7])][tstr],3) write_asc(header,data,filename + '_yz.asc',8) #process the xyz model for i in range(len(data)): for j in range(len(data[i])): if data[i][j][0] != header['NODATA_value']: data[i][j][8] = round(xyz_data[(data[i][j][5],data[i][j][6],data[i][j][7])][tstr],3) write_asc(header,data,filename + '_xyz.asc',8) print '\n' def create_theoretical_spp(data,header,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data): print '\nCreating theoretical spp -- patience is a virtue here' valid_ranges = set(xyz_data.keys()) theor_spp = [] tcount = 0 while len(theor_spp) <= num_theor_spp: if tcount % 1000 == 0: print str(tcount),'...', tspp = {} Valid_range = False while Valid_range == False: #create a theoretical spp limits t1 = randint(0,len(x_data)-1) t2 = randint(0,len(x_data)-1) if t1 >= t2: xmax = t1 xmin = t2 else: xmax = t1 xmin = t2 t1 = randint(0,len(y_data)-1) t2 = randint(0,len(y_data)-1) if t1 >= t2: ymax = t1 ymin = t2 else: ymax = t1 ymin = t2 t1 = randint(0,len(z_data)-1) t2 = randint(0,len(z_data)-1) if t1 >= t2: zmax = t1 zmin = t2 else: zmax = t1 zmin = t2 #check if valid range for i in range(xmin,xmax+1): for j in range(ymin,ymax+1): for k in range(zmin,zmax+1): if (i,j,k) in valid_ranges: Valid_range = True #now that we have a valid range tspp['xband_max'] = 0 tspp['xband_min'] = 199 tspp['yband_max'] = 0 tspp['yband_min'] = 199 tspp['zband_max'] = 0 tspp['zband_min'] = 199 tspp['x_area'] = 0 tspp['y_area'] = 0 tspp['z_area'] = 0 tspp['xy_area'] = 0 tspp['xz_area'] = 0 tspp['yz_area'] = 0 tspp['xyz_area'] = 0 #get the band max and mins if location exists for i in range(xmin,xmax+1): for j in range(ymin,ymax+1): for k in range(zmin,zmax+1): if (i,j,k) in valid_ranges: if tspp['xband_max'] <= i: tspp['xband_max'] = i if tspp['xband_min'] >= i: tspp['xband_min'] = i if tspp['yband_max'] <= j: tspp['yband_max'] = j if tspp['yband_min'] >= j: tspp['yband_min'] = j if tspp['zband_max'] <= k: tspp['zband_max'] = k if tspp['zband_min'] >= k: tspp['zband_min'] = k #get the areas associated with the new spp for i in range(tspp['xband_min'],tspp['xband_max']+1): try: tspp['x_area'] += x_data[(i)]['area'] except: pass for j in range(tspp['yband_min'],tspp['yband_max']+1): try: tspp['xy_area'] += xy_data[(i,j)]['area'] except: pass for k in range(tspp['zband_min'],tspp['zband_max']+1): try: tspp['xyz_area'] += xyz_data[(i,j,k)]['area'] except: pass for j in range(tspp['zband_min'],tspp['zband_max']+1): try: tspp['xz_area'] += xz_data[(i,j)]['area'] except: pass for i in range(tspp['yband_min'],tspp['yband_max']+1): try: tspp['y_area'] += y_data[(i)]['area'] except: pass for j in range(tspp['zband_min'],tspp['zband_max']+1): try: tspp['yz_area'] += yz_data[(i,j)]['area'] except: pass for i in range(tspp['zband_min'],tspp['zband_max']+1): try: tspp['z_area'] += z_data[(i)]['area'] except: pass #uncomment if you want to track the number of patches, etc of the theoretical species ## #get the number of patches and average area for the area defined by extents ## #create a temp_data variable same size as dem ## a = [] ## for i in range(len(data)): ## tmp_var = [] ## for j in range(len(data[i])): ## tmp_var.append([0]) ## a.append(tmp_var) ## del tmp_var ## #first need neighbor list of the data ## for i in range(len(data)): ## for j in range(len(data[i])): ## if data[i][j][0] != header['NODATA_value']: ## if data[i][j][5] >= tspp['xband_min'] and data[i][j][5] <= tspp['xband_max']: ## if data[i][j][6] >= tspp['yband_min'] and data[i][j][6] <= tspp['yband_max']: ## if data[i][j][7] >= tspp['zband_min'] and data[i][j][7] <= tspp['zband_max']: ## a[i][j][0] = 1 ## nb = {} ## for i in range(len(a)): ## for j in range(len(a[i])): ## if a[i][j][0] == 1: ## nb[(i,j)] = [] ## try: ## if a[i-1][j][0] == 1: nb[(i,j)].append((i-1,j)) ## except: pass ## try: ## if a[i+1][j][0] == 1: nb[(i,j)].append((i+1,j)) ## except: pass ## try: ## if a[i][j-1][0] == 1: nb[(i,j)].append((i,j-1)) ## except: pass ## try: ## if a[i][j+1][0] == 1: nb[(i,j)].append((i,j+1)) ## except: pass ## try: ## if a[i-1][j-1][0] == 1: nb[(i,j)].append((i-1,j-1)) ## except: pass ## try: ## if a[i-1][j+1][0] == 1: nb[(i,j)].append((i-1,j+1)) ## except: pass ## try: ## if a[i+1][j-1][0] == 1: nb[(i,j)].append((i+1,j-1)) ## except: pass ## try: ## if a[i+1][j+1][0] == 1: nb[(i,j)].append((i+1,j+1)) ## except: pass ## ## tspp['mean_patch_size'],tspp['patch_variance'],tspp['num_patches'] = get_num_patches(nb) ## ## if tspp['num_patches']<= 100: #add the species only if it has less than 100 patches ## theor_spp.append(tspp) ## tcount = tcount + 1 ## del tspp #comment out the next 6 lines if tracking number of patches, etc. tspp['mean_patch_size'] = -99 tspp['patch_variance'] = -99 tspp['num_patches'] = -99 theor_spp.append(tspp) tcount += 1 del tspp print '\n' return(theor_spp) def create_theoretical_spp_subsets(theor_spp,spp_info): print '\nCreating 100 spp subsets for models\n' theor_spp_list = [] print str(len(theor_spp)),'theor spp' print str(len(spp_info)),'emp spp' for spp in spp_info: if len(theor_spp_list) % 1000 == 0: print '.', trange = [] #uncomment if you want to use the number of patches for limiting the theoretical spp ## for i in range(len(theor_spp)): #first get list of theoretical species that is similar in area and number of patches ## if (0.90 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.10 * spp['xyz_area']): ## if theor_spp[i]['num_patches'] <= (spp['num_patches'] + 10): ## trange.append(i) if len(trange) <= 25: # if the n is low reduce the restrictions for i in range(len(theor_spp)): if (0.90 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.10 * spp['xyz_area']): trange.append(i) if len(trange) <= 25: # if n is still low, further reduce the restrictions for i in range(len(theor_spp)): if (0.80 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.20 * spp['xyz_area']): trange.append(i) if len(trange) <= 25: # if n is still low, further reduce the restrictions for i in range(len(theor_spp)): if (0.70 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.30 * spp['xyz_area']): trange.append(i) if len(trange) <= 25: # if n is still low, further reduce the restrictions for i in range(len(theor_spp)): if (0.60 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.40 * spp['xyz_area']): trange.append(i) if len(trange) <= 25: # if n is still low, further reduce the restrictions for i in range(len(theor_spp)): if (0.50 * spp['xyz_area']) <= theor_spp[i]['xyz_area'] <= (1.50 * spp['xyz_area']): trange.append(i) for i in range(reps): # now grab a random subset of these equal in number to the number of reps theor_spp_list.append(trange[randint(0,len(trange)-1)]) return(theor_spp_list) def process_theor_spp(data,header,theor_spp,theor_spp_list,x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data): #now cycle through the theoretical species and get the mean prediction print '\nprocessing the theoretical spp and getting theor richness\n' MDE_spp = [] for tval in theor_spp_list: tspp = theor_spp[tval] #get the band max and mins xmax = tspp['xband_max'] xmin = tspp['xband_min'] ymax = tspp['yband_max'] ymin = tspp['yband_min'] zmax = tspp['zband_max'] zmin = tspp['zband_min'] for tx in range(tspp['xband_min'],tspp['xband_max']+1): try: x_data[(tx)]['mean_theor_richness'] += 1 except: pass for ty in range(tspp['yband_min'],tspp['yband_max']+1): try: xy_data[(tx,ty)]['mean_theor_richness'] += 1 except: pass for tz in range(tspp['zband_min'],tspp['zband_max']+1): try: xyz_data[(tx,ty,tz)]['mean_theor_richness'] += 1 except: pass for tz in range(tspp['zband_min'],tspp['zband_max']+1): try: xz_data[(tx,tz)]['mean_theor_richness'] += 1 except: pass for ty in range(tspp['yband_min'],tspp['yband_max']+1): try: y_data[(ty)]['mean_theor_richness'] += 1 except: pass for tz in range(tspp['zband_min'],tspp['zband_max']+1): try: yz_data[(ty,tz)]['mean_theor_richness'] += 1 except: pass for tz in range(tspp['zband_min'],tspp['zband_max']+1): try: z_data[(tz)]['mean_theor_richness'] += 1 except: pass if len(MDE_spp) % 1000 == 0: print '.', #add the spp to the MDE species list MDE_spp.append(tspp) #standardize the mde mean richness to a proportion of the species info n = len(MDE_spp) for key in x_data.keys(): x_data[key]['mean_theor_richness'] /= n for key in y_data.keys(): y_data[key]['mean_theor_richness'] /= n for key in z_data.keys(): z_data[key]['mean_theor_richness'] /= n for key in xy_data.keys(): xy_data[key]['mean_theor_richness'] /= n for key in xz_data.keys(): xz_data[key]['mean_theor_richness'] /= n for key in yz_data.keys(): yz_data[key]['mean_theor_richness'] /= n for key in xyz_data.keys(): xyz_data[key]['mean_theor_richness'] /= n #return the mde spp list print '\n' return(MDE_spp) def write_theor_spp_info(spp_info,filename): print '\n' + 'writing file spp_info.csv' toutfile = savefolder + '/' + filename #open the file outfile = file(toutfile,'w') theader = 'spp,mean_patch_size,patch_variance,num_patches,xband_max,xband_min,yband_max,yband_min,zband_max,zband_min,x_area,y_area,z_area,xy_area,xz_area,yz_area,xyz_area\n' outfile.write(theader) tcount = 0 for data in spp_info: tstr = str(tcount) + ',' tstr += str(data['mean_patch_size']) + ',' tstr += str(data['patch_variance']) + ',' tstr += str(data['num_patches']) + ',' tstr += str(data['xband_max']) + ',' tstr += str(data['xband_min']) + ',' tstr += str(data['yband_max']) + ',' tstr += str(data['yband_min']) + ',' tstr += str(data['zband_max']) + ',' tstr += str(data['zband_min']) + ',' tstr += str(int(data['x_area']/1000000)) + ',' tstr += str(int(data['y_area']/1000000)) + ',' tstr += str(int(data['z_area']/1000000)) + ',' tstr += str(int(data['xy_area']/1000000)) + ',' tstr += str(int(data['xz_area']/1000000)) + ',' tstr += str(int(data['yz_area']/1000000)) + ',' tstr += str(int(data['xyz_area']/1000000)) + '\n' outfile.write(tstr) tcount += 1 del outfile print '\n' def read_in_theor_spp(): #read in the theor spp list theor_spp = [] with open(theor_spp_file) as f: f.readline() for line in f: data = {} tt = line.split(',') data['mean_patch_size'] = float(tt[1]) data['patch_variance'] = float(tt[2]) data['num_patches'] = float(tt[3]) data['xband_max'] = int(tt[4]) data['xband_min'] = int(tt[5]) data['yband_max'] = int(tt[6]) data['yband_min'] = int(tt[7]) data['zband_max'] = int(tt[8]) data['zband_min'] = int(tt[9]) data['x_area'] = int(float(tt[10]) * 1000000) data['y_area'] = int(float(tt[11]) * 1000000) data['z_area'] = int(float(tt[12]) * 1000000) data['xy_area'] = int(float(tt[13]) * 1000000) data['xz_area'] = int(float(tt[14]) * 1000000) data['yz_area'] = int(float(tt[15]) * 1000000) data['xyz_area'] = int(float(tt[16].replace('\n','')) * 1000000) theor_spp.append(data) return(theor_spp) def write_data_for_analysis(x_data,y_data,z_data,xy_data,xz_data,yz_data,xyz_data): #write out the x model print '\n' + 'writing theoretical species lists to x_model.csv' toutfile = savefolder + '/x_model.csv' outfile = file(toutfile,'w') theader = 'xband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in x_data.keys(): tstr = str(key) + ',' + str(int(x_data[key]['area']/1000000)) + ',' tstr += str(x_data[key]['empirical_richness']) + ',' tstr += str(x_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the y model print '\n' + 'writing theoretical species lists to y_model.csv' toutfile = savefolder + '/y_model.csv' outfile = file(toutfile,'w') theader = 'yband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in y_data.keys(): tstr = str(key) + ',' + str(int(y_data[key]['area']/1000000)) + ',' tstr += str(y_data[key]['empirical_richness']) + ',' tstr += str(y_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the z model print '\n' + 'writing theoretical species lists to z_model.csv' toutfile = savefolder + '/z_model.csv' outfile = file(toutfile,'w') theader = 'zband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in z_data.keys(): tstr = str(key) + ',' + str(int(z_data[key]['area']/1000000)) + ',' tstr += str(z_data[key]['empirical_richness']) + ',' tstr += str(z_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the xy model print '\n' + 'writing theoretical species lists to xy_model.csv' toutfile = savefolder + '/xy_model.csv' outfile = file(toutfile,'w') theader = 'xband,yband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in xy_data.keys(): tstr = str(key[0]) + ',' + str(key[1]) + ',' + str(int(xy_data[key]['area']/1000000)) + ',' tstr += str(xy_data[key]['empirical_richness']) + ',' tstr += str(xy_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the xz model print '\n' + 'writing theoretical species lists to xz_model.csv' toutfile = savefolder + '/xz_model.csv' outfile = file(toutfile,'w') theader = 'xband,zband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in xz_data.keys(): tstr = str(key[0]) + ',' + str(key[1]) + ',' + str(int(xz_data[key]['area']/1000000)) + ',' tstr += str(xz_data[key]['empirical_richness']) + ',' tstr += str(xz_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the yz model print '\n' + 'writing theoretical species lists to yz_model.csv' toutfile = savefolder + '/yz_model.csv' outfile = file(toutfile,'w') theader = 'yband,zband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in yz_data.keys(): tstr = str(key[0]) + ',' + str(key[1]) + ',' + str(int(yz_data[key]['area']/1000000)) + ',' tstr += str(yz_data[key]['empirical_richness']) + ',' tstr += str(yz_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile #write out the xyz model print '\n' + 'writing theoretical species lists to xyz_model.csv' toutfile = savefolder + '/xyz_model.csv' outfile = file(toutfile,'w') theader = 'xband,yband,zband,area,empirical_richness,mean_model_richness\n' outfile.write(theader) for key in xyz_data.keys(): tstr = str(key[0]) + ',' + str(key[1]) + ',' + str(key[2]) + ',' + str(int(xyz_data[key]['area']/1000000)) + ',' tstr = tstr + str(xyz_data[key]['empirical_richness']) + ',' tstr = tstr + str(xyz_data[key]['mean_theor_richness']) + '\n' outfile.write(tstr) del outfile print '\n' if __name__ == '__main__': try: # If psyco is installed this script runs much faster... import psyco psyco.full() print 'using psyco' except ImportError: # Oh, well, it's not installed. We don't need it. pass main()