### Site Tools

wiki:multicore_python

# Differences

This shows you the differences between two versions of the page.

 wiki:multicore_python [2019/07/05 15:42] wiki:multicore_python [2020/07/17 06:39] (current) Line 1: Line 1: + ====== Multicore processing in python - Zonal Statistic ​ ====== + Calculate zonal statistic (min mean max standard deviation) ​ + + cd /​home/​user/​ost4sem/​exercise/​python_multicore + + ==== One core ===== + + Running zonal statistical and the get the mean of the pixels fallen inside each poligon.  ​ + + home/​capooti/​webapps/​spatialecology_static/​ost4sem/​exercise/​ + + python zonal_stats_basic_sp.py borders_tribes.shp popc_0ADProj.tif zonal_stat_sp.txt + + ​ + from osgeo import gdal, ogr, osr + import numpy, sys + import pandas as pd + + def fmtFloat (x) : + return float("​{0:​.2f}"​.format(x)) + + def proc( ): + raster = gdal.Open(input_value_raster) + shape = ogr.Open(input_zone_polygon) + lyr = shape.GetLayer() + statsArr = [] + for fid in range(lyr.GetFeatureCount()):​ + feat = lyr.GetFeature(fid) + # Get extent of feat + geom = feat.GetGeometryRef() + if geom.GetGeometryName() == "​MULTIPOLYGON":​ + count = 0 + pointsX = [] + pointsY = [] + for polygon in geom: + geomInner = geom.GetGeometryRef(count) + ring = geomInner.GetGeometryRef(0) + numpoints = ring.GetPointCount() + for p in range(numpoints):​ + lon, lat, z = ring.GetPoint(p) + pointsX.append(lon) + pointsY.append(lat) + count += 1 + elif geom.GetGeometryName() == "​POLYGON":​ + ring = geom.GetGeometryRef(0) + numpoints = ring.GetPointCount() + pointsX = [] + pointsY = [] + for p in range(numpoints):​ + lon, lat, z = ring.GetPoint(p) + pointsX.append(lon) + pointsY.append(lat) + else: + sys.exit() + + xmin = min(pointsX) + xmax = max(pointsX) + ymin = min(pointsY) + ymax = max(pointsY) + + # Specify offset and rows and columns to read + xoff = int((xmin - xOrigin) / pixelWidth) + yoff = int((yOrigin - ymax) / pixelWidth) + xcount = int((xmax - xmin) / pixelWidth) + 1 + ycount = int((ymax - ymin) / pixelWidth) + 1 + + # Create memory target raster + target_ds = gdal.GetDriverByName("​MEM"​).Create("",​ xcount, ycount, gdal.GDT_Byte) + target_ds.SetGeoTransform((xmin,​ pixelWidth, 0, ymax, 0, pixelHeight)) + + # Create for target raster the same projection as for the value raster + raster_srs = osr.SpatialReference() + raster_srs.ImportFromWkt(raster.GetProjectionRef()) + target_ds.SetProjection(raster_srs.ExportToWkt()) + + # Rasterize zone polygon to raster + gdal.RasterizeLayer(target_ds,​ [1], lyr, burn_values=[1]) + + # Read raster as arrays + banddataraster = raster.GetRasterBand(1) + dataraster = banddataraster.ReadAsArray(xoff,​ yoff, xcount, ycount).astype( + numpy.float + ) + + bandmask = target_ds.GetRasterBand(1) + datamask = bandmask.ReadAsArray(0,​ 0, xcount, ycount).astype(numpy.float) + + # Mask zone of raster + zoneraster = numpy.ma.masked_array(dataraster,​ numpy.logical_not(datamask)) + + feature_stats = { + '​min':​ fmtFloat(numpy.min(zoneraster)),​ + '​mean':​ fmtFloat(numpy.mean(zoneraster)),​ + '​max':​ fmtFloat(numpy.max(zoneraster)),​ + '​std':​ fmtFloat(numpy.std(zoneraster)),​ + '​sum':​ fmtFloat(numpy.sum(zoneraster)),​ + '​fid':​ int(feat.GetFID())} + statsArr.append(feature_stats) ​   ​ + return statsArr + + # Open data + input_zone_polygon = sys.argv[1] + input_value_raster = sys.argv[2] + output_stats = sys.argv[3] + + rast = gdal.Open(input_value_raster) + shp = ogr.Open(input_zone_polygon) + + # Get raster georeference info + transform = rast.GetGeoTransform() + xOrigin = transform[0] + yOrigin = transform[3] + pixelWidth = transform[1] + pixelHeight = transform[5] + + # Start the processes + layer = shp.GetLayer() + statsOut = proc() + dfOut = pd.DataFrame(list(statsOut)) + dfOut.to_csv(output_stats,​sep="​\t",​index=False) + ​ + + + ==== Multi core ===== + + python zonal_stats_basic_my.py borders_tribes.shp popc_0ADProj.tif zonal_stat_mp.txt + + ​ + from osgeo import gdal, ogr, osr + import numpy, sys + from multiprocessing import Pool + import pandas as pd + + + def fmtFloat (x) : + return float("​{0:​.2f}"​.format(x)) + + def proc(fid): + shape = ogr.Open(input_zone_polygon) + lyr = shape.GetLayer() + feat = lyr.GetFeature(fid) + raster = gdal.Open(input_value_raster) + geom = feat.GetGeometryRef() + if geom.GetGeometryName() == "​MULTIPOLYGON":​ + count = 0 + pointsX = [] + pointsY = [] + for polygon in geom: + geomInner = geom.GetGeometryRef(count) + ring = geomInner.GetGeometryRef(0) + numpoints = ring.GetPointCount() + for p in range(numpoints):​ + lon, lat, z = ring.GetPoint(p) + pointsX.append(lon) + pointsY.append(lat) + count += 1 + elif geom.GetGeometryName() == "​POLYGON":​ + ring = geom.GetGeometryRef(0) + numpoints = ring.GetPointCount() + pointsX = [] + pointsY = [] + for p in range(numpoints):​ + lon, lat, z = ring.GetPoint(p) + pointsX.append(lon) + pointsY.append(lat) + + else: + sys.exit() + + xmin = min(pointsX) + xmax = max(pointsX) + ymin = min(pointsY) + ymax = max(pointsY) + + # Specify offset and rows and columns to read + xoff = int((xmin - xOrigin) / pixelWidth) + yoff = int((yOrigin - ymax) / pixelWidth) + xcount = int((xmax - xmin) / pixelWidth) + 1 + ycount = int((ymax - ymin) / pixelWidth) + 1 + + # Create memory target raster + target_ds = gdal.GetDriverByName("​MEM"​).Create("",​ xcount, ycount, gdal.GDT_Byte) + target_ds.SetGeoTransform((xmin,​ pixelWidth, 0, ymax, 0, pixelHeight)) + + # Create for target raster the same projection as for the value raster + raster_srs = osr.SpatialReference() + raster_srs.ImportFromWkt(raster.GetProjectionRef()) + target_ds.SetProjection(raster_srs.ExportToWkt()) + + # Rasterize zone polygon to raster + gdal.RasterizeLayer(target_ds,​ [1], lyr, burn_values=[1]) + + # Read raster as arrays + banddataraster = raster.GetRasterBand(1) + dataraster = banddataraster.ReadAsArray(xoff,​ yoff, xcount, ycount).astype( + numpy.float + ) + + bandmask = target_ds.GetRasterBand(1) + datamask = bandmask.ReadAsArray(0,​ 0, xcount, ycount).astype(numpy.float) + + # Mask zone of raster + zoneraster = numpy.ma.masked_array(dataraster,​ numpy.logical_not(datamask)) + + feature_stats = { + '​min':​ fmtFloat(numpy.min(zoneraster)),​ + '​mean':​ fmtFloat(numpy.mean(zoneraster)),​ + '​max':​ fmtFloat(numpy.max(zoneraster)),​ + '​std':​ fmtFloat(numpy.std(zoneraster)),​ + '​sum':​ fmtFloat(numpy.sum(zoneraster)),​ + '​fid':​ int(feat.GetFID())} + return feature_stats + + # Open data + input_zone_polygon = sys.argv[1] + input_value_raster = sys.argv[2] + output_stats = sys.argv[3] + + rast = gdal.Open(input_value_raster) + shp = ogr.Open(input_zone_polygon) + + # Get raster georeference info + transform = rast.GetGeoTransform() + xOrigin = transform[0] + yOrigin = transform[3] + pixelWidth = transform[1] + pixelHeight = transform[5] + + + # Start the processes + layer = shp.GetLayer() + featList = range(layer.GetFeatureCount()) + p = Pool(processes=8) + pOut = p.map(proc, featList) + dfOut = pd.DataFrame(list(pOut)) + dfOut.to_csv(output_stats,​sep="​\t",​index=False) +