User Tools

Site Tools


wiki:multicore_python

Multicore processing in python - Zonal Statistic

Calculate zonal statistic (min mean max standard deviation)

cd /home/user/ost4sem/exercise/python_multicore

One core

Running zonal statistical and the get the mean of the pixels fallen inside each poligon.
home/capooti/webapps/spatialecology_static/ost4sem/exercise/

python zonal_stats_basic_sp.py borders_tribes.shp popc_0ADProj.tif zonal_stat_sp.txt
zonal_stats_basic_sp.py
from osgeo import gdal, ogr, osr
import numpy, sys
import pandas as pd
 
def fmtFloat (x) :
    return float("{0:.2f}".format(x))
 
def proc( ):
    raster = gdal.Open(input_value_raster)
    shape = ogr.Open(input_zone_polygon)
    lyr = shape.GetLayer()
    statsArr = []
    for fid in range(lyr.GetFeatureCount()):
        feat = lyr.GetFeature(fid)
    # Get extent of feat
        geom = feat.GetGeometryRef()
        if geom.GetGeometryName() == "MULTIPOLYGON":
            count = 0
            pointsX = []
            pointsY = []
            for polygon in geom:
                geomInner = geom.GetGeometryRef(count)
                ring = geomInner.GetGeometryRef(0)
                numpoints = ring.GetPointCount()
                for p in range(numpoints):
                    lon, lat, z = ring.GetPoint(p)
                    pointsX.append(lon)
                    pointsY.append(lat)
                count += 1
        elif geom.GetGeometryName() == "POLYGON":
            ring = geom.GetGeometryRef(0)
            numpoints = ring.GetPointCount()
            pointsX = []
            pointsY = []
            for p in range(numpoints):
                lon, lat, z = ring.GetPoint(p)
                pointsX.append(lon)
                pointsY.append(lat)
        else:
            sys.exit()
 
        xmin = min(pointsX)
        xmax = max(pointsX)
        ymin = min(pointsY)
        ymax = max(pointsY)
 
        # Specify offset and rows and columns to read
        xoff = int((xmin - xOrigin) / pixelWidth)
        yoff = int((yOrigin - ymax) / pixelWidth)
        xcount = int((xmax - xmin) / pixelWidth) + 1
        ycount = int((ymax - ymin) / pixelWidth) + 1
 
        # Create memory target raster
        target_ds = gdal.GetDriverByName("MEM").Create("", xcount, ycount, gdal.GDT_Byte)
        target_ds.SetGeoTransform((xmin, pixelWidth, 0, ymax, 0, pixelHeight))
 
        # Create for target raster the same projection as for the value raster
        raster_srs = osr.SpatialReference()
        raster_srs.ImportFromWkt(raster.GetProjectionRef())
        target_ds.SetProjection(raster_srs.ExportToWkt())
 
        # Rasterize zone polygon to raster
        gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
 
        # Read raster as arrays
        banddataraster = raster.GetRasterBand(1)
        dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount).astype(
            numpy.float
        )
 
        bandmask = target_ds.GetRasterBand(1)
        datamask = bandmask.ReadAsArray(0, 0, xcount, ycount).astype(numpy.float)
 
        # Mask zone of raster
        zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
 
        feature_stats = {
        'min': fmtFloat(numpy.min(zoneraster)),
        'mean': fmtFloat(numpy.mean(zoneraster)),
        'max': fmtFloat(numpy.max(zoneraster)),
        'std': fmtFloat(numpy.std(zoneraster)),
        'sum': fmtFloat(numpy.sum(zoneraster)),
        'fid': int(feat.GetFID())}
        statsArr.append(feature_stats)    
    return statsArr
 
# Open data
input_zone_polygon = sys.argv[1]
input_value_raster = sys.argv[2]
output_stats = sys.argv[3]
 
rast = gdal.Open(input_value_raster)
shp = ogr.Open(input_zone_polygon)
 
# Get raster georeference info
transform = rast.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]
 
# Start the processes
layer = shp.GetLayer()
statsOut = proc()
dfOut = pd.DataFrame(list(statsOut))
dfOut.to_csv(output_stats,sep="\t",index=False)

Multi core

python zonal_stats_basic_my.py borders_tribes.shp popc_0ADProj.tif zonal_stat_mp.txt
zonal_stats_basic_mp.py
from osgeo import gdal, ogr, osr
import numpy, sys
from multiprocessing import Pool 
import pandas as pd
 
 
def fmtFloat (x) :
    return float("{0:.2f}".format(x))
 
def proc(fid):
    shape = ogr.Open(input_zone_polygon)
    lyr = shape.GetLayer()
    feat = lyr.GetFeature(fid)
    raster = gdal.Open(input_value_raster)
    geom = feat.GetGeometryRef()
    if geom.GetGeometryName() == "MULTIPOLYGON":
        count = 0
        pointsX = []
        pointsY = []
        for polygon in geom:
            geomInner = geom.GetGeometryRef(count)
            ring = geomInner.GetGeometryRef(0)
            numpoints = ring.GetPointCount()
            for p in range(numpoints):
                lon, lat, z = ring.GetPoint(p)
                pointsX.append(lon)
                pointsY.append(lat)
            count += 1
    elif geom.GetGeometryName() == "POLYGON":
        ring = geom.GetGeometryRef(0)
        numpoints = ring.GetPointCount()
        pointsX = []
        pointsY = []
        for p in range(numpoints):
            lon, lat, z = ring.GetPoint(p)
            pointsX.append(lon)
            pointsY.append(lat)
 
    else:
        sys.exit()
 
    xmin = min(pointsX)
    xmax = max(pointsX)
    ymin = min(pointsY)
    ymax = max(pointsY)
 
    # Specify offset and rows and columns to read
    xoff = int((xmin - xOrigin) / pixelWidth)
    yoff = int((yOrigin - ymax) / pixelWidth)
    xcount = int((xmax - xmin) / pixelWidth) + 1
    ycount = int((ymax - ymin) / pixelWidth) + 1
 
    # Create memory target raster
    target_ds = gdal.GetDriverByName("MEM").Create("", xcount, ycount, gdal.GDT_Byte)
    target_ds.SetGeoTransform((xmin, pixelWidth, 0, ymax, 0, pixelHeight))
 
    # Create for target raster the same projection as for the value raster
    raster_srs = osr.SpatialReference()
    raster_srs.ImportFromWkt(raster.GetProjectionRef())
    target_ds.SetProjection(raster_srs.ExportToWkt())
 
    # Rasterize zone polygon to raster
    gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])
 
    # Read raster as arrays
    banddataraster = raster.GetRasterBand(1)
    dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount).astype(
        numpy.float
    )
 
    bandmask = target_ds.GetRasterBand(1)
    datamask = bandmask.ReadAsArray(0, 0, xcount, ycount).astype(numpy.float)
 
    # Mask zone of raster
    zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))
 
    feature_stats = {
    'min': fmtFloat(numpy.min(zoneraster)),
    'mean': fmtFloat(numpy.mean(zoneraster)),
    'max': fmtFloat(numpy.max(zoneraster)),
    'std': fmtFloat(numpy.std(zoneraster)),
    'sum': fmtFloat(numpy.sum(zoneraster)),
    'fid': int(feat.GetFID())}
    return feature_stats
 
# Open data
input_zone_polygon = sys.argv[1]
input_value_raster = sys.argv[2]
output_stats = sys.argv[3]
 
rast = gdal.Open(input_value_raster)
shp = ogr.Open(input_zone_polygon)
 
# Get raster georeference info
transform = rast.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]
 
 
# Start the processes
layer = shp.GetLayer()
featList = range(layer.GetFeatureCount())
p = Pool(processes=8)
pOut = p.map(proc, featList)
dfOut = pd.DataFrame(list(pOut))
dfOut.to_csv(output_stats,sep="\t",index=False)
wiki/multicore_python.txt · Last modified: 2021/01/20 20:36 (external edit)