User Tools

Site Tools


wiki:multicore_python

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

wiki:multicore_python [2019/07/05 15:42] (current)
Line 1: Line 1:
 +====== Multicore processing in python - Zonal Statistic ​ ======
  
 +Calculate zonal statistic (min mean max standard deviation) ​
 +
 +  cd /​home/​user/​ost4sem/​exercise/​python_multicore
 +
 +==== One core =====
 +
 +Running zonal statistical and the get the mean of the pixels fallen inside each poligon.  ​
 +
 +home/​capooti/​webapps/​spatialecology_static/​ost4sem/​exercise/​
 +
 +  python zonal_stats_basic_sp.py borders_tribes.shp popc_0ADProj.tif zonal_stat_sp.txt
 +
 +<code python| zonal_stats_basic_sp.py>​
 +from osgeo import gdal, ogr, osr
 +import numpy, sys
 +import pandas as pd
 +
 +def fmtFloat (x) :
 +    return float("​{0:​.2f}"​.format(x))
 +
 +def proc( ):
 +    raster = gdal.Open(input_value_raster)
 +    shape = ogr.Open(input_zone_polygon)
 +    lyr = shape.GetLayer()
 +    statsArr = []
 +    for fid in range(lyr.GetFeatureCount()):​
 +        feat = lyr.GetFeature(fid)
 +    # Get extent of feat
 +        geom = feat.GetGeometryRef()
 +        if geom.GetGeometryName() == "​MULTIPOLYGON":​
 +            count = 0
 +            pointsX = []
 +            pointsY = []
 +            for polygon in geom:
 +                geomInner = geom.GetGeometryRef(count)
 +                ring = geomInner.GetGeometryRef(0)
 +                numpoints = ring.GetPointCount()
 +                for p in range(numpoints):​
 +                    lon, lat, z = ring.GetPoint(p)
 +                    pointsX.append(lon)
 +                    pointsY.append(lat)
 +                count += 1
 +        elif geom.GetGeometryName() == "​POLYGON":​
 +            ring = geom.GetGeometryRef(0)
 +            numpoints = ring.GetPointCount()
 +            pointsX = []
 +            pointsY = []
 +            for p in range(numpoints):​
 +                lon, lat, z = ring.GetPoint(p)
 +                pointsX.append(lon)
 +                pointsY.append(lat)
 +        else:
 +            sys.exit()
 +
 +        xmin = min(pointsX)
 +        xmax = max(pointsX)
 +        ymin = min(pointsY)
 +        ymax = max(pointsY)
 +
 +        # Specify offset and rows and columns to read
 +        xoff = int((xmin - xOrigin) / pixelWidth)
 +        yoff = int((yOrigin - ymax) / pixelWidth)
 +        xcount = int((xmax - xmin) / pixelWidth) + 1
 +        ycount = int((ymax - ymin) / pixelWidth) + 1
 +
 +        # Create memory target raster
 +        target_ds = gdal.GetDriverByName("​MEM"​).Create("",​ xcount, ycount, gdal.GDT_Byte)
 +        target_ds.SetGeoTransform((xmin,​ pixelWidth, 0, ymax, 0, pixelHeight))
 +
 +        # Create for target raster the same projection as for the value raster
 +        raster_srs = osr.SpatialReference()
 +        raster_srs.ImportFromWkt(raster.GetProjectionRef())
 +        target_ds.SetProjection(raster_srs.ExportToWkt())
 +
 +        # Rasterize zone polygon to raster
 +        gdal.RasterizeLayer(target_ds,​ [1], lyr, burn_values=[1])
 +
 +        # Read raster as arrays
 +        banddataraster = raster.GetRasterBand(1)
 +        dataraster = banddataraster.ReadAsArray(xoff,​ yoff, xcount, ycount).astype(
 +            numpy.float
 +        )
 +
 +        bandmask = target_ds.GetRasterBand(1)
 +        datamask = bandmask.ReadAsArray(0,​ 0, xcount, ycount).astype(numpy.float)
 +
 +        # Mask zone of raster
 +        zoneraster = numpy.ma.masked_array(dataraster,​ numpy.logical_not(datamask))
 +
 +        feature_stats = {
 +        '​min':​ fmtFloat(numpy.min(zoneraster)),​
 +        '​mean':​ fmtFloat(numpy.mean(zoneraster)),​
 +        '​max':​ fmtFloat(numpy.max(zoneraster)),​
 +        '​std':​ fmtFloat(numpy.std(zoneraster)),​
 +        '​sum':​ fmtFloat(numpy.sum(zoneraster)),​
 +        '​fid':​ int(feat.GetFID())}
 +        statsArr.append(feature_stats) ​   ​
 +    return statsArr
 + 
 +# Open data
 +input_zone_polygon = sys.argv[1]
 +input_value_raster = sys.argv[2]
 +output_stats = sys.argv[3]
 +
 +rast = gdal.Open(input_value_raster)
 +shp = ogr.Open(input_zone_polygon)
 +
 +# Get raster georeference info
 +transform = rast.GetGeoTransform()
 +xOrigin = transform[0]
 +yOrigin = transform[3]
 +pixelWidth = transform[1]
 +pixelHeight = transform[5]
 +
 +# Start the processes
 +layer = shp.GetLayer()
 +statsOut = proc()
 +dfOut = pd.DataFrame(list(statsOut))
 +dfOut.to_csv(output_stats,​sep="​\t",​index=False)
 +</​code>​
 +
 +
 +==== Multi core =====
 +
 +  python zonal_stats_basic_my.py borders_tribes.shp popc_0ADProj.tif zonal_stat_mp.txt
 +
 +<code python| zonal_stats_basic_mp.py>​
 +from osgeo import gdal, ogr, osr
 +import numpy, sys
 +from multiprocessing import Pool 
 +import pandas as pd
 +
 +
 +def fmtFloat (x) :
 +    return float("​{0:​.2f}"​.format(x))
 +
 +def proc(fid):
 +    shape = ogr.Open(input_zone_polygon)
 +    lyr = shape.GetLayer()
 +    feat = lyr.GetFeature(fid)
 +    raster = gdal.Open(input_value_raster)
 +    geom = feat.GetGeometryRef()
 +    if geom.GetGeometryName() == "​MULTIPOLYGON":​
 +        count = 0
 +        pointsX = []
 +        pointsY = []
 +        for polygon in geom:
 +            geomInner = geom.GetGeometryRef(count)
 +            ring = geomInner.GetGeometryRef(0)
 +            numpoints = ring.GetPointCount()
 +            for p in range(numpoints):​
 +                lon, lat, z = ring.GetPoint(p)
 +                pointsX.append(lon)
 +                pointsY.append(lat)
 +            count += 1
 +    elif geom.GetGeometryName() == "​POLYGON":​
 +        ring = geom.GetGeometryRef(0)
 +        numpoints = ring.GetPointCount()
 +        pointsX = []
 +        pointsY = []
 +        for p in range(numpoints):​
 +            lon, lat, z = ring.GetPoint(p)
 +            pointsX.append(lon)
 +            pointsY.append(lat)
 +
 +    else:
 +        sys.exit()
 +
 +    xmin = min(pointsX)
 +    xmax = max(pointsX)
 +    ymin = min(pointsY)
 +    ymax = max(pointsY)
 +
 +    # Specify offset and rows and columns to read
 +    xoff = int((xmin - xOrigin) / pixelWidth)
 +    yoff = int((yOrigin - ymax) / pixelWidth)
 +    xcount = int((xmax - xmin) / pixelWidth) + 1
 +    ycount = int((ymax - ymin) / pixelWidth) + 1
 +
 +    # Create memory target raster
 +    target_ds = gdal.GetDriverByName("​MEM"​).Create("",​ xcount, ycount, gdal.GDT_Byte)
 +    target_ds.SetGeoTransform((xmin,​ pixelWidth, 0, ymax, 0, pixelHeight))
 +
 +    # Create for target raster the same projection as for the value raster
 +    raster_srs = osr.SpatialReference()
 +    raster_srs.ImportFromWkt(raster.GetProjectionRef())
 +    target_ds.SetProjection(raster_srs.ExportToWkt())
 +
 +    # Rasterize zone polygon to raster
 +    gdal.RasterizeLayer(target_ds,​ [1], lyr, burn_values=[1])
 +
 +    # Read raster as arrays
 +    banddataraster = raster.GetRasterBand(1)
 +    dataraster = banddataraster.ReadAsArray(xoff,​ yoff, xcount, ycount).astype(
 +        numpy.float
 +    )
 +
 +    bandmask = target_ds.GetRasterBand(1)
 +    datamask = bandmask.ReadAsArray(0,​ 0, xcount, ycount).astype(numpy.float)
 +
 +    # Mask zone of raster
 +    zoneraster = numpy.ma.masked_array(dataraster,​ numpy.logical_not(datamask))
 +
 +    feature_stats = {
 +    '​min':​ fmtFloat(numpy.min(zoneraster)),​
 +    '​mean':​ fmtFloat(numpy.mean(zoneraster)),​
 +    '​max':​ fmtFloat(numpy.max(zoneraster)),​
 +    '​std':​ fmtFloat(numpy.std(zoneraster)),​
 +    '​sum':​ fmtFloat(numpy.sum(zoneraster)),​
 +    '​fid':​ int(feat.GetFID())}
 +    return feature_stats
 +
 +# Open data
 +input_zone_polygon = sys.argv[1]
 +input_value_raster = sys.argv[2]
 +output_stats = sys.argv[3]
 +
 +rast = gdal.Open(input_value_raster)
 +shp = ogr.Open(input_zone_polygon)
 +
 +# Get raster georeference info
 +transform = rast.GetGeoTransform()
 +xOrigin = transform[0]
 +yOrigin = transform[3]
 +pixelWidth = transform[1]
 +pixelHeight = transform[5]
 +
 +
 +# Start the processes
 +layer = shp.GetLayer()
 +featList = range(layer.GetFeatureCount())
 +p = Pool(processes=8)
 +pOut = p.map(proc, featList)
 +dfOut = pd.DataFrame(list(pOut))
 +dfOut.to_csv(output_stats,​sep="​\t",​index=False)
 +</​code>​
wiki/multicore_python.txt ยท Last modified: 2019/07/05 15:42 (external edit)