import os
import h5py
import numpy as np
import pandas as pd
import geopandas as gp
from shapely.geometry import Point
import geoviews as gv
from geoviews import opts, tile_sources as gvts
import holoviews as hv
gv.extension('bokeh', 'matplotlib')


inDir = os.getcwd() + os.sep  # Set input directory to the current working directory


gediFiles = [g for g in os.listdir() if g.startswith('GEDI02_A') and g.endswith('.h5')]  # List GEDI L2A .h5 files in the inDir
gediFiles

['GEDI02_A_2019170155833_O02932_02_T02267_02_003_01_V002.h5']


L2A = 'GEDI02_A_2019170155833_O02932_02_T02267_02_003_01_V002.h5'
L2A

'GEDI02_A_2019170155833_O02932_02_T02267_02_003_01_V002.h5'


gediL2A = h5py.File(L2A, 'r')  # Read file using h5py


list(gediL2A.keys())

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011',
 'METADATA']


list(gediL2A['METADATA'])

['DatasetIdentification']


for g in gediL2A['METADATA']['DatasetIdentification'].attrs: print(g)

PGEVersion
VersionID
abstract
characterSet
creationDate
credit
fileName
language
originatorOrganizationName
purpose
shortName
spatialRepresentationType
status
topicCategory
uuid


print(gediL2A['METADATA']['DatasetIdentification'].attrs['purpose'])

The purpose of the L2A dataset is to provide waveform interpretation and extracted products from each GEDI waveform.  This includes ground elevation, canopy top height, relative return energy metrics (describing canopy vertical structure, for example), and many other interpreted products from the return waveforms.


beamNames = [g for g in gediL2A.keys() if g.startswith('BEAM')]
beamNames

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011']


for g in gediL2A['BEAM0000'].attrs: print(g)

description


for b in beamNames: 
    print(f"{b} is a {gediL2A[b].attrs['description']}")

BEAM0000 is a Coverage beam
BEAM0001 is a Coverage beam
BEAM0010 is a Coverage beam
BEAM0011 is a Coverage beam
BEAM0101 is a Full power beam
BEAM0110 is a Full power beam
BEAM1000 is a Full power beam
BEAM1011 is a Full power beam


beamNames = ['BEAM0110']


gediL2A_objs = []
gediL2A.visit(gediL2A_objs.append)                                           # Retrieve list of datasets
gediSDS = [o for o in gediL2A_objs if isinstance(gediL2A[o], h5py.Dataset)]  # Search for relevant SDS inside data file
[i for i in gediSDS if beamNames[0] in i][0:10]                              # Print the first 10 datasets for selected beam

['BEAM0110/ancillary/l2a_alg_count',
 'BEAM0110/beam',
 'BEAM0110/channel',
 'BEAM0110/degrade_flag',
 'BEAM0110/delta_time',
 'BEAM0110/digital_elevation_model',
 'BEAM0110/digital_elevation_model_srtm',
 'BEAM0110/elev_highestreturn',
 'BEAM0110/elev_lowestmode',
 'BEAM0110/elevation_bias_flag']


lonSample, latSample, shotSample, qualitySample, beamSample = [], [], [], [], []  # Set up lists to store data

# Open the SDS
lats = gediL2A[f'{beamNames[0]}/lat_lowestmode'][()]
lons = gediL2A[f'{beamNames[0]}/lon_lowestmode'][()]
shots = gediL2A[f'{beamNames[0]}/shot_number'][()]
quality = gediL2A[f'{beamNames[0]}/quality_flag'][()]

# Take every 100th shot and append to list
for i in range(len(shots)):
    if i % 100 == 0:
        shotSample.append(str(shots[i]))
        lonSample.append(lons[i])
        latSample.append(lats[i])
        qualitySample.append(quality[i])
        beamSample.append(beamNames[0])
            
# Write all of the sample shots to a dataframe
latslons = pd.DataFrame({'Beam': beamSample, 'Shot Number': shotSample, 'Longitude': lonSample, 'Latitude': latSample,
                         'Quality Flag': qualitySample})
latslons


# Clean up variables that will no longer be needed
del beamSample, quality, qualitySample, gediL2A_objs, latSample, lats, lonSample, lons, shotSample, shots


# Take the lat/lon dataframe and convert each lat/lon to a shapely point
latslons['geometry'] = latslons.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)


# Convert to a Geodataframe
latslons = gp.GeoDataFrame(latslons)
latslons = latslons.drop(columns=['Latitude','Longitude'])
latslons['geometry']

0       POINT (-142.75569 26.92390)
1       POINT (-142.73657 26.94324)
2       POINT (-142.71743 26.96257)
3       POINT (-142.69829 26.98189)
4       POINT (-142.67914 27.00119)
                   ...             
1066     POINT (-80.19845 51.79686)
1067     POINT (-80.11468 51.79703)
1068     POINT (-80.03217 51.79717)
1069     POINT (-79.94857 51.79725)
1070     POINT (-79.86570 51.79725)
Name: geometry, Length: 1071, dtype: geometry


latslons['geometry'][0]


# Define a function for visualizing GEDI points
def pointVisual(features, vdims):
    return (gvts.EsriImagery * gv.Points(features, vdims=vdims).options(tools=['hover'], height=500, width=900, size=5, 
                                                                        color='yellow', fontsize={'xticks': 10, 'yticks': 10, 
                                                                                                  'xlabel':16, 'ylabel': 16}))


redwoodNP = gp.GeoDataFrame.from_file('RedwoodNP.geojson')  # Import GeoJSON as GeoDataFrame


redwoodNP


redwoodNP['geometry'][0]  # Plot GeoDataFrame


# Create a list of geodataframe columns to be included as attributes in the output map
vdims = []
for f in latslons:
    if f not in ['geometry']:
        vdims.append(f)
vdims

['Beam', 'Shot Number', 'Quality Flag']


# Call the function for plotting the GEDI points
gv.Polygons(redwoodNP['geometry']).opts(line_color='red', color=None) * pointVisual(latslons, vdims = vdims)


print(f"Quality Flag: {gediL2A[b]['quality_flag'].attrs['description']}")

Quality Flag: Flag simpilfying selection of most useful data


del latslons  # No longer need the geodataframe used to visualize the full GEDI orbit


len(gediSDS)

4320


beamNames

['BEAM0110']


beamSDS = [g for g in gediSDS if beamNames[0] in g]  # Subset to a single beam
len(beamSDS)

540


shot = 29320600200465599


index = np.where(gediL2A[f'{beamNames[0]}/shot_number'][()]==shot)[0][0]  # Set the index for the shot identified above
index

45730


rh = gediL2A[[g for g in beamSDS if g.endswith('/rh')][0]]  # Relative Height Metrics


print(f"rh is {rh.attrs['description']}")

rh is Relative height metrics at 1 % interval


algo = gediL2A[f'{beamNames[0]}/selected_algorithm']  # selected algorithm
print(f"selected_algorithm is {algo.attrs['description']}")

selected_algorithm is ID of algorithm selected as identifying the lowest non-noise mode


# Bring in the desired SDS
lats = gediL2A[f'{beamNames[0]}/lat_lowestmode']  # Latitude
lons = gediL2A[f'{beamNames[0]}/lon_lowestmode']  # Longitude


rhLat = lats[index]
rhLon = lons[index]
rhShot1 = rh[index]
algoShot1 = algo[index]


print(f"The shot is located at: {str(rhLat)}, {str(rhLon)} (shot ID: {shot}, index {index}) and is from beam {beamNames[0]}.")
print(f"The selected algorithm is Algorithm Setting Group {str(algoShot1)}.")

The shot is located at: 41.284809604761435, -124.0311657853589 (shot ID: 29320600200465599, index 45730) and is from beam BEAM0110.
The selected algorithm is Algorithm Setting Group 2.


# Grab the elevation recorded at the start and end of the RH metrics
zElevation = gediL2A[[g for g in beamSDS if g.endswith('/elev_lowestmode')][0]][index]  # Elevation
zTop = gediL2A[[g for g in beamSDS if g.endswith('/elev_highestreturn')][0]][index]     # Elevation Highest Return


rhShot = [z + zElevation for z in rhShot1]  # To convert canopy height to canopy elevation, add the elevation to each value
rh25 = rhShot[24]                           # 25% 
rh50 = rhShot[49]                           # 50%  
rh75 = rhShot[74]                           # 75%


rhVis = hv.Curve(rhShot, label=f'Selected Algorithm (a{str(algoShot1)})')
rhVis = rhVis.opts(color='black', tools=['hover'], height=500, width=400, title='GEDI L2A Relative Height Metrics', 
                   xlabel='Percent Energy Returned', ylabel='Elevation (m)', xlim=(0,100),ylim=(np.min(rhShot),np.max(rhShot)), 
                   fontsize={'title':14, 'xlabel':16, 'ylabel': 16, 'legend': 14, 'xticks':12, 'yticks':12}, line_width=3.5)
rhVis


# Create plots for L2A Metrics
zX = [0,100]                   # set up list from 0 to 100 to create the line
zY = [zElevation, zElevation]  # ground elevation
zT = [zTop, zTop]              # highest return

# Set up plots for each of the desired values
zVis = hv.Curve((zX, zY), label='Ground Return').opts(color='saddlebrown', tools=['hover'], height=550, width=400, line_width=2)
ztVis = hv.Curve((zX, zT), label='RH100').opts(color='navy', tools=['hover'], height=550, width=400, line_width=2)
rh25Vis = hv.Curve((zX, [rh25,rh25]),label='RH25').opts(color='lightblue',tools=['hover'], height=550, width=400, line_width=2)
rh50Vis = hv.Curve((zX, [rh50,rh50]),label='RH50').opts(color='mediumblue',tools=['hover'], height=550, width=400, line_width=2)
rh75Vis = hv.Curve((zX, [rh75,rh75]),label='RH75').opts(color='darkblue',tools=['hover'], height=550, width=400, line_width=2)


# Plot all of the metrics together
l2aVis = rhVis * zVis * ztVis * rh25Vis * rh50Vis * rh75Vis
l2aVis.opts(show_legend=True, legend_position='bottom_right', title='GEDI L2A Relative Height Metrics', ylabel='Elevation (m)',
                   xlabel='Percent Energy Returned', xlim=(0, 100), ylim=(np.min(rhShot) + 1.5, np.max(rhShot) + 5), height=600,
                   fontsize={'title':16, 'xlabel':16, 'ylabel': 16, 'legend': 14, 'xticks':12, 'yticks':12}, width=400)


wvDF = pd.read_csv('waveform.csv')


wvDF


# Create a holoviews interactive Curve plot with additional parameters defining the plot aesthetics 
visL1B = hv.Curve(wvDF).opts(color='darkgreen', tools=['hover'], height=600, width=400,
           xlim=(np.min(wvDF['Amplitude (DN)']) - 10, np.max(wvDF['Amplitude (DN)']) + 10), 
           ylim=(np.min(wvDF['Elevation (m)']), np.max(wvDF['Elevation (m)'])),
           fontsize={'xticks':10, 'yticks':10,'xlabel':16, 'ylabel': 16, 'title':13}, line_width=2.5, title=f'{str(shot)}')
visL1B


visL1B.opts(height=600, width=400, ylim=(np.min(rhShot), np.max(rhShot)+5), ylabel='Elevation (m)', xlabel='Amplitude (DN)') \
+ l2aVis.opts(height=600, width=400, ylim=(np.min(rhShot), np.max(rhShot)+5))


geolocationSDS = [b for b in beamSDS if '/geolocation/' in b]  # Select all datasets within the geolocation subgroup


a3SDS = [g for g in geolocationSDS if g.endswith('a3')]  # Select algorithm 3 datasets
a3SDS

['BEAM0110/geolocation/elev_highestreturn_a3',
 'BEAM0110/geolocation/elev_lowestmode_a3',
 'BEAM0110/geolocation/elev_lowestreturn_a3',
 'BEAM0110/geolocation/elevs_allmodes_a3',
 'BEAM0110/geolocation/energy_lowestmode_a3',
 'BEAM0110/geolocation/lat_highestreturn_a3',
 'BEAM0110/geolocation/lat_lowestmode_a3',
 'BEAM0110/geolocation/lat_lowestreturn_a3',
 'BEAM0110/geolocation/lats_allmodes_a3',
 'BEAM0110/geolocation/lon_highestreturn_a3',
 'BEAM0110/geolocation/lon_lowestmode_a3',
 'BEAM0110/geolocation/lon_lowestreturn_a3',
 'BEAM0110/geolocation/lons_allmodes_a3',
 'BEAM0110/geolocation/num_detectedmodes_a3',
 'BEAM0110/geolocation/quality_flag_a3',
 'BEAM0110/geolocation/rh_a3',
 'BEAM0110/geolocation/sensitivity_a3']


# Bring in the desired SDS
lata3 = gediL2A[[a for a in a3SDS if a.endswith('/lat_lowestmode_a3')][0]][index]          # Latitude
lona3 = gediL2A[[a for a in a3SDS if a.endswith('/lon_lowestmode_a3')][0]][index]          # Latitude
rhShot1 = gediL2A[[a for a in a3SDS if a.endswith('/rh_a3')][0]][index] / 100              # Relative height metrics
zElevationa3 = gediL2A[[a for a in a3SDS if a.endswith('/elev_lowestmode_a3')][0]][index]  # Elevation
zTopa3 = gediL2A[[a for a in a3SDS if a.endswith('/elev_highestreturn_a3')][0]][index]     # Elevation Highest Return
rhShota3 = [z + zElevationa3 for z in rhShot1]  # To convert canopy height to canopy elevation, add the elevation to each value


rhVisA3 = hv.Curve(rhShota3, label='Algorithm Setting Group 3 (a3)')
rhVisA3 = rhVisA3.opts(color='green', tools=['hover'], height=500, width=400, title='GEDI L2A Relative Height Metrics (a3)', 
                   xlabel='Percent Energy Returned', ylabel='Elevation (m)', xlim=(0,100),ylim=(np.min(rhShot),np.max(rhShot)), 
                   fontsize={'title':14, 'xlabel':16, 'ylabel': 16, 'legend': 14, 'xticks':12, 'yticks':12}, line_width=3.5)
rhVisA3


(rhVis * rhVisA3).opts(show_legend=True, legend_position='bottom_right', title='GEDI L2A Relative Height Metrics by Algorithm',
                       ylabel='Elevation (m)', xlabel='Percent Energy Returned', xlim=(0, 100), 
                       ylim=(np.min(rhShot) - 1.5, np.max(rhShot) + 1.5), height=600, width=600,
                   fontsize={'title':16, 'xlabel':16, 'ylabel': 16, 'legend': 14, 'xticks':12, 'yticks':12})


# Open all of the desired SDS
dem = gediL2A[[g for g in beamSDS if g.endswith('/digital_elevation_model')][0]][()]
srtm = gediL2A[[g for g in beamSDS if g.endswith('/digital_elevation_model_srtm')][0]][()]
zElevation = gediL2A[[g for g in beamSDS if g.endswith('/elev_lowestmode')][0]][()]
zHigh = gediL2A[[g for g in beamSDS if g.endswith('/elev_highestreturn')][0]][()]
zLat = gediL2A[[g for g in beamSDS if g.endswith('/lat_lowestmode')][0]][()]
zLon = gediL2A[[g for g in beamSDS if g.endswith('/lon_lowestmode')][0]][()]
rh = gediL2A[[g for g in beamSDS if g.endswith('/rh')][0]][()]
quality = gediL2A[[g for g in beamSDS if g.endswith('/quality_flag')][0]][()]
degrade = gediL2A[[g for g in beamSDS if g.endswith('/degrade_flag')][0]][()]
sensitivity = gediL2A[[g for g in beamSDS if g.endswith('/sensitivity')][0]][()]
shotNums = gediL2A[f'{beamNames[0]}/shot_number'][()]

# Create a shot index
shotIndex = np.arange(shotNums.size)


canopyHeight = [r[100] for r in rh]  # Grab RH100 (index 100 for each RH metrics)


# Take the DEM, GEDI-produced Elevation, and RH Metrics and add to a Pandas dataframe
transectDF = pd.DataFrame({'Shot Index': shotIndex, 'Shot Number': shotNums, 'Latitude': zLat, 'Longitude': zLon, 
                           'Tandem-X DEM': dem, 'SRTM DEM': srtm, 'Elevation (m)': zElevation, 'Canopy Elevation (m)': zHigh, 
                           'Canopy Height (rh100)': canopyHeight, 'Quality Flag': quality, 'Degrade Flag': degrade, 
                           'Sensitivity': sensitivity})


transectDF


# Plot Canopy Height
canopyVis = hv.Scatter((transectDF['Shot Index'], transectDF['Canopy Height (rh100)']))
canopyVis.opts(color='darkgreen', height=500, width=900, title=f'GEDI L2A Full Transect {beamNames[0]}',
               fontsize={'title':16, 'xlabel':16, 'ylabel': 16}, size=0.1, xlabel='Shot Index', ylabel='Canopy Height (m)')


del canopyVis, canopyHeight, degrade, dem, quality, sensitivity, shotIndex, shotNums, zElevation, zHigh, zLat, zLon


transectDF = transectDF.where(transectDF['Quality Flag'].ne(0))  # Set any poor quality returns to NaN


transectDF


transectDF = transectDF.where(transectDF['Degrade Flag'] < 1)
transectDF = transectDF.where(transectDF['Sensitivity'] > 0.95)


transectDF = transectDF.dropna()  # Drop all of the rows (shots) that did not pass the quality filtering above


print(f"Quality filtering complete, {len(transectDF)} high quality shots remaining.")

Quality filtering complete, 15551 high quality shots remaining.


# Remove any shots where there is no Tandem-X Elevation values 
transectDF = transectDF.where(transectDF['Tandem-X DEM'] != -999999.0).dropna()

# Plot Digital Elevation Model
demVis = hv.Scatter((transectDF['Shot Index'], transectDF['Tandem-X DEM']), label='Tandem-X DEM')
demVis = demVis.opts(color='black', height=500, width=900, fontsize={'xlabel':16, 'ylabel': 16}, size=1.5)


# Remove any shots where there is no SRTM Elevation values 
transectDF = transectDF.where(transectDF['SRTM DEM'] != -999999.0).dropna()

# Plot SRTM Digital Elevation Model
srtmVis = hv.Scatter((transectDF['Shot Index'], transectDF['SRTM DEM']), label='SRTM DEM')
srtmVis = srtmVis.opts(color='darkblue', height=500, width=900, fontsize={'xlabel':16, 'ylabel': 16}, size=1.5)


# Plot GEDI-Retrieved Elevation
zVis = hv.Scatter((transectDF['Shot Index'], transectDF['Elevation (m)']), label='GEDI-derived Elevation')
zVis = zVis.opts(color='saddlebrown', height=500, width=900, fontsize={'xlabel':16, 'ylabel': 16}, size=1.5)


# Plot Canopy Top Elevation
rhVis = hv.Scatter((transectDF['Shot Index'], transectDF['Canopy Elevation (m)']), label='Canopy Top Elevation')
rhVis = rhVis.opts(color='darkgreen', height=500, width=900, fontsize={'xlabel':16, 'ylabel': 16}, size=1.5, 
                   tools=['hover'], xlabel='Shot Index', ylabel='Elevation (m)')


# Combine all three scatterplots
(demVis * srtmVis * zVis * rhVis).opts(show_legend=True, legend_position='top_left',fontsize={'title':14, 'xlabel':16, 
                                        'ylabel': 16}, title=f'{beamNames[0]} Full Transect: {L2A.split(".")[0]}')


print(index)

45730


# Grab 50 points before and after the shot visualized above
start = index - 50
end = index + 50


print(f"The transect begins at ({transectDF['Latitude'][start]}, {transectDF['Longitude'][start]}) and ends at ({transectDF['Latitude'][end]}, {transectDF['Longitude'][end]}).")

The transect begins at (41.26960578006781, -124.05874672300072) and ends at (41.29996571902301, -124.00367502601864).


transectDF = transectDF.loc[start:end]  # Subset the Dataframe to only the selected region of interest over Redwood NP


# Calculate along-track distance
distance = np.arange(0.0, len(transectDF.index) * 60, 60)  # GEDI Shots are spaced 60 m apart
transectDF['Distance'] = distance                          # Add Distance as a new column in the dataframe


# Create lists of canopy height, distance, and canopy elevation 
rhList, distList, ceList = [], [], []
for s, i in enumerate(transectDF['Shot Index']):
    i = int(i)
    rhi = rh[i]
    for r in rhi:
        rhList.append(r)
        distList.append(distance[s])
        ceList.append(r + transectDF['Elevation (m)'][i])  # Convert to Canopy Top Elevation


title = 'Relative Height Metrics (0-100) across Redwood National Park: June 19, 2019'
hv.Scatter((distList,rhList)).opts(color='darkgreen', alpha=0.1, height=500, width=900, size=7.5, 
                                                xlabel='Distance Along Transect (m)', ylabel='Canopy Height (m)', title=title,
                                                fontsize={'title':16, 'xlabel':16, 'ylabel': 16, 'xticks':12, 'yticks':12})


# Plot canopy elevation instead of canopy height
rhVis = hv.Scatter((distList,ceList), label='RH 0-100').opts(color='darkgreen', alpha=0.1, height=500, width=900, size=5)


# Plot GEDI-Retrieved Elevation
demVis = hv.Curve((transectDF['Distance'], transectDF['Elevation (m)']), label='Ground Elevation')
demVis = demVis.opts(color='black', height=500, width=900, line_width=3, xlabel='Distance Along Transect (m)',
                     ylabel='Canopy Height (m)', fontsize={'title':14, 'xlabel':16, 'ylabel': 16, 'xticks':12, 'yticks':12},
                     title='Relative Height Metrics and Elevation across Redwood National Park: June 19, 2019')

# Plot Elevation and RH metrics together
(demVis * rhVis).opts(legend_position='bottom_right')


del ceList, distList, rhList, rhi


del beamSDS, distance, rh, transectDF


beamNames = [g for g in gediL2A.keys() if g.startswith('BEAM')]


beamNames

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011']


# Set up lists to store data
shotNum, dem, zElevation, zHigh, zLat, zLon, rh25, rh98, rh100 ,quality ,degrade, sensitivity ,beamI = ([] for i in range(13))


# Loop through each beam and open the SDS needed
for b in beamNames:
    [shotNum.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/shot_number') and b in g][0]][()]]
    [dem.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/digital_elevation_model') and b in g][0]][()]]
    [zElevation.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/elev_lowestmode') and b in g][0]][()]]  
    [zHigh.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/elev_highestreturn') and b in g][0]][()]]  
    [zLat.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/lat_lowestmode') and b in g][0]][()]]  
    [zLon.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/lon_lowestmode') and b in g][0]][()]]  
    [rh25.append(h[25]) for h in gediL2A[[g for g in gediSDS if g.endswith('/rh') and b in g][0]][()]]  
    [rh98.append(h[98]) for h in gediL2A[[g for g in gediSDS if g.endswith('/rh') and b in g][0]][()]]
    [rh100.append(h[100]) for h in gediL2A[[g for g in gediSDS if g.endswith('/rh') and b in g][0]][()]]  
    [quality.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/quality_flag') and b in g][0]][()]]  
    [degrade.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/degrade_flag') and b in g][0]][()]]  
    [sensitivity.append(h) for h in gediL2A[[g for g in gediSDS if g.endswith('/sensitivity') and b in g][0]][()]]  
    [beamI.append(h) for h in [b] * len(gediL2A[[g for g in gediSDS if g.endswith('/shot_number') and b in g][0]][()])]


# Convert lists to Pandas dataframe
allDF = pd.DataFrame({'Shot Number': shotNum, 'Beam': beamI, 'Latitude': zLat, 'Longitude': zLon, 'Tandem-X DEM': dem,
                      'Elevation (m)': zElevation, 'Canopy Elevation (m)': zHigh, 'Canopy Height (rh100)': rh100, 'RH 98': rh98,
                      'RH 25': rh25, 'Quality Flag': quality, 'Degrade Flag': degrade, 'Sensitivity': sensitivity})


del beamI, degrade, dem, gediSDS, rh100, rh98, rh25, quality, sensitivity, zElevation, zHigh, zLat, zLon, shotNum


len(allDF)

790135


redwoodNP.envelope[0].bounds

(-124.16015705494489,
 41.080601363502545,
 -123.84950230520286,
 41.83981133687605)


minLon, minLat, maxLon, maxLat = redwoodNP.envelope[0].bounds  # Define the min/max lat/lon from the bounds of Redwood NP


allDF = allDF.where(allDF['Latitude'] > minLat)
allDF = allDF.where(allDF['Latitude'] < maxLat)
allDF = allDF.where(allDF['Longitude'] > minLon)
allDF = allDF.where(allDF['Longitude'] < maxLon)


allDF = allDF.dropna()  # Drop shots outside of the ROI


len(allDF)

4477


# Set any poor quality returns to NaN
allDF = allDF.where(allDF['Quality Flag'].ne(0))
allDF = allDF.where(allDF['Degrade Flag'] < 1) 
allDF = allDF.where(allDF['Sensitivity'] > 0.95)
allDF = allDF.dropna()
len(allDF)

3079


# Take the lat/lon dataframe and convert each lat/lon to a shapely point
allDF['geometry'] = allDF.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)


# Convert to geodataframe
allDF = gp.GeoDataFrame(allDF)
allDF = allDF.drop(columns=['Latitude','Longitude'])


allDF['Shot Number'] = allDF['Shot Number'].astype(str)  # Convert shot number to string

vdims = []
for f in allDF:
    if f not in ['geometry']:
        vdims.append(f)

visual = pointVisual(allDF, vdims = vdims)
visual * gv.Polygons(redwoodNP['geometry']).opts(line_color='red', color=None)


# Plot the basemap and geoviews Points, defining the color as the Canopy Height for each shot
(gvts.EsriImagery * gv.Points(allDF, vdims=vdims).options(color='Canopy Height (rh100)',cmap='plasma', size=3, tools=['hover'],
                                                          clim=(0,102), colorbar=True, clabel='Meters',
                                                          title='GEDI Canopy Height over Redwood National Park: June 19, 2019',
                                                          fontsize={'xticks': 10, 'yticks': 10, 'xlabel':16, 'clabel':12,
                                                                    'cticks':10,'title':16,'ylabel':16})).options(height=500,
                                                                                                                  width=900)


(gvts.EsriImagery * gv.Points(allDF, vdims=vdims).options(color='Elevation (m)',cmap='terrain', size=3, tools=['hover'],
                                                          clim=(min(allDF['Elevation (m)']), max(allDF['Elevation (m)'])),
                                                          colorbar=True, clabel='Meters',
                                                          title='GEDI Elevation over Redwood National Park: June 19, 2019',
                                                          fontsize={'xticks': 10, 'yticks': 10, 'xlabel':16, 'clabel':12,
                                                                    'cticks':10,'title':16,'ylabel':16})).options(height=500,
                                                                                                                  width=900)


gediL2A.filename  # L2A Filename

'GEDI02_A_2019170155833_O02932_02_T02267_02_003_01_V002.h5'


outName = gediL2A.filename.replace('.h5', '.json')  # Create an output file name using the input file name
outName

'GEDI02_A_2019170155833_O02932_02_T02267_02_003_01_V002.json'


allDF.to_file(outName, driver='GeoJSON')  # Export to GeoJSON


del allDF

	Beam	Shot Number	Longitude	Latitude	Quality Flag
0	BEAM0110	29320600200419869	-142.755692	26.923896	0
1	BEAM0110	29320600200419969	-142.736567	26.943242	0
2	BEAM0110	29320600200420069	-142.717433	26.962569	0
3	BEAM0110	29320600200420169	-142.698294	26.981891	0
4	BEAM0110	29320600200420269	-142.679136	27.001187	0
...	...	...	...	...	...
1066	BEAM0110	29320600200526469	-80.198452	51.796858	0
1067	BEAM0110	29320600200526569	-80.114677	51.797029	0
1068	BEAM0110	29320600200526669	-80.032173	51.797173	0
1069	BEAM0110	29320600200526769	-79.948571	51.797245	0
1070	BEAM0110	29320600200526869	-79.865698	51.797246	0

	Amplitude (DN)	Elevation (m)
0	227.00992	111.252897
1	226.57410	111.103165
2	226.66390	110.953434
3	227.26736	110.803702
4	228.11644	110.653970
...	...	...
1241	230.74078	-74.564040
1242	230.04378	-74.713771
1243	229.24507	-74.863503
1244	228.71117	-75.013235
1245	228.53528	-75.162966

	Shot Index	Shot Number	Latitude	Longitude	Tandem-X DEM	SRTM DEM	Elevation (m)	Canopy Elevation (m)	Canopy Height (rh100)	Quality Flag	Degrade Flag	Sensitivity
0	0	29320600200419869	26.923896	-142.755692	-999999.000000	-999999.0	3693.503906	3693.503906	0.0	0	0	5.029931
1	1	29320600200419870	26.924089	-142.755501	-999999.000000	-999999.0	3693.518311	3693.518311	0.0	0	0	-4.620663
2	2	29320600200419871	26.924283	-142.755309	-999999.000000	-999999.0	3693.532715	3693.532715	0.0	0	0	6.613796
3	3	29320600200419872	26.924477	-142.755118	-999999.000000	-999999.0	3693.547119	3693.547119	0.0	0	0	-0.788731
4	4	29320600200419873	26.924671	-142.754927	-999999.000000	-999999.0	3693.561523	3693.561523	0.0	0	0	-64.327515
...	...	...	...	...	...	...	...	...	...	...	...	...
107009	107009	29320600200526878	51.797246	-79.858241	-36.021603	-999999.0	4535.905273	4535.905273	0.0	0	80	0.446188
107010	107010	29320600200526879	51.797246	-79.857414	-999999.000000	-999999.0	4543.574219	4543.574219	0.0	0	80	0.709804
107011	107011	29320600200526880	51.797245	-79.856577	-999999.000000	-999999.0	4515.902832	4515.902832	0.0	0	80	-0.816940
107012	107012	29320600200526881	51.797245	-79.855748	-999999.000000	-999999.0	4516.102051	4516.102051	0.0	0	80	-43.301651
107013	107013	29320600200526882	51.797245	-79.854919	-41.067085	-999999.0	4515.569336	4515.569336	0.0	0	80	0.408280

Getting Started with GEDI L2A Version 2 Data in Python¶

This tutorial demonstrates how to work with the Elevation and Height Metrics (GEDI02_A.002) data product.¶

Use Case Example:¶

Data Used in the Example:¶

Topics Covered:¶

Before Starting this Tutorial:¶

Setup and Dependencies¶

Having trouble getting a compatible Python environment set up? Contact LP DAAC User Services at: https://lpdaac.usgs.gov/lpdaac-contact-us/¶

Example Data:¶

A NASA Earthdata Login account is required to download the data used in this tutorial. You can create an account at the link provided.¶

You will need to have the file above downloaded into the same directory as this Jupyter Notebook in order to successfully run the code below.¶

Source Code used to Generate this Tutorial:¶

1. Get Started ¶

1.1 Import Packages ¶

Import the required packages and set the input/working directory to run this Jupyter Notebook locally.¶

1.2 Set Up the Working Environment and Retrieve Files¶

The input directory is defined as the current working directory. Note that you will need to have the Jupyter Notebook and example data (.h5 and .geojson) stored in this directory in order to execute the tutorial successfully.¶

In this section, a Version 2 GEDI L2A .h5 file has been downloaded to the inDir defined above. You will need to download the file directly from the LP DAAC Data Pool in order to execute this tutorial.¶

Direct Link to file:¶

Make sure to download the file into the inDir directory defined above.¶

2. Import and Interpret Data ¶

In this section, begin working with the GEDI Level 2A Elevation and Height Metrics Data. Begin by checking out the example L2A file metadata.¶

2.1 Open a GEDI HDF5 File and Read File Metadata ¶

Read the file using h5py.¶

The standard format for GEDI filenames is as follows:¶

Read in a GEDI HDF5 file using the h5py package.¶

Navigate the HDF5 file below.¶

The GEDI HDF5 file contains groups in which data and metadata are stored.¶

First, the METADATA group contains the file-level metadata.¶

2.2 Read SDS Metadata and Subset by Beam ¶

The GEDI instrument consists of 3 lasers producing a total of 8 beam ground transects. The eight remaining groups contain data for each of the eight GEDI beam transects. For additional information, be sure to check out: https://gedi.umd.edu/instrument/specifications/.¶

One useful piece of metadata to retrieve from each beam transect is whether it is a full power beam or a coverage beam.¶

Below, pick one of the full power beams that will be used to retrieve GEDI L2A relative height metrics in Section 3.¶

Identify all the objects in the GEDI HDF5 file below.¶

3. Visualize a GEDI Orbit ¶

In the section below, import GEDI L2A SDS layers into a GeoPandas GeoDataFrame for the beam specified above.¶

Use the lat_lowestmode and lon_lowestmode to create a shapely point for each GEDI shot location.¶

3.1 Subset by Layer and Create a Geodataframe ¶

Read in the SDS and take a representative sample (every 100th shot) and append to lists, then use the lists to generate a pandas dataframe.¶

Above is a dataframe containing columns describing the beam, shot number, lat/lon location, and quality information about each shot.¶

Below, create an additional column called 'geometry' that contains a shapely point generated from each lat/lon location from the shot.¶

Next, convert to a Geopandas GeoDataFrame.¶

Pull out and plot an example shapely point below.¶

3.2 Visualize a GeoDataFrame ¶

In this section, use the GeoDataFrame and the geoviews python package to spatially visualize the location of the GEDI shots on a basemap and import a GeoJSON file of the spatial region of interest for the use case example: Redwood National Park.¶

Import a GeoJSON of Redwood National Park as an additional GeoDataFrame. Note that you will need to have downloaded the GeoJSON from the bitbucket repo containing this tutorial and have it saved in the same directory as this Jupyter Notebook.¶

Defining the vdims below will allow you to hover over specific shots and view information about them.¶

Below, combine a plot of the Redwood National Park Boundary (combine two geoviews plots using *) with the point visual mapping function defined above in order to plot (1) the representative GEDI shots, (2) the region of interest, and (3) a basemap layer.¶

Below is a screenshot of the region of interest:¶

Side Note: Wondering what the 0's and 1's for quality_flag mean?¶

Above, 0 is poor quality and a quality_flag value of 1 indicates the laser shot meets criteria based on energy, sensitivity, amplitude, and real-time surface tracking quality. We will show an example of how to quality filter GEDI data in section 6.1.¶

After finding one of the shots within Redwood NP, find the index for that shot number so that we can find the correct RH metrics to visualize in Section 4.¶

Shot: 29320600200465601¶

4. Work with GEDI L2A Data¶

Interpretation of RH Metrics¶

The GEDI L2A data product provides relative height (RH) metrics, which are “lidar perceived” metrics that have the following characteristics:¶

In this section, import and extract a specific GEDI L2A shot. From there, plot relative height (RH) metrics, and then combine with the L1B full waveform data. Note that we will continue to use BEAM0110 from Section 3.¶

4.1 Import and Extract Specific Shots¶

Notice that there are thousands of datasets available in the GEDI L2A product. In the code blocks below, you will subset to just a few of the datasets available.¶

Begin by subsetting just to the selected full power beam:¶

We will set the shot index used as an example from the GEDI L1B Tutorial to show how to subset a single shot of GEDI L2A data.¶

4.2 Plot Relative Height Metrics¶

In section 4.2, import the relative height metrics and begin exploring how to plot them.¶

Print the description for the rh dataset to better understand relative height (RH) metrics.¶

New with Version 2:¶

The GEDI L2A product provides waveform processing results for multiple algorithm settings. Results for a default algorithm selection are provided in the root directory of the data product for each beam. In GEDI Version 1, the default for all shots was algorithm setting group 1.¶

Next, bring in other useful L2A datasets such as lat_lowestmode and lon_lowestmode.¶

Grab the location, relative height metrics, and algorithm for the shot defined above:¶

Put everything together to identify the shot that we want to extract:¶

In order to plot the RH metrics, you also need to import the elevation recorded at elev_lowestmode (the first elevation recorded) and elev_highestreturn or the last elevation recorded for that waveform.¶

Below, convert canopy height to canopy elevation by adding the ground elevation to each RH 1% interval.¶

Next, use the holoviews (hv) package to make a line plot showing the elevation at each percentile for the selected shot.¶

Congratulations! You have plotted your first relative cumulative RH profile.¶

The rh dataset stores the relative height metrics at 1% intervals, and thus each shot contains 101 values representing rh at 0-100%.¶

Next, add some additional context to the cumulative RH profile, including metrics such as the elev_lowestmode (ground elevation), the elev_highestreturn (highest reflecting surface height), and the relative height metrics at each quartile.¶

Based on the figure above, it looks like the densest portion of the tree canopy for this shot is at around 45-55 meters.¶

4.3 Combine RH Metrics and Waveforms¶

Note that you will need to have downloaded the waveform.csv file from the repo in order to execute the section below.¶

In this section, we will import the corresponding waveform for our selected shot.¶

Next, create a holoviews curve plot of the waveform. If you are interested in learning more about plotting L1B waveforms, be sure to check out the GEDI L1B Tutorial.¶

1.2 Set Up the Working Environment and Retrieve Files ¶

In this section, a Version 2 GEDI L2A .h5 file has been downloaded to the `inDir` defined above. You will need to download the file directly from the LP DAAC Data Pool in order to execute this tutorial.¶

Make sure to download the file into the `inDir` directory defined above.¶

Read the file using `h5py`.¶

Read in a GEDI HDF5 file using the `h5py` package.¶

First, the `METADATA` group contains the file-level metadata.¶

In the section below, import GEDI L2A SDS layers into a `GeoPandas` GeoDataFrame for the beam specified above.¶

Use the `lat_lowestmode` and `lon_lowestmode` to create a `shapely` point for each GEDI shot location.¶

Read in the SDS and take a representative sample (every 100th shot) and append to lists, then use the lists to generate a `pandas` dataframe.¶

Below, create an additional column called 'geometry' that contains a `shapely` point generated from each lat/lon location from the shot.¶

Next, convert to a `Geopandas` GeoDataFrame.¶

Pull out and plot an example `shapely` point below.¶

In this section, use the GeoDataFrame and the `geoviews` python package to spatially visualize the location of the GEDI shots on a basemap and import a GeoJSON file of the spatial region of interest for the use case example: Redwood National Park.¶

Below, combine a plot of the Redwood National Park Boundary (combine two `geoviews` plots using `*`) with the point visual mapping function defined above in order to plot (1) the representative GEDI shots, (2) the region of interest, and (3) a basemap layer.¶

Side Note: Wondering what the 0's and 1's for `quality_flag` mean?¶

4. Work with GEDI L2A Data ¶

In this section, import and extract a specific GEDI L2A shot. From there, plot relative height (RH) metrics, and then combine with the L1B full waveform data. Note that we will continue to use `BEAM0110` from Section 3.¶

Print the description for the `rh` dataset to better understand relative height (RH) metrics.¶

Next, bring in other useful L2A datasets such as `lat_lowestmode` and `lon_lowestmode`.¶

In order to plot the RH metrics, you also need to import the elevation recorded at `elev_lowestmode` (the first elevation recorded) and `elev_highestreturn` or the last elevation recorded for that waveform.¶

Next, use the `holoviews` (hv) package to make a line plot showing the elevation at each percentile for the selected shot.¶

The `rh` dataset stores the relative height metrics at 1% intervals, and thus each shot contains 101 values representing rh at 0-100%.¶

Next, add some additional context to the cumulative RH profile, including metrics such as the `elev_lowestmode` (ground elevation), the `elev_highestreturn` (highest reflecting surface height), and the relative height metrics at each quartile.¶

Next, create a `holoviews` curve plot of the waveform. If you are interested in learning more about plotting L1B waveforms, be sure to check out the GEDI L1B Tutorial.¶

Next, select the datasets for Algorithm Setting Group 3 (`a3`).¶

Next, import a number of desired SDS layers for BEAM0110 (for the entire orbit) and create a `pandas` Dataframe to store the arrays.¶

Now that you have the desired SDS into a `pandas` dataframe, begin plotting the entire beam transect:¶

Below, remove any shots where the `quality_flag` is set to 0 by defining those shots as `nan`.¶

The syntax of the line below can be read as: in the dataframe, find the rows "where" the quality flag is not equal (ne) to 0. If a row (shot) does not meet the condition, set all values equal to `nan` for that row.¶

Below, quality filter even further by using the `degrade_flag` (Greater than zero if the shot occurs during a degrade period, zero otherwise) and the `Sensitivity` layer, using a threshold of 0.95.¶

Below, drop all of the shots that did not pass the quality filtering standards outlined above from the `transectDF`.¶

Next, plot the full remaining transect of high quality values using `holoviews` Scatter(). Combine the Tandem-X derived elevation, SRTM elevation (New to Version 2!), the GEDI-derived elevation, and the Canopy Top Elevation in a combined holoviews plot.¶

Below, subset the transect using `.loc`.¶

6. Spatial Visualization ¶

Loop through each of the desired datasets (SDS) for each beam, append to lists, and transform into a `pandas` DataFrame.¶

Down to roughly 2000 shots, next create a `Shapely` Point out of each shot and insert it as the geometry column in the [soon to be geo]dataframe.¶

Now, using the `pointVisual` function defined in section 3.2, plot the `geopandas` GeoDataFrame using `geoviews`.¶

7. Export Subsets as GeoJSON Files ¶

In this section, export the GeoDataFrame as a `.geojson` file that can be easily opened in your favorite remote sensing and/or GIS software and will include an attribute table with all of the shots/values for each of the SDS layers in the dataframe.¶