import os
import h5py
import numpy as np
import pandas as pd
import geopandas as gp
from shapely.geometry import Point
import geoviews as gv
from geoviews import opts, tile_sources as gvts
import holoviews as hv
gv.extension('bokeh', 'matplotlib')


inDir = os.getcwd() + os.sep  # Set input directory to the current working directory


gediFiles = [g for g in os.listdir() if g.startswith('GEDI01_B') and g.endswith('.h5')]  # List all GEDI L1B .h5 files in inDir
gediFiles

['GEDI01_B_2019170155833_O02932_T02267_02_003_01.h5']


L1B = 'GEDI01_B_2019170155833_O02932_T02267_02_003_01.h5'
L1B

'GEDI01_B_2019170155833_O02932_T02267_02_003_01.h5'


gediL1B = h5py.File(L1B, 'r')  # Read file using h5py


list(gediL1B.keys())

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011',
 'METADATA']


list(gediL1B['METADATA'])

['DatasetIdentification']


for g in gediL1B['METADATA']['DatasetIdentification'].attrs: print(g)

PGEVersion
VersionID
abstract
characterSet
creationDate
credit
fileName
language
originatorOrganizationName
purpose
shortName
spatialRepresentationType
status
topicCategory
uuid


print(gediL1B['METADATA']['DatasetIdentification'].attrs['purpose'])

The purpose of the L1B dataset is to provide geolocated waveforms and supporting datasets for each laser shot for all eight GEDI beams.  This includes corrected and smoothed waveforms, geolocation parameters, and geophysical corrections.


beamNames = [g for g in gediL1B.keys() if g.startswith('BEAM')]
beamNames

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011']


for g in gediL1B['BEAM0000'].attrs: print(g)

description
wp-l1a-to-l1b_githash
wp-l1a-to-l1b_version


for b in beamNames: 
    print(f"{b} is a {gediL1B[b].attrs['description']}")

BEAM0000 is a Coverage beam
BEAM0001 is a Coverage beam
BEAM0010 is a Coverage beam
BEAM0011 is a Coverage beam
BEAM0101 is a Full power beam
BEAM0110 is a Full power beam
BEAM1000 is a Full power beam
BEAM1011 is a Full power beam


beamNames = ['BEAM0110']


gediL1B_objs = []
gediL1B.visit(gediL1B_objs.append)                                           # Retrieve list of datasets
gediSDS = [o for o in gediL1B_objs if isinstance(gediL1B[o], h5py.Dataset)]  # Search for relevant SDS inside data file
[i for i in gediSDS if beamNames[0] in i][0:10]                              # Print the first 10 datasets for selected beam

['BEAM0110/all_samples_sum',
 'BEAM0110/ancillary/master_time_epoch',
 'BEAM0110/ancillary/mean_samples',
 'BEAM0110/ancillary/smoothing_width',
 'BEAM0110/beam',
 'BEAM0110/channel',
 'BEAM0110/geolocation/altitude_instrument',
 'BEAM0110/geolocation/altitude_instrument_error',
 'BEAM0110/geolocation/bounce_time_offset_bin0',
 'BEAM0110/geolocation/bounce_time_offset_bin0_error']


lonSample, latSample, shotSample, srfSample, degradeSample, beamSample = [], [], [], [], [], []  # Set up lists to store data

# Open the SDS
lats = gediL1B[f'{beamNames[0]}/geolocation/latitude_bin0'][()]
lons = gediL1B[f'{beamNames[0]}/geolocation/longitude_bin0'][()]
shots = gediL1B[f'{beamNames[0]}/shot_number'][()]
srf = gediL1B[f'{beamNames[0]}/stale_return_flag'][()]
degrade = gediL1B[f'{beamNames[0]}/geolocation/degrade'][()]

# Take every 100th shot and append to list
for i in range(len(shots)):
    if i % 100 == 0:
        shotSample.append(str(shots[i]))
        lonSample.append(lons[i])
        latSample.append(lats[i])
        srfSample.append(srf[i])
        degradeSample.append(degrade[i])
        beamSample.append(beamNames[0])
            
# Write all of the sample shots to a dataframe
latslons = pd.DataFrame({'Beam': beamSample, 'Shot Number': shotSample, 'Longitude': lonSample, 'Latitude': latSample,
                         'Stale Return Flag': srfSample, 'Degrade': degradeSample})
latslons


# Clean up variables that will no longer be needed
del beamSample, degrade, degradeSample, gediL1B_objs, latSample, lats, lonSample, lons, shotSample, shots, srf, srfSample


# Take the lat/lon dataframe and convert each lat/lon to a shapely point
latslons['geometry'] = latslons.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)


# Convert to a Geodataframe
latslons = gp.GeoDataFrame(latslons)
latslons = latslons.drop(columns=['Latitude','Longitude'])
latslons['geometry']

0       POINT (111.99630 -51.80387)
1       POINT (112.03913 -51.80391)
2       POINT (112.08027 -51.80384)
3       POINT (112.12145 -51.80374)
4       POINT (112.16262 -51.80362)
                   ...             
9792     POINT (88.20845 -51.80358)
9793     POINT (88.24961 -51.80361)
9794     POINT (88.29075 -51.80358)
9795     POINT (88.33191 -51.80355)
9796     POINT (88.37309 -51.80351)
Name: geometry, Length: 9797, dtype: geometry


latslons['geometry'][0]


# Define a function for visualizing GEDI points
def pointVisual(features, vdims):
    return (gvts.EsriImagery * gv.Points(features, vdims=vdims).options(tools=['hover'], height=500, width=900, size=5, 
                                                                        color='yellow', fontsize={'xticks': 10, 'yticks': 10, 
                                                                                                  'xlabel':16, 'ylabel': 16}))


redwoodNP = gp.GeoDataFrame.from_file('RedwoodNP.geojson')  # Import geojson as GeoDataFrame


redwoodNP


redwoodNP['geometry'][0]  # Plot GeoDataFrame


# Create a list of geodataframe columns to be included as attributes in the output map
vdims = []
for f in latslons:
    if f not in ['geometry']:
        vdims.append(f)
vdims

['Beam', 'Shot Number', 'Stale Return Flag', 'Degrade']


# Call the function for plotting the GEDI points
gv.Polygons(redwoodNP['geometry']).opts(line_color='red', color=None) * pointVisual(latslons, vdims = vdims)


print(f"stale_return_flag: {gediL1B[b]['stale_return_flag'].attrs['description']}")
print(f"degrade: {gediL1B[b]['geolocation']['degrade'].attrs['description']}")

stale_return_flag: Indicates that a "stale" cue point from the coarse search algorithm is being used.
degrade: Greater than zero if the shot occurs during a degrade period, zero otherwise.


shot = 29320619900465601


index = np.where(gediL1B[f'{beamNames[0]}/shot_number'][()]==shot)[0][0]  # Set the index for the shot identified above
index

465600


del latslons  # No longer need the geodataframe used to visualize the full GEDI orbit


# From the SDS list, use list comprehension to find sample_count, sample_start_index, and rxwaveform
sdsCount = gediL1B[[g for g in gediSDS if g.endswith('/rx_sample_count') and beamNames[0] in g][0]]
sdsStart = gediL1B[[g for g in gediSDS if g.endswith('/rx_sample_start_index') and beamNames[0] in g][0]]
sdsWaveform = [g for g in gediSDS if g.endswith('/rxwaveform') and beamNames[0] in g][0]


print(f"rxwaveform is {gediL1B[sdsWaveform].attrs['description']}")

rxwaveform is The corrected receive (RX) waveforms.  Use rx_sample_count and rx_sample_start_index to identify the location of each waveform.


print(f"rx_sample_count is {sdsCount.attrs['description']}")
print(f"rx_sample_start_index is {sdsStart.attrs['description']}")

rx_sample_count is The number of sample intervals (elements) in each RX waveform.
rx_sample_start_index is The index in the rxwaveform dataset of the first element of each RX waveform.  The indices start at 1.


wfCount = sdsCount[index]           # Number of samples in the waveform
wfStart = int(sdsStart[index] - 1)  # Subtract one because python array indexing starts at 0 not 1


wfShot = gediL1B[f'{beamNames[0]}/shot_number'][index]
wfLat = gediL1B[f'{beamNames[0]}/geolocation/latitude_bin0'][index]
wfLon = gediL1B[f'{beamNames[0]}/geolocation/longitude_bin0'][index]


print(f"The waveform located at: {str(wfLat)}, {str(wfLon)} (shot ID: {wfShot}, index {index}) is from beam {beamNames[0]} \
      and is stored in rxwaveform beginning at index {wfStart} and ending at index {wfStart + wfCount}")

The waveform located at: 41.28533109208681, -124.0300090357876 (shot ID: 29320619900465601, index 465600) is from beam BEAM0110       and is stored in rxwaveform beginning at index 661152000 and ending at index 661152812


# Grab the elevation recorded at the start and end of the full waveform capture
zStart = gediL1B[f'{beamNames[0]}/geolocation/elevation_bin0'][index]   # Height of the start of the rx window
zEnd = gediL1B[f'{beamNames[0]}/geolocation/elevation_lastbin'][index]  # Height of the end of the rx window


print("{:,}".format(gediL1B[sdsWaveform].shape[0]))

1,391,172,580


# Retrieve the waveform sds layer using the sample start index and sample count information to slice the correct dimensions
waveform = gediL1B[sdsWaveform][wfStart: wfStart + wfCount]


# Find elevation difference from start to finish and divide into equal intervals based on sample_count
zStretch = np.add(zEnd, np.multiply(range(wfCount, 0, -1), ((zStart - zEnd) / int(wfCount))))


# match the waveform amplitude values with the elevation and convert to Pandas df
wvDF = pd.DataFrame({'Amplitude (DN)': waveform, 'Elevation (m)': zStretch})
wvDF


hv.Curve(wvDF) # Basic line graph plotting the waveform


# Create a holoviews interactive Curve plot with additional parameters defining the plot aesthetics 
wfVis = hv.Curve(wvDF).opts(color='darkgreen', tools=['hover'], height=500, width=400,
           xlim=(np.min(waveform) - 10, np.max(waveform) + 10), ylim=(np.min(zStretch), np.max(zStretch)),
           fontsize={'xticks':10, 'yticks':10,'xlabel':16, 'ylabel': 16, 'title':13}, line_width=2.5, title=f'{str(wfShot)}')
wfVis


latlons = {}                          # Set up a dictionary to hold multiple waveforms
latlons[wfShot] = Point(wfLon,wfLat)  # Create shapely point and add to dictionary

# Retain waveform and quality layers to be exported later
latlonsWF = [waveform]
latlonsEL = [zStretch]
latlonsSRF = [gediL1B[f'{beamNames[0]}/stale_return_flag'][index]]
latlonsD = [gediL1B[f'{beamNames[0]}/geolocation/degrade'][index]]


# Define which observation to examine by selecting the three indexes prior to the one used above
index = 465597


# Use rx_sample_count and rx_sample_start_index to identify the location of each waveform
wfCount = sdsCount[index]                                             # Number of samples in the waveform
wfStart = int(sdsStart[index] - 1)                                    # Subtract 1, index starts at 0 not 1
wfShot = gediL1B[f'{beamNames[0]}/shot_number'][index]                # Unique Shot Number
wfLat = gediL1B[f'{beamNames[0]}/geolocation/latitude_bin0'][index]   # Latitude
wfLon = gediL1B[f'{beamNames[0]}/geolocation/longitude_bin0'][index]  # Longitude
latlons[wfShot] = Point(wfLon,wfLat)                                  # Create shapely point and add to dictionary

# Grab the elevation recorded at the start and end of the full waveform capture
zStart = gediL1B[f'{beamNames[0]}/geolocation/elevation_bin0'][index]   # Height of the start of the rx window
zEnd = gediL1B[f'{beamNames[0]}/geolocation/elevation_lastbin'][index]  # Height of the end of the rx window

# Retrieve the waveform sds layer using the sample start index and sample count information to slice the correct dimensions
waveform = gediL1B[sdsWaveform][wfStart: wfStart + wfCount]


# Find elevation difference from start to finish, divide into equal intervals
zStretch = np.add(zEnd, np.multiply(range(wfCount, 0, -1), ((zStart - zEnd) / int(wfCount))))

# match the waveform amplitude values with the elevation and convert to Pandas df
wvDF = pd.DataFrame({'Amplitude (DN)': waveform, 'Elevation (m)': zStretch})


# Append waveform and quality layers to be exported later
latlonsWF.append(waveform)
latlonsEL.append(zStretch)
latlonsSRF.append(gediL1B[f'{beamNames[0]}/stale_return_flag'][index])
latlonsD.append(gediL1B[f'{beamNames[0]}/geolocation/degrade'][index])


# Create a holoviews interactive Curve plot with additional parameters defining the plot aesthetics 
visL1B1 = hv.Curve(wvDF).opts(color='darkgreen', tools=['hover'], height=500, width=400,
           xlim=(np.min(waveform) - 10, np.max(waveform) + 10), ylim=(np.min(zStretch), np.max(zStretch)),
           fontsize={'xticks':10, 'yticks':10,'xlabel':16, 'ylabel': 16, 'title':13}, line_width=2.5, title=f'{str(wfShot)}')
visL1B1


# Define which observation to examine by selecting the three indexes prior to the one used above
index = 465598


# Use rx_sample_count and rx_sample_start_index to identify the location of each waveform
wfCount = sdsCount[index]                                             # Number of samples in the waveform
wfStart = int(sdsStart[index] - 1)                                    # Subtract 1, index starts at 0 not 1
wfShot = gediL1B[f'{beamNames[0]}/shot_number'][index]                # Unique Shot Number
wfLat = gediL1B[f'{beamNames[0]}/geolocation/latitude_bin0'][index]   # Latitude
wfLon = gediL1B[f'{beamNames[0]}/geolocation/longitude_bin0'][index]  # Longitude
latlons[wfShot] = Point(wfLon,wfLat)                                  # Create shapely point and add to dictionary

# Grab the elevation recorded at the start and end of the full waveform capture
zStart = gediL1B[f'{beamNames[0]}/geolocation/elevation_bin0'][index]   # Height of the start of the rx window
zEnd = gediL1B[f'{beamNames[0]}/geolocation/elevation_lastbin'][index]  # Height of the end of the rx window

# Retrieve the waveform sds layer using the sample start index and sample count information to slice the correct dimensions
waveform = gediL1B[sdsWaveform][wfStart: wfStart + wfCount]


# Find elevation difference from start to finish, divide into equal intervals
zStretch = np.add(zEnd, np.multiply(range(wfCount, 0, -1), ((zStart - zEnd) / int(wfCount))))

# match the waveform amplitude values with the elevation and convert to Pandas df
wvDF = pd.DataFrame({'Amplitude (DN)': waveform, 'Elevation (m)': zStretch})


# Append waveform and quality layers to be exported later
latlonsWF.append(waveform)
latlonsEL.append(zStretch)
latlonsSRF.append(gediL1B[f'{beamNames[0]}/stale_return_flag'][index])
latlonsD.append(gediL1B[f'{beamNames[0]}/geolocation/degrade'][index])


# Create a holoviews interactive Curve plot with additional parameters defining the plot aesthetics 
visL1B2 = hv.Curve(wvDF).opts(color='darkgreen', tools=['hover'], height=500, width=400,
           xlim=(np.min(waveform) - 10, np.max(waveform) + 10), ylim=(np.min(zStretch), np.max(zStretch)),
           fontsize={'xticks':10, 'yticks':10,'xlabel':16, 'ylabel': 16, 'title':13}, line_width=2.5, title=f'{str(wfShot)}')
visL1B2


# Define which observation to examine by selecting the three indexes prior to the one used above
index = 465599


# Use rx_sample_count and rx_sample_start_index to identify the location of each waveform
wfCount = sdsCount[index]                                             # Number of samples in the waveform
wfStart = int(sdsStart[index] - 1)                                    # Subtract 1, index starts at 0 not 1
wfShot = gediL1B[f'{beamNames[0]}/shot_number'][index]                # Unique Shot Number
wfLat = gediL1B[f'{beamNames[0]}/geolocation/latitude_bin0'][index]   # Latitude
wfLon = gediL1B[f'{beamNames[0]}/geolocation/longitude_bin0'][index]  # Longitude
latlons[wfShot] = Point(wfLon,wfLat)                                  # Create shapely point and add to dictionary

# Grab the elevation recorded at the start and end of the full waveform capture
zStart = gediL1B[f'{beamNames[0]}/geolocation/elevation_bin0'][index]   # Height of the start of the rx window
zEnd = gediL1B[f'{beamNames[0]}/geolocation/elevation_lastbin'][index]  # Height of the end of the rx window

# Retrieve the waveform sds layer using the sample start index and sample count information to slice the correct dimensions
waveform = gediL1B[sdsWaveform][wfStart: wfStart + wfCount]


# Find elevation difference from start to finish, divide into equal intervals
zStretch = np.add(zEnd, np.multiply(range(wfCount, 0, -1), ((zStart - zEnd) / int(wfCount))))

# match the waveform amplitude values with the elevation and convert to Pandas df
wvDF = pd.DataFrame({'Amplitude (DN)': waveform, 'Elevation (m)': zStretch})


# Append waveform and quality layers to be exported later
latlonsWF.append(waveform)
latlonsEL.append(zStretch)
latlonsSRF.append(gediL1B[f'{beamNames[0]}/stale_return_flag'][index])
latlonsD.append(gediL1B[f'{beamNames[0]}/geolocation/degrade'][index])


# Create a holoviews interactive Curve plot with additional parameters defining the plot aesthetics 
visL1B3 = hv.Curve(wvDF).opts(color='darkgreen', tools=['hover'], height=500, width=400,
           xlim=(np.min(waveform) - 10, np.max(waveform) + 10), ylim=(np.min(zStretch), np.max(zStretch)),
           fontsize={'xticks':10, 'yticks':10,'xlabel':16, 'ylabel': 16, 'title':13}, line_width=2.5, title=f'{str(wfShot)}')
visL1B3


# The "+" symbol will plot multiple saved holoviews plots together
visL1B1.opts(width=240) + visL1B2.opts(width=240, labelled=[]) + visL1B3.opts(width=240, labelled=[]) + \
wfVis.opts(width=240, labelled=[])


# Convert dict to geodataframe
latlons = gp.GeoDataFrame({'Shot Number': list(latlons.keys()),'rxwaveform Amplitude (DN)': latlonsWF,
                           'rxwaveform Elevation (m)': latlonsEL, 'Stale Return Flag': latlonsSRF, 'Degrade': latlonsD,
                           'geometry': list(latlons.values())})


latlons['Shot Number'] = latlons['Shot Number'].astype(str)  # Convert shot number from integer to string


vdims

['Beam', 'Shot Number', 'Stale Return Flag', 'Degrade']


# Create a list of geodataframe columns to be included as attributes in the output map
vdims = []
for f in latlons:
    if f not in ['geometry']:
        if 'rxwaveform' not in f:
            vdims.append(f)

# Plot the geodataframe
gv.Polygons(redwoodNP['geometry']).opts(line_color='red', color=None) * pointVisual(latlons, vdims=vdims)


del wfVis, visL1B1, visL1B2, visL1B3, waveform, wvDF, zStretch, latlonsWF, latlonsEL, latlonsSRF, latlonsD


latlons = latlons.where(latlons['Stale Return Flag'].ne(1))  # Set any stale returns to NaN


latlons = latlons.where(latlons['Degrade'].ne(1))


latlons = latlons.dropna()  # Drop all of the rows (shots) that did not pass the quality filtering above


print(f"Quality filtering complete, {len(latlons)} high quality shots remaining.")

Quality filtering complete, 4 high quality shots remaining.


del latlons


print(index)

465599


# Grab 50 points before and after the shot visualized above
start = index - 50
end = index + 50 
transectIndex = np.arange(start, end, 1)


# Calculate along-track distance
distance = np.arange(0.0, len(transectIndex) * 60, 60)  # GEDI Shots are spaced 60 m apart


# Create a list of tuples containing Shot number, and each waveform value at height (z)
wfList = []
for s, i in enumerate(transectIndex):
    # Basic quality filtering from section 5
    if gediL1B['BEAM0110/geolocation/degrade'][i] == 0 and gediL1B['BEAM0110/stale_return_flag'][i] == 0:
        zStart = gediL1B['BEAM0110/geolocation/elevation_bin0'][i]
        zEnd = gediL1B['BEAM0110/geolocation/elevation_lastbin'][i]
        zCount = sdsCount[i]
        zStretch = np.add(zEnd, np.multiply(range(zCount, 0, -1), ((zStart - zEnd) / int(zCount))))
        waveform = gediL1B[sdsWaveform][sdsStart[i]: sdsStart[i] + zCount]
        waves = []
        for z, w in enumerate(waveform):
            waves.append((distance[s], zStretch[z],w))  # Append Distance (x), waveform elevation (y) and waveform amplitude (z)
        wfList.append(waves)
    else:
        print(f"Shot {s} did not pass quality filtering and will be excluded.")


import warnings
warnings.filterwarnings('ignore')
# Plot the waveform values for the transect with the waveform amplitude being plotted as the 'c' or colormap
l1bVis = hv.Path(wfList, vdims='Amplitude').options(color='Amplitude', clim=(200,405), cmap='Greens', line_width=20, width=950,
                                                    height=500, clabel='Amplitude',colorbar=True, 
                                                    xlabel='Distance Along Transect (m)', ylabel='Elevation (m)', 
                                                    fontsize={'title':16, 'xlabel':16, 'ylabel': 16, 'xticks':12, 'yticks':12, 
                                                              'clabel':12, 'cticks':10}, colorbar_position='right',
                                                    title='GEDI L1B Waveforms across Redwood National Park: June 19, 2019')
l1bVis


del waveform, waves, wfList, zStretch


beamNames = [g for g in gediL1B.keys() if g.startswith('BEAM')]


beamNames

['BEAM0000',
 'BEAM0001',
 'BEAM0010',
 'BEAM0011',
 'BEAM0101',
 'BEAM0110',
 'BEAM1000',
 'BEAM1011']


# Set up lists to store data
shotNum, dem, zElevation, zHigh, zLat, zLon, srf, degrade, beamI, rxCount, rxStart = ([] for i in range(11))


# Loop through each beam and open the SDS needed
for b in beamNames:
    [shotNum.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/shot_number') and b in g][0]][()]]
    [dem.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/digital_elevation_model') and b in g][0]][()]]
    [zElevation.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/elevation_bin0') and b in g][0]][()]]  
    [zHigh.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/elevation_lastbin') and b in g][0]][()]]  
    [zLat.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/latitude_bin0') and b in g][0]][()]]  
    [zLon.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/longitude_bin0') and b in g][0]][()]]  
    [srf.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/stale_return_flag') and b in g][0]][()]]  
    [degrade.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/degrade') and b in g][0]][()]]  
    [rxCount.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/rx_sample_count') and b in g][0]][()]]  
    [rxStart.append(h) for h in gediL1B[[g for g in gediSDS if g.endswith('/rx_sample_start_index') and b in g][0]][()]]  
    [beamI.append(h) for h in [b] * len(gediL1B[[g for g in gediSDS if g.endswith('/shot_number') and b in g][0]][()])]


# Convert lists to Pandas dataframe
allDF = pd.DataFrame({'Shot Number': shotNum, 'Beam': beamI, 'Latitude': zLat, 'Longitude': zLon, 'Tandem-X DEM': dem,
                      'Elevation bin 0 (m)': zElevation, 'Elevation last bin (m)': zHigh, 'Stale Return Flag': srf,
                      'Degrade Flag': degrade, 'rx Start Index': rxStart, 'rx Sample Count': rxCount})


del shotNum, dem, zElevation, zHigh, zLat, zLon, srf, degrade, beamI


len(allDF)

3547051


redwoodNP.envelope[0].bounds

(-124.16015705494489,
 41.080601363502545,
 -123.84950230520286,
 41.83981133687605)


minLon, minLat, maxLon, maxLat = redwoodNP.envelope[0].bounds  # Define the min/max lat/lon from the bounds of Redwood NP


allDF = allDF.where(allDF['Latitude'] > minLat)
allDF = allDF.where(allDF['Latitude'] < maxLat)
allDF = allDF.where(allDF['Longitude'] > minLon)
allDF = allDF.where(allDF['Longitude'] < maxLon)


allDF = allDF.dropna()  # Drop shots outside of the ROI


len(allDF)

4477


# Set any poor quality returns to NaN
allDF = allDF.where(allDF['Stale Return Flag'].ne(1))
allDF = allDF.where(allDF['Degrade Flag'].ne(1))
allDF = allDF.dropna()
len(allDF)

4475


# Take the lat/lon dataframe and convert each lat/lon to a shapely point
allDF['geometry'] = allDF.apply(lambda row: Point(row.Longitude, row.Latitude), axis=1)


# Convert to geodataframe
allDF = gp.GeoDataFrame(allDF)
allDF = allDF.drop(columns=['Latitude','Longitude'])


allDF['Shot Number'] = allDF['Shot Number'].astype(str)  # Convert shot number to string

vdims = []
for f in allDF:
    if f not in ['geometry']:
        vdims.append(f)

visual = pointVisual(allDF, vdims = vdims)
visual * gv.Polygons(redwoodNP['geometry']).opts(line_color='red', color=None)


(gvts.EsriImagery * gv.Points(allDF, vdims=vdims).options(color='Elevation bin 0 (m)',cmap='terrain', size=3, tools=['hover'],
                                                          clim=(min(allDF['Elevation bin 0 (m)']), 
                                                          max(allDF['Elevation bin 0 (m)'])), colorbar=True, clabel='Meters',
                                                          title='GEDI Elevation bin0 over Redwood National Park: June 19, 2019',
                                                          fontsize={'xticks': 10, 'yticks': 10, 'xlabel':16, 'clabel':12,
                                                                    'cticks':10,'title':16,'ylabel':16})).options(height=500,
                                                                                                                  width=900)


waves = []  # List to store each list of waveform values

# Loop through each row (shot) in the data frame, extract waveform values, append to list of lists
for _, shot in allDF.iterrows():
    starts = int(shot['rx Start Index'])
    wave = gediL1B[f"{shot['Beam']}/rxwaveform"][starts: starts + int(shot['rx Sample Count'])]
    
    # In order to export as geojson, the type needs to be converted from a numpy array to a single comma separated string
    waves.append(','.join([str(q) for q in wave]))
    
# Add to data frame
allDF['rx Waveform'] = waves


gediL1B.filename  # L1B Filename

'GEDI01_B_2019170155833_O02932_T02267_02_003_01.h5'


outName = gediL1B.filename.replace('.h5', '.json')  # Create an output file name using the input file name
outName

'GEDI01_B_2019170155833_O02932_T02267_02_003_01.json'


allDF.to_file(outName, driver='GeoJSON')  # Export to GeoJSON


del allDF, waves

	Beam	Shot Number	Longitude	Latitude	Stale Return Flag	Degrade
0	BEAM0110	29320618800000001	111.996300	-51.803868	1	0
1	BEAM0110	29320604600000101	112.039132	-51.803905	1	0
2	BEAM0110	29320614600000201	112.080271	-51.803836	1	0
3	BEAM0110	29320600400000301	112.121445	-51.803737	1	0
4	BEAM0110	29320610400000401	112.162622	-51.803621	1	0
...	...	...	...	...	...	...
9792	BEAM0110	29320617400979201	88.208452	-51.803578	1	0
9793	BEAM0110	29320603200979301	88.249610	-51.803614	1	0
9794	BEAM0110	29320613200979401	88.290753	-51.803581	1	0
9795	BEAM0110	29320623200979501	88.331913	-51.803548	1	0
9796	BEAM0110	29320609000979601	88.373089	-51.803506	1	0

	Amplitude (DN)	Elevation (m)
0	226.009323	37.228645
1	225.360245	37.078965
2	225.373764	36.929286
3	226.165710	36.779607
4	227.480103	36.629927
...	...	...
807	231.473923	-83.562517
808	230.386932	-83.712196
809	228.637512	-83.861876
810	226.949661	-84.011555
811	226.125153	-84.161234

Getting Started with GEDI L1B Data in Python¶

This tutorial demonstrates how to work with the Geolocated Waveform (GEDI01_B.001) data product.¶

Use Case Example:¶

Data Used in the Example:¶

Topics Covered:¶

Before Starting this Tutorial:¶

Setup and Dependencies¶

Having trouble getting a compatible Python environment set up? Contact LP DAAC User Services at: https://lpdaac.usgs.gov/lpdaac-contact-us/¶

Example Data:¶

A NASA Earthdata Login account is required to download the data used in this tutorial. You can create an account at the link provided.¶

You will need to have the file above downloaded into the same directory as this Jupyter Notebook in order to successfully run the code below.¶

Source Code used to Generate this Tutorial:¶

1. Get Started ¶

1.1 Import Packages ¶

Import the required packages and set the input/working directory to run this Jupyter Notebook locally.¶

1.2 Set Up the Working Environment and Retrieve Files¶

The input directory is defined as the current working directory. Note that you will need to have the jupyter notebook and example data (.h5 and .geojson) stored in this directory in order to execute the tutorial successfully.¶

In this section, the GEDI .h5 file has been downloaded to the inDir defined above. You will need to download the file directly from the LP DAAC Data Pool in order to execute this tutorial.¶

Direct Link to file:¶

2. Import and Interpret Data ¶

2.1 Open a GEDI HDF5 File and Read File Metadata ¶

Read the file using h5py.¶

The standard format for GEDI filenames is as follows:¶

Read in a GEDI HDF5 file using the h5py package.¶

Navigate the HDF5 file below.¶

The GEDI HDF5 file contains groups in which data and metadata are stored.¶

First, the METADATA group contains the file-level metadata.¶

2.2 Read SDS Metadata and Subset by Beam ¶

The GEDI instrument consists of 3 lasers producing a total of 8 beam ground transects. The eight remaining groups contain data for each of the eight GEDI beam transects. For additional information, be sure to check out: https://gedi.umd.edu/instrument/specifications/.¶

One useful piece of metadata to retrieve from each beam transect is whether it is a full power beam or a coverage beam.¶

Below, pick one of the full power beams that will be used to retrieve GEDI L1B waveforms in Section 3.¶

Identify all the objects in the GEDI HDF5 file below.¶

3. Visualize a GEDI Orbit ¶

In the section below, import GEDI L1B SDS layers into a GeoPandas GeoDataFrame for the beam specified above.¶

Use the latitude_bin0 and longitude_bin0 to create a shapely point for each GEDI shot location.¶

3.1 Subset by Layer and Create a Geodataframe ¶

Read in the SDS and take a representative sample (every 100th shot) and append to lists, then use the lists to generate a pandas dataframe.¶

Above is a dataframe containing columns describing the beam, shot number, lat/lon location, and quality information about each shot.¶

Below, create an additional column called 'geometry' that contains a shapely point generated from each lat/lon location from the shot.¶

Next, convert to a Geopandas GeoDataFrame.¶

Pull out and plot an example shapely point below.¶

3.2 Visualize a GeoDataFrame ¶

In this section, use the GeoDataFrame and the geoviews python package to spatially visualize the location of the GEDI shots on a basemap and import a geojson file of the spatial region of interest for the use case example: Redwood National Park.¶

Import a geojson of Redwood National Park as an additional GeoDataFrame. Note that you will need to have downloaded the geojson from the bitbucket repo containing this tutorial and have it saved in the same directory as this Jupyter Notebook.¶

Defining the vdims below will allow you to hover over specific shots and view information about them.¶

Below, combine a plot of the Redwood National Park Boundary (combine two geoviews plots using *) with the point visual mapping function defined above in order to plot (1) the representative GEDI shots, (2) the region of interest, and (3) a basemap layer.¶

Above is a good illustration of the full GEDI orbit (GEDI files are stored as one ISS orbit). One of the benefits of using geoviews is the interactive nature of the output plots. Use the tools to the right of the map above to zoom in and find the shots intersecting Redwood National Park.¶

Below is a screenshot of the region of interest:¶

Side Note: Wondering what the 0's and 1's for stale_return_flag and degrade mean?¶

We will show an example of how to quality filter GEDI data in section 5.¶

After finding one of the shots within Redwood NP, find the index for that shot number so that we can find the correct waveform to visualize in Section 4.¶

Shot: 29320619900465601¶

4. Subset and Visualize Waveforms ¶

In this section, learn how to extract and subset specific waveforms and plot them using holoviews.¶

4.1 Import and Extract Waveforms¶

Print the description for each of these datasets to better understand how we will use them to extract specific waveforms.¶

Next, read how to use the rx_sample_count and rx_sample_start_index layers:¶

Use rx_sample_count and rx_sample_start_index to identify the location of each waveform in rxwaveform.¶

Next grab additional information about the shot, including the unique shot_number, and lat/lon location.¶

Put everything together to identify the waveform we want to extract:¶

In order to plot a full waveform, you also need to import the elevation recorded at bin0 (the first elevation recorded) and lastbin or the last elevation recorded for that waveform.¶

Extract the full waveform using the index start and count:¶

Below you can see why it is important to extract the specific waveform that you are interested in: almost 1.4 billion values are stored in the rxwaveform dataset!¶

4.2 Visualize Waveforms¶

Below, plot the extracted waveform using the elevation difference from bin0 to last_bin on the y axis, and the waveform energy returned or amplitude (digital number = dn) on the x axis.¶

Next, use the Holoviews python package (hv) to plot the waveform.¶

Congratulations! You have plotted your first waveform.¶

Above is a basic line plot showing amplitude (DN) as a function of elevation from the rxwaveform for the specific shot selected. Next, add additional chart elements and make the graph interactive.¶

As you can see, the selected shot does not look so interesting--this waveform shape looks more characteristic of some type of low canopy/bare ground than a tree canopy. If you zoom in to the GEDI shot it appears to be over the river itself or possibly a sand bar:¶

Next, plot a couple more waveforms and see if you can capture a shot over the forest.¶

Now that is starting to look more like a very tall, multi-layered tree canopy! Continue with the next shot:¶

Lastly, plot the transect of four consecutive waveforms:¶

Notice above moving west to east (left to right) along the transect you can see the elevation lowering and the tree canopy decreasing as it encounters the sandbar/river.¶

Below, use geoviews to plot the location of the four points to verify what is seen above.¶

Above, zoom in on the yellow dots until you can get a clearer view of all four shots.¶

5. Quality Filtering ¶

Now that you have the desired layers imported as a dataframe for the selected shots, let's perform quality filtering.¶

Below, remove any shots where the stale_return_flag is set to 1 (indicates that a "stale" cue point from the coarse search algorithm is being used) by defining those shots as nan.¶

The syntax of the line below can be read as: in the dataframe, find the rows "where" the stale return flag is not equal (ne) to 0. If a row (shot) does not meet the condition, set all values equal to nan for that row.¶

Below, quality filter even further by using the degrade flag (Greater than zero if the shot occurs during a degrade period, zero otherwise).¶

1.2 Set Up the Working Environment and Retrieve Files ¶

In this section, the GEDI .h5 file has been downloaded to the `inDir` defined above. You will need to download the file directly from the LP DAAC Data Pool in order to execute this tutorial.¶

Read the file using `h5py`.¶

Read in a GEDI HDF5 file using the `h5py` package.¶

First, the `METADATA` group contains the file-level metadata.¶

In the section below, import GEDI L1B SDS layers into a `GeoPandas` GeoDataFrame for the beam specified above.¶

Use the `latitude_bin0` and `longitude_bin0` to create a `shapely` point for each GEDI shot location.¶

Read in the SDS and take a representative sample (every 100th shot) and append to lists, then use the lists to generate a `pandas` dataframe.¶

Below, create an additional column called 'geometry' that contains a `shapely` point generated from each lat/lon location from the shot.¶

Next, convert to a `Geopandas` GeoDataFrame.¶

Pull out and plot an example `shapely` point below.¶

In this section, use the GeoDataFrame and the `geoviews` python package to spatially visualize the location of the GEDI shots on a basemap and import a geojson file of the spatial region of interest for the use case example: Redwood National Park.¶

Below, combine a plot of the Redwood National Park Boundary (combine two `geoviews` plots using `*`) with the point visual mapping function defined above in order to plot (1) the representative GEDI shots, (2) the region of interest, and (3) a basemap layer.¶

Side Note: Wondering what the 0's and 1's for `stale_return_flag` and `degrade` mean?¶

In this section, learn how to extract and subset specific waveforms and plot them using `holoviews`.¶

4.1 Import and Extract Waveforms ¶

Use `rx_sample_count` and `rx_sample_start_index` to identify the location of each waveform in `rxwaveform`.¶

Next grab additional information about the shot, including the unique `shot_number`, and lat/lon location.¶

In order to plot a full waveform, you also need to import the elevation recorded at `bin0` (the first elevation recorded) and `lastbin` or the last elevation recorded for that waveform.¶

Next, use the `Holoviews` python package (hv) to plot the waveform.¶

Below, remove any shots where the `stale_return_flag` is set to 1 (indicates that a "stale" cue point from the coarse search algorithm is being used) by defining those shots as `nan`.¶

The syntax of the line below can be read as: in the dataframe, find the rows "where" the stale return flag is not equal (ne) to 0. If a row (shot) does not meet the condition, set all values equal to `nan` for that row.¶

Below, quality filter even further by using the `degrade` flag (Greater than zero if the shot occurs during a degrade period, zero otherwise).¶

Below, drop all of the shots that did not pass the quality filtering standards outlined above from the `transectDF`.¶

In order to plot each vertical value for each waveform, you will need to reformat the data structure to match what is needed by `holoviews` Path() capabilities.¶

Below, plot each waveform by using `holoviews` Path() function. This will plot each individual waveform value by distance, with the amplitude plotted in the third dimension in shades of green.¶

If you zoom in to a subset of the transect, you can get even better detail showing the structure of the canopy from the `rxwaveform` dataset, where denser portions of the canopy are seen in darker shades of green.¶

7. Spatial Visualization ¶

Loop through each of the desired datasets (SDS) for each beam, append to lists, and transform into a `pandas` DataFrame.¶

Next create a `Shapely` Point out of each shot and insert it as the geometry column in the [soon to be geo]dataframe.¶

Now, using the `pointVisual` function defined in section 3.2, plot the `geopandas` GeoDataFrame using `geoviews`.¶

8. Export Subsets as GeoJSON Files ¶

In this section, export the GeoDataFrame as a `.geojson` file that can be easily opened in your favorite remote sensing and/or GIS software and will include an attribute table with all of the shots/values for each of the SDS layers in the dataframe.¶

Material written by Cole Krehbiel¹