Source code for sar_pre_processing.sar_pre_processor

"""
Wrapper module to launch preprocessor
"""

import os
import yaml
import fnmatch
# import pyproj
# import zipfile
# import shutil
# import ogr
import xml.etree.ElementTree as etree
from datetime import datetime
from .file_list_sar_pre_processing import SARList
import subprocess
from netCDF4 import Dataset
from .netcdf_stack import NetcdfStack


class AttributeDict(object):
    """
    A class to convert a nested dictionary into an object with key-values
    accessible using attribute notation (AttributeDict.attribute) instead of
    key notation (Dict["key"]). This class recursively sets dicts to objects,
    allowing you to recurse down nested dicts (like: AttributeDict.attr.attr).
    """

    def __init__(self, **entries):
        self.add_entries(**entries)
    def add_entries(self, **entries):
        for key, value in entries.items():
            if isinstance(value, dict):
                self.__dict__[key] = AttributeDict(**value)
            else:
                self.__dict__[key] = value
    def __getitem__(self, key):
        """ Provides dict-style access to attributes """
        return getattr(self, key)
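
# A minimal usage sketch for AttributeDict (illustrative only; the nested
# dict below is hypothetical and not part of this module):
#
#     entries = {'region': {'ul': {'lat': 48.0, 'lon': 11.0}}}
#     cfg = AttributeDict(**entries)
#     cfg.region.ul.lat            # attribute notation -> 48.0
#     cfg['region']['ul']['lat']   # dict-style access via __getitem__ -> 48.0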
class PreProcessor(object):

    def __init__(self, **kwargs):
        self.config_file = kwargs.get('config', None)
        self.filelist = kwargs.get('filelist', None)
        self._check()
        self._load_config()

    def _check(self):
        assert self.config_file is not None, \
            'ERROR: Configuration file needs to be provided'
    def pre_process(self):
        raise NotImplementedError('Routine should be implemented in child class')
    def _load_config(self):
        """
        Load the YAML configuration from self.config_file and store it
        in self.config.
        """
        with open(self.config_file, 'r') as cfg:
            self.config = yaml.safe_load(cfg)
        self.config = AttributeDict(**self.config)
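
# A sketch of the YAML layout that _load_config expects, inferred from the
# config keys referenced further down in this module (all paths and values
# below are hypothetical placeholders):
#
#     input_folder: /path/to/S1_SLC_data
#     output_folder: /path/to/output
#     gpt: /path/to/snap/bin/gpt
#     xml_graph_path: /path/to/xml_files
#     subset: 'yes'
#     region:
#         ul: {lat: 48.40, lon: 11.60}
#         lr: {lat: 48.10, lon: 11.90}
#     normalisation_angle: 35
#     speckle_filter:
#         multi_temporal:
#             apply: 'yes'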
class SARPreProcessor(PreProcessor):

    def __init__(self, **kwargs):
        super(SARPreProcessor, self).__init__(**kwargs)

        # Check if output folder is specified
        assert self.config.output_folder is not None, \
            'ERROR: output folder needs to be specified'

        # Initialise output folders for the different preprocessing steps
        # (can be put in the YAML config file if needed)
        self.config.output_folder_step1 = os.path.join(
            self.config.output_folder, 'step1')
        self.config.output_folder_step2 = os.path.join(
            self.config.output_folder, 'step2')
        self.config.output_folder_step3 = os.path.join(
            self.config.output_folder, 'step3')

        # Initialise names of the xml-graphs needed for preprocessing
        # (can be put in the YAML config file if needed)
        self.config.xml_graph_pre_process_step1 = 'pre_process_step1.xml'
        self.config.xml_graph_pre_process_step1_border = 'pre_process_step1_border.xml'
        self.config.xml_graph_pre_process_step2 = 'pre_process_step2.xml'
        self.config.xml_graph_pre_process_step3 = 'pre_process_step3.xml'

        # Initialise name additions for the output files of each step
        self.name_addition_step1 = '_GC_RC_No_Su'
        self.name_addition_step2 = '_Co'
        self.name_addition_step3 = '_speckle'

        # Check if path of SNAP's graph-processing-tool is specified
        assert self.config.gpt is not None, \
            "ERROR: path of SNAP's graph-processing-tool is not specified"

        # TODO: put the graph directories and names in a separate config
        # TODO: discuss if input/output specification is part of processing
        # or part of the instantiation of the object itself

    def _create_filelist(self, input_folder, expression):
        """
        Create list containing all files in input_folder (without subfolders)
        that match the provided expression within the filename.
        """
        filelist = []
        for root, dirnames, filenames in os.walk(input_folder):
            for filename in fnmatch.filter(filenames, expression):
                filelist.append(os.path.join(root, filename))
            break
        return filelist

    def _decomposition_filename(self, file):
        """
        Decompose a filename (including path) into path, filename,
        fileshortname and extension.
        """
        (filepath, filename) = os.path.split(file)
        (fileshortname, extension) = os.path.splitext(filename)
        return filepath, filename, fileshortname, extension

    def _get_area(self, lat_min, lat_max, lon_min, lon_max):
        """
        Convert the input coordinates into a closed lon/lat ring for the
        subset operator.
        """
        assert lat_min <= lat_max, 'ERROR: invalid lat'
        assert lon_min <= lon_max, 'ERROR: invalid lon'
        return '%.14f %.14f,%.14f %.14f,%.14f %.14f,%.14f %.14f,%.14f %.14f' % (
            lon_min, lat_min, lon_min, lat_max, lon_max, lat_max,
            lon_max, lat_min, lon_min, lat_min)
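
    # Example of the coordinate string _get_area produces (the coordinates
    # are made up for illustration):
    #
    #     >>> self._get_area(48.10, 48.40, 11.60, 11.90)
    #     '11.60000000000000 48.10000000000000,11.60000000000000 48.40000000000000,'
    #     '11.90000000000000 48.40000000000000,11.90000000000000 48.10000000000000,'
    #     '11.60000000000000 48.10000000000000'
    #
    # i.e. a closed lon/lat ring that pre_process_step1 wraps as
    # 'POLYGON ((<ring>))' for SNAP's subset operator.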
    def pre_process_step1(self, **kwargs):
        """
        Pre-process S1 SLC data with SNAP's GPT

        1) apply precise orbit file
        2) thermal noise removal
        3) calibration
        4) TOPSAR-Deburst
        5) Geometric Terrain Correction
        6) Radiometric Correction (after Kellndorfer et al.)
        7) backscatter normalisation to the angle specified in the config
           file (based on Lambert's Law)
        """
        # create filelist
        self.file_list = SARList(config=self.config_file).create_list()

        # Check if input folder is specified
        assert self.config.input_folder is not None, \
            'ERROR: input folder not specified'
        assert os.path.exists(self.config.input_folder)

        # Check if output folder for step1 is specified; create it if it
        # does not exist yet
        assert self.config.output_folder_step1 is not None, \
            'ERROR: output folder for step1 needs to be specified'
        if not os.path.exists(self.config.output_folder_step1):
            os.makedirs(self.config.output_folder_step1)

        # Check if XML file for pre-processing is specified
        assert self.config.xml_graph_pre_process_step1 is not None, \
            'ERROR: path of XML file for pre-processing step 1 is not specified'

        if self.config.subset == 'yes':
            try:
                lower_right_y = self.config.region['lr']['lat']
                upper_left_y = self.config.region['ul']['lat']
                upper_left_x = self.config.region['ul']['lon']
                lower_right_x = self.config.region['lr']['lon']
                # todo: how to handle coordinates that cross the date line?
                # Coordinates for subset area
                area = self._get_area(
                    lower_right_y, upper_left_y, upper_left_x, lower_right_x)
                process_all = 'no'
            except AttributeError:
                print('area of interest not specified, whole images will be processed')
                process_all = 'yes'
        elif self.config.subset == 'no':
            process_all = 'yes'
        else:
            raise ValueError('subset has to be set to "yes" or "no"')

        # loop to process all files stored in input directory
        for file in self.file_list[0]:
            print('Scene ', self.file_list[0].index(file) + 1,
                  ' of ', len(self.file_list[0]))

            # Divide filename
            filepath, filename, fileshortname, extension = \
                self._decomposition_filename(file)

            # Call SNAP routine, xml file
            print('Process ', filename, ' with SNAP.')

            outputfile = os.path.join(
                self.config.output_folder_step1,
                fileshortname + self.name_addition_step1 + '.dim')

            try:
                normalisation_angle = self.config.normalisation_angle
                if normalisation_angle is None:
                    normalisation_angle = 35
                    print('normalisation angle not specified, default value of 35 is used for processing')
            except AttributeError:
                normalisation_angle = 35
                print('normalisation angle not specified, default value of 35 is used for processing')

            if process_all == 'no':
                subprocess.call(
                    self.config.gpt + ' ' +
                    os.path.join(self.config.xml_graph_path,
                                 self.config.xml_graph_pre_process_step1) +
                    ' -Pinput="' + file + '" -Poutput="' + outputfile +
                    '" -Pangle="' + str(normalisation_angle) +
                    '" -Parea="POLYGON ((' + area + '))" -c 2G -x', shell=True)
            else:
                subprocess.call(
                    self.config.gpt + ' ' +
                    os.path.join(self.config.xml_graph_path,
                                 self.config.xml_graph_pre_process_step1) +
                    ' -Pinput="' + file + '" -Poutput="' + outputfile +
                    '" -Pangle="' + str(normalisation_angle) +
                    '" -c 2G -x', shell=True)

        # Scenes that cross a slice border are stored pairwise in
        # file_list[1] and processed with the border graph
        for i, file in enumerate(self.file_list[1][::2]):
            file_list2 = self.file_list[1][1::2]
            file2 = file_list2[i]

            # Divide filename
            filepath, filename, fileshortname, extension = \
                self._decomposition_filename(file)

            outputfile = os.path.join(
                self.config.output_folder_step1,
                fileshortname + self.name_addition_step1 + '.dim')

            try:
                normalisation_angle = self.config.normalisation_angle
                if normalisation_angle is None:
                    normalisation_angle = 35
                    print('normalisation angle not specified, default value of 35 is used for processing')
            except AttributeError:
                normalisation_angle = 35
                print('normalisation angle not specified, default value of 35 is used for processing')

            if process_all == 'no':
                subprocess.call(
                    self.config.gpt + ' ' +
                    os.path.join(self.config.xml_graph_path,
                                 self.config.xml_graph_pre_process_step1_border) +
                    ' -Pinput="' + file + '" -Pinput2="' + file2 +
                    '" -Poutput="' + outputfile +
                    '" -Pangle="' + str(normalisation_angle) +
                    '" -Parea="POLYGON ((' + area + '))" -c 2G -x', shell=True)
            else:
                subprocess.call(
                    self.config.gpt + ' ' +
                    os.path.join(self.config.xml_graph_path,
                                 self.config.xml_graph_pre_process_step1_border) +
                    ' -Pinput="' + file + '" -Pinput2="' + file2 +
                    '" -Poutput="' + outputfile +
                    '" -Pangle="' + str(normalisation_angle) +
                    '" -c 2G -x', shell=True)
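
    # For reference, the command assembled above looks roughly like this
    # (gpt path, graph path and scene names are hypothetical):
    #
    #     /path/to/gpt /path/to/xml_files/pre_process_step1.xml \
    #         -Pinput="/data/S1A_IW_SLC__scene.zip" \
    #         -Poutput="/out/step1/S1A_IW_SLC__scene_GC_RC_No_Su.dim" \
    #         -Pangle="35" \
    #         -Parea="POLYGON ((11.6 48.1,11.6 48.4,11.9 48.4,11.9 48.1,11.6 48.1))" \
    #         -c 2G -x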
    def pre_process_step2(self, **kwargs):
        """
        pre_process_step1 has to be done first

        Pre-process S1 SLC data with SNAP's GPT

        1) co-register pre-processed data

        !!! all files will get the metadata of the master image !!! Problem?
        """
        # Check if XML file for pre-processing step 2 is specified
        assert self.config.xml_graph_pre_process_step2 is not None, \
            'ERROR: path of XML file for pre-processing step 2 is not specified'

        # Check if output folder of pre_process_step1 exists
        assert os.path.exists(self.config.output_folder_step1), \
            'ERROR: output folder of pre-processing step1 not found'

        # Check if output folder for step2 is specified; create it if it
        # does not exist yet
        assert self.config.output_folder_step2 is not None, \
            'ERROR: output folder for step2 needs to be specified'
        if not os.path.exists(self.config.output_folder_step2):
            os.makedirs(self.config.output_folder_step2)

        # Create filelist with all images to be processed
        if self.filelist is None:
            print('no filelist specified, therefore all images in output folder of step1 will be processed')
            filelist = self._create_filelist(
                self.config.output_folder_step1, '*.dim')
            filelist.sort()
        else:
            filelist = []
            for file in self.filelist:
                filepath, filename, fileshortname, extension = \
                    self._decomposition_filename(file)
                new_file_name = os.path.join(
                    self.config.output_folder_step1,
                    fileshortname + self.name_addition_step1 + '.dim')
                if os.path.exists(new_file_name) is True:
                    filelist.append(new_file_name)
                else:
                    print('skip processing for %s. File does not exist' % file)
            filelist.sort()

        # Set master image for co-registration
        master = filelist[0]

        # loop to co-register all found images to the master image
        for file in filelist:
            print('Scene', filelist.index(file) + 1, 'of', len(filelist))

            # Divide filename
            filepath, filename, fileshortname, extension = \
                self._decomposition_filename(file)

            # Call SNAP routine, xml file
            print('Process ', filename, ' with SNAP.')

            outputfile = os.path.join(
                self.config.output_folder_step2,
                fileshortname + self.name_addition_step2 + '.dim')

            os.system(self.config.gpt + ' ' +
                      os.path.join(self.config.xml_graph_path,
                                   self.config.xml_graph_pre_process_step2) +
                      ' -Pinput="' + master + '" -Pinput2="' + file +
                      '" -Poutput="' + outputfile + '"')
            print(datetime.now())
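
    # After step 2, an input scene <scene> (hypothetical name) has been
    # written as
    #
    #     <output_folder>/step1/<scene>_GC_RC_No_Su.dim
    #     <output_folder>/step2/<scene>_GC_RC_No_Su_Co.dim
    #
    # i.e. the name additions of the steps are chained, which is what
    # pre_process_step3 relies on when it rebuilds file names from
    # self.filelist.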
    def pre_process_step3(self, **kwargs):
        """
        pre_process_step1 and 2 have to be done first

        Pre-process S1 SLC data with SNAP's GPT

        1) apply multi-temporal speckle filter
        """
        # Check if output folder of pre_process_step2 exists
        assert os.path.exists(self.config.output_folder_step2), \
            'ERROR: output folder of pre-processing step2 not found'

        # Check if output folder for step3 is specified; create it if it
        # does not exist yet
        assert self.config.output_folder_step3 is not None, \
            'ERROR: output folder for step3 needs to be specified'
        if not os.path.exists(self.config.output_folder_step3):
            os.makedirs(self.config.output_folder_step3)

        # Create filelist with all images to be processed
        if self.filelist is None:
            print('no filelist specified, therefore all images in output folder of step2 will be processed')
            filelist = self._create_filelist(
                self.config.output_folder_step2, '*.dim')
        else:
            filelist = []
            for file in self.filelist:
                filepath, filename, fileshortname, extension = \
                    self._decomposition_filename(file)
                new_file_name = os.path.join(
                    self.config.output_folder_step2,
                    fileshortname + self.name_addition_step1 +
                    self.name_addition_step2 + '.dim')
                if os.path.exists(new_file_name) is True:
                    filelist.append(new_file_name)
                else:
                    print('skip processing for %s. File does not exist' % file)

        # Sort filelist by date (hard coded position in filename!!!)
        filepath, filename, fileshortname, extension = \
            self._decomposition_filename(filelist[0])
        filelist.sort(key=lambda x: x[len(filepath) + 18:len(filepath) + 33])
        filelist_old = filelist

        # process S1A and S1B scenes separately
        for satellite in ['S1A', 'S1B']:
            filelist = [k for k in filelist_old if satellite in k]

            if self.config.speckle_filter.multi_temporal.apply == 'yes':
                # Check if XML file for pre-processing step 3 is specified
                assert self.config.xml_graph_pre_process_step3 is not None, \
                    'ERROR: path of XML file for pre-processing step 3 is not specified'

                new_filelist = []

                # loop to apply multi-temporal filtering
                # right now 5 scenes are used: if possible 2 before and 2
                # after the multi-temporally filtered scene; vv and vh
                # polarisation are treated separately
                # use the speckle filter algorithm metadata? metadata for
                # date might be wrong!!!
                for i, file in enumerate(filelist):
                    # apply speckle filter on 5 scenes: if possible 2 before
                    # and 2 after the scene of interest; at the edges of the
                    # time series the window is shifted instead
                    if i < 2:
                        processing_filelist = filelist[0:5]
                    else:
                        if i <= len(filelist) - 3:
                            processing_filelist = filelist[i - 2:i + 3]
                        else:
                            processing_filelist = filelist[
                                i - 2 - (3 - (len(filelist) - i)):len(filelist)]

                    filepath, filename, fileshortname, extension = \
                        self._decomposition_filename(file)

                    # band names of the co-registered slave bands
                    # (think about a better way !!!)
                    _, _, b, _ = self._decomposition_filename(self._create_filelist(
                        os.path.join(filepath, fileshortname + '.data'), '*_slv1_*.img')[0])
                    _, _, c, _ = self._decomposition_filename(self._create_filelist(
                        os.path.join(filepath, fileshortname + '.data'), '*_slv2_*.img')[0])
                    _, _, d, _ = self._decomposition_filename(self._create_filelist(
                        os.path.join(filepath, fileshortname + '.data'), '*_slv3_*.img')[0])
                    _, _, e, _ = self._decomposition_filename(self._create_filelist(
                        os.path.join(filepath, fileshortname + '.data'), '*_slv4_*.img')[0])

                    list_bands_single_speckle_filter = ','.join([b, c, d, e])
                    name_change_vv_single = d
                    name_change_vh_single = e
                    name_change_vv_norm_single = b
                    name_change_vh_norm_single = c

                    list_bands_vv_multi = []
                    list_bands_vh_multi = []
                    list_bands_vv_norm_multi = []
                    list_bands_vh_norm_multi = []

                    for processing_file in processing_filelist:
                        filepath, filename, fileshortname, extension = \
                            self._decomposition_filename(processing_file)

                        # get band names from folder
                        # (think about a better way !!!)
                        _, _, band_vv_name_multi, _ = self._decomposition_filename(
                            self._create_filelist(os.path.join(
                                filepath, fileshortname + '.data'), '*_slv3_*.img')[0])
                        _, _, band_vh_name_multi, _ = self._decomposition_filename(
                            self._create_filelist(os.path.join(
                                filepath, fileshortname + '.data'), '*_slv4_*.img')[0])
                        _, _, band_vv_name_norm_multi, _ = self._decomposition_filename(
                            self._create_filelist(os.path.join(
                                filepath, fileshortname + '.data'), '*_slv1_*.img')[0])
                        _, _, band_vh_name_norm_multi, _ = self._decomposition_filename(
                            self._create_filelist(os.path.join(
                                filepath, fileshortname + '.data'), '*_slv2_*.img')[0])

                        list_bands_vv_multi.append(band_vv_name_multi)
                        list_bands_vh_multi.append(band_vh_name_multi)
                        list_bands_vv_norm_multi.append(band_vv_name_norm_multi)
                        list_bands_vh_norm_multi.append(band_vh_name_norm_multi)

                    # Divide filename of file of interest
                    filepath, filename, fileshortname, extension = \
                        self._decomposition_filename(file)

                    outputfile = os.path.join(
                        self.config.output_folder_step3,
                        fileshortname + self.name_addition_step3 + '.nc')

                    date = datetime.strptime(fileshortname[17:25], '%Y%m%d')
                    date = date.strftime('%d%b%Y')
                    theta = 'localIncidenceAngle_slv10_' + date

                    processing_filelist = ','.join(processing_filelist)
                    list_bands_vv_multi = ','.join(list_bands_vv_multi)
                    list_bands_vh_multi = ','.join(list_bands_vh_multi)
                    list_bands_vv_norm_multi = ','.join(list_bands_vv_norm_multi)
                    list_bands_vh_norm_multi = ','.join(list_bands_vh_norm_multi)

                    os.system(self.config.gpt + ' ' +
                              os.path.join(self.config.xml_graph_path,
                                           self.config.xml_graph_pre_process_step3) +
                              ' -Pinput="' + processing_filelist +
                              '" -Pinput2="' + file +
                              '" -Poutput="' + outputfile +
                              '" -Ptheta="' + theta +
                              '" -Plist_bands_vv_multi="' + list_bands_vv_multi +
                              '" -Plist_bands_vh_multi="' + list_bands_vh_multi +
                              '" -Plist_bands_vv_norm_multi="' + list_bands_vv_norm_multi +
                              '" -Plist_bands_vh_norm_multi="' + list_bands_vh_norm_multi +
                              '" -Pdate="' + date +
                              '" -Pname_change_vv_single="' + name_change_vv_single +
                              '" -Pname_change_vh_single="' + name_change_vh_single +
                              '" -Pname_change_vv_norm_single="' + name_change_vv_norm_single +
                              '" -Pname_change_vh_norm_single="' + name_change_vh_norm_single +
                              '" -Plist_bands_single_speckle_filter="' +
                              list_bands_single_speckle_filter + '"')
                    print(datetime.now())
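
    # A standalone sketch of the moving-window selection used above, with the
    # same slicing logic (the last branch is algebraically identical to the
    # original i - 2 - (3 - (len - i)) start index):
    #
    #     def window(filelist, i):
    #         """Return up to 5 scenes: ideally 2 before and 2 after scene i."""
    #         if i < 2:
    #             return filelist[0:5]
    #         if i <= len(filelist) - 3:
    #             return filelist[i - 2:i + 3]
    #         return filelist[len(filelist) - 5:len(filelist)]
    #
    #     window(list(range(10)), 0)  # -> [0, 1, 2, 3, 4]
    #     window(list(range(10)), 5)  # -> [3, 4, 5, 6, 7]
    #     window(list(range(10)), 9)  # -> [5, 6, 7, 8, 9]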
    def netcdf_information(self, **kwargs):
        # input folder
        input_folder = self.config.output_folder_step3
        expression = '*.nc'
        filelist = self._create_filelist(input_folder, expression)

        # loop through all processed scenes
        for file in filelist:
            # Divide filename
            filepath, filename, fileshortname, extension = \
                self._decomposition_filename(file)
            filepath2 = self.config.output_folder_step1

            # extract orbit direction, relative orbit and radar frequency
            # from the BEAM-DIMAP metadata of the corresponding step1 output
            # (filename[0:79] reconstructs the step1 name; hard coded!)
            metadata = etree.parse(
                os.path.join(filepath2, filename[0:79] + '.dim'))
            for i in metadata.findall('Dataset_Sources'):
                for ii in i.findall('MDElem'):
                    for iii in ii.findall('MDElem'):
                        for iiii in iii.findall('MDATTR'):
                            name = iiii.get('name')
                            if name == 'PASS':
                                orbitdir = iiii.text
                            elif name == 'REL_ORBIT':
                                relorbit = iiii.text
                            elif name == 'radar_frequency':
                                frequency = float(iiii.text) / 1000.

            # extract satellite name ('S1A' or 'S1B') from the filename
            sat = fileshortname[0:3]

            # extract date from filename
            date = datetime.strptime(fileshortname[17:32], '%Y%m%dT%H%M%S')

            dset = Dataset(file, 'r+', format="NETCDF4")
            try:
                dset.delncattr('start_date')
                dset.delncattr('stop_date')
            except (RuntimeError, AttributeError):
                pass
            dset.setncattr_string('date', str(date))
            dset.setncattr_string('orbitdirection', orbitdir)
            dset.setncattr_string('relativeorbit', relorbit)
            dset.setncattr_string('satellite', sat)
            dset.setncattr_string('frequency', str(frequency))
            dset.close()
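
    # Quick check of the attributes written above (sketch; the file name is
    # made up):
    #
    #     from netCDF4 import Dataset
    #     with Dataset('/out/step3/S1A_scene_speckle.nc') as dset:
    #         print(dset.getncattr('orbitdirection'))  # e.g. ASCENDING
    #         print(dset.getncattr('relativeorbit'))
    #         print(dset.getncattr('frequency'))       # radar_frequency / 1000.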
"""run script""" if __name__ == "__main__": processing = SARPreProcessor(config='sample_config_file.yml') # processing.pre_process_step1() # processing.pre_process_step2() # processing.pre_process_step3() subprocess.call(os.path.join(os.getcwd(),'projection_problem.sh ' + processing.config.output_folder_step3), shell=True) # processing.netcdf_information() # NetcdfStack(input_folder=processing.config.output_folder_step3, output_path=processing.config.output_folder_step3.rsplit('/', 1)[0] , output_filename=processing.config.output_folder_step3.rsplit('/', 2)[1]) print('finished') # filtertype = self.config.speckle_filter.multi_temporal.filter # filtersizex = self.config.speckle_filter.multi_temporal.filtersizex # filtersizey = self.config.speckle_filter.multi_temporal.filtersizey # windowsize = self.config.speckle_filter.multi_temporal.windowsize # targetwindowsize = self.config.speckle_filter.multi_temporal.targetwindowsize