Source code for multiply_data_access.data_access
# coding=UTF-8
"""
Description
===========
This module contains the MULTIPLY data access API.
"""
from abc import ABCMeta, abstractmethod
from typing import List, Sequence, Optional
from datetime import datetime, timedelta
from multiply_core.util import FileRef, are_times_equal, are_polygons_almost_equal, get_time_from_string
from shapely.wkt import loads
from shapely.geometry import Polygon
import os
# Authorship metadata of this module.
__author__ = (
    'Alexander Löw (Ludwig Maximilians-Universität München), '
    'Tonio Fincke (Brockmann Consult GmbH)'
)
class DataSetMetaInfo(object):
    """
    A representation of meta information about a data set. To be retrieved from a query on a MetaInfoProvider or
    DataStore.

    :param coverage: The spatial coverage of the data set, given as WKT string.
    :param start_time: The start time of the data set. Can be None.
    :param end_time: The end time of the data set. Can be None.
    :param data_type: The type of the data set.
    :param identifier: An identifier so that the data set can be found on the Data Store's File System.
    :param referenced_data: Additional data referenced by this data set. Can be None.
    """

    def __init__(self, coverage: str, start_time: Optional[str], end_time: Optional[str], data_type: str,
                 identifier: str, referenced_data: Optional[str] = None):
        self._coverage = coverage
        self._start_time = start_time
        self._end_time = end_time
        self._data_type = data_type
        self._identifier = identifier
        self._referenced_data = referenced_data

    def __repr__(self):
        # Referenced data is intentionally not part of the textual representation.
        return 'Data Set:\n' \
               ' Id: {}, \n' \
               ' Type: {}, \n' \
               ' Start Time: {}, \n' \
               ' End Time: {}, \n' \
               ' Coverage: {}\n'.format(self.identifier, self.data_type, self.start_time, self.end_time, self.coverage)

    @property
    def start_time(self) -> Optional[str]:
        """The dataset's start time. Can be None."""
        return self._start_time

    @property
    def end_time(self) -> Optional[str]:
        """The dataset's end time. Can be None."""
        return self._end_time

    @property
    def coverage(self) -> str:
        """The dataset's spatial coverage, given as WKT string."""
        return self._coverage

    @property
    def data_type(self) -> str:
        """The type of the dataset."""
        return self._data_type

    @property
    def identifier(self) -> str:
        """An identifier so that the data set can be found on the Data Store's File System."""
        return self._identifier

    @property
    def referenced_data(self) -> Optional[str]:
        """
        Additional files that are referenced by this data set, held as a single string. Can be None.
        NOTE(review): how multiple files are encoded in the string is not visible in this module -- confirm.
        """
        return self._referenced_data

    def equals(self, other: object) -> bool:
        """
        Checks whether two data set meta infos are equal. Does not check the identifier or referenced data sets!

        :param other: The object to compare with.
        :return: True if other is a DataSetMetaInfo with equal times, an almost equal coverage and the same
                 data type.
        """
        # The type check in equals_except_data_type short-circuits, so other.data_type is only read from
        # objects that actually are DataSetMetaInfos.
        return self.equals_except_data_type(other) and self._data_type == other.data_type

    def equals_except_data_type(self, other: object) -> bool:
        """
        Checks whether two data set meta infos are equal, except that they may have different data types.
        Does not check the identifier or referenced data sets!

        :param other: The object to compare with. Instances of subclasses of DataSetMetaInfo are accepted.
        :return: True if other is a DataSetMetaInfo whose start and end times are equal to and whose coverage is
                 almost equal to those of this data set meta info.
        """
        return isinstance(other, DataSetMetaInfo) and \
            are_times_equal(self._start_time, other.start_time) and \
            are_times_equal(self._end_time, other.end_time) and \
            are_polygons_almost_equal(self.coverage, other.coverage)
class FileSystem(metaclass=ABCMeta):
    """
    Abstract description of a file system that physically holds data sets.
    """

    @classmethod
    @abstractmethod
    def name(cls) -> str:
        """
        :return: The name under which this file system implementation is known.
        """

    @abstractmethod
    def get(self, data_set_meta_info: DataSetMetaInfo) -> Sequence[FileRef]:
        """
        Retrieves a sequence of 'FileRef's.

        :param data_set_meta_info: The meta information describing the requested data.
        :return: The file refs for the requested data.
        """

    def get_as_dict(self) -> dict:
        """
        :return: A dictionary representing this file system, holding its name under 'type' and its parameters
                 under 'parameters'.
        """
        type_name = self.name()
        parameters = self.get_parameters_as_dict()
        return {'type': type_name, 'parameters': parameters}

    @abstractmethod
    def get_parameters_as_dict(self) -> dict:
        """
        :return: A dictionary holding the parameters of this file system.
        """

    @abstractmethod
    def can_put(self) -> bool:
        """
        :return: True, if it is possible to put data into this file system.
        """

    @abstractmethod
    def put(self, from_url: str, data_set_meta_info: DataSetMetaInfo) -> DataSetMetaInfo:
        """
        Adds a data set to the file system by copying it from the given url to the expected location within
        the file system.

        :param from_url: The url from which the data set is copied.
        :param data_set_meta_info: The meta information about the data set to be added.
        :return: An updated data set meta info.
        """

    @abstractmethod
    def remove(self, data_set_meta_info: DataSetMetaInfo):
        """
        Removes all data sets from the file system that are described by the data set meta info.

        :param data_set_meta_info: The meta information describing the data sets to be removed.
        """

    @abstractmethod
    def scan(self) -> Sequence[DataSetMetaInfo]:
        """
        :return: A sequence of data set meta informations of all file refs found in the file system.
        """
class FileSystemAccessor(metaclass=ABCMeta):
    """
    Base class of accessors that create FileSystem objects from a dictionary of parameters.
    """

    @classmethod
    def create_from_parameters(cls, parameters: dict) -> FileSystem:
        """
        Returns a FileSystem object.

        :param parameters: The parameters from which the file system shall be created.
        :return: The file system. This base implementation has no body and yields None.
        """
        # NOTE(review): this method is not marked abstract although the class uses ABCMeta -- presumably
        # implementations are expected to override it; confirm.
        return None
class MetaInfoProvider(metaclass=ABCMeta):
    """
    An abstraction of a provider that contains meta information about the files provided by a data store.

    The query strings processed by the static helper methods of this class consist of four fields separated by
    ';': the region of interest as WKT, the start time, the end time, and a list of data types separated by ','.
    """

    @classmethod
    @abstractmethod
    def name(cls) -> str:
        """
        :return: The name of the meta info provider implementation.
        """

    @abstractmethod
    def query(self, query_string: str) -> List[DataSetMetaInfo]:
        """
        Processes a query and retrieves a result. The result will consist of all the data sets that satisfy the query.

        :param query_string: The query, given as string.
        :return: A list of meta information about data sets that fulfill the query.
        """

    @abstractmethod
    def provides_data_type(self, data_type: str) -> bool:
        """
        Whether the meta info provider provides access to data of the queried type

        :param data_type: A string labelling the data
        :return: True if data of that type can be requested from the meta info provider
        """

    @abstractmethod
    def get_provided_data_types(self) -> List[str]:
        """
        :return: A list of the data types provided by this meta info provider.
        """

    @abstractmethod
    def encapsulates_data_type(self, data_type: str) -> bool:
        """
        Whether the meta info provider provides encapsulated access to data of the queried type. Data access is
        considered encapsulated when the data is not provided directly from the meta info provider, but indirectly by
        requesting a provided data type which in some form relies on the encapsulated data.

        :param data_type: A string labelling the data
        :return: True if data of that type is encapsulated by one of the meta info provider's provided data types.
        """

    @staticmethod
    def get_roi_from_query_string(query_string: str) -> Optional[Polygon]:
        """
        Extracts the region of interest from the first field of a query string.

        :param query_string: The query string.
        :return: The region of interest as polygon, or None if the field is empty or consists of whitespace only.
        :raises ValueError: If the field contains a valid WKT geometry that is not a polygon.
        """
        # Surrounding whitespace carries no information; a blank field means that no region was requested.
        roi_as_wkt = query_string.split(';')[0].strip()
        if not roi_as_wkt:
            return None
        roi = loads(roi_as_wkt)
        if not isinstance(roi, Polygon):
            raise ValueError('ROI must be a polygon')
        return roi

    @staticmethod
    def get_start_time_from_query_string(query_string: str) -> Optional[datetime]:
        """
        Extracts the start time from the second field of a query string.

        :param query_string: The query string.
        :return: The result of get_time_from_string for the start time field.
        :raises IndexError: If the query string has fewer than two fields.
        """
        start_time_as_string = query_string.split(';')[1]
        # NOTE(review): the flag is False for start times and True for end times; its exact meaning is defined
        # by multiply_core.util.get_time_from_string -- confirm there.
        return get_time_from_string(start_time_as_string, False)

    @staticmethod
    def get_end_time_from_query_string(query_string: str) -> Optional[datetime]:
        """
        Extracts the end time from the third field of a query string.

        :param query_string: The query string.
        :return: The result of get_time_from_string for the end time field.
        :raises IndexError: If the query string has fewer than three fields.
        """
        end_time_as_string = query_string.split(';')[2]
        # NOTE(review): the flag is True for end times and False for start times; its exact meaning is defined
        # by multiply_core.util.get_time_from_string -- confirm there.
        return get_time_from_string(end_time_as_string, True)

    @staticmethod
    def get_data_types_from_query_string(query_string: str) -> List[str]:
        """
        Extracts the data types from the fourth field of a query string.

        :param query_string: The query string.
        :return: The data types with surrounding whitespace removed. Entries that are empty after stripping
                 (e.g., caused by a blank field or a trailing comma) are omitted, so a blank field results in
                 an empty list.
        :raises IndexError: If the query string has fewer than four fields.
        """
        data_types_field = query_string.split(';')[3]
        stripped_data_types = (data_type.strip() for data_type in data_types_field.split(','))
        return [data_type for data_type in stripped_data_types if data_type]

    def get_as_dict(self) -> dict:
        """
        :return: A representation of this meta info provider as dictionary, holding its name under 'type' and
                 its parameters under 'parameters'.
        """
        return {'type': self.name(),
                'parameters': self._get_parameters_as_dict()}

    @abstractmethod
    def _get_parameters_as_dict(self) -> dict:
        """
        :return: The parameters of this meta info provider as dict
        """

    def notify_got(self, data_set_meta_info: DataSetMetaInfo) -> None:
        """
        Informs the meta info provider that the data set has been retrieved from the file system.
        This default implementation does nothing.

        :param data_set_meta_info: The meta information about the data set that has been retrieved.
        """
        pass

    @abstractmethod
    def can_update(self) -> bool:
        """
        :return: True if this meta info provider can be updated.
        """

    @abstractmethod
    def update(self, data_set_meta_info: DataSetMetaInfo):
        """
        Adds information about the data set to its internal registry.

        :param data_set_meta_info: The meta information about the data set to be added.
        """

    @abstractmethod
    def remove(self, data_set_meta_info: DataSetMetaInfo):
        """
        Removes information about this data set from its internal registry.

        :param data_set_meta_info: The meta information about the data set to be removed.
        """

    @abstractmethod
    def get_all_data(self) -> Sequence[DataSetMetaInfo]:
        """
        :return: All available data set meta infos.
        """