# Source code for PyOpenWorm.datasource_loader
# -*- coding: utf-8 -*-
'''
DataSourceLoaders take a data source identifier and retrieve the primary data (e.g., CSV files, electrode recordings)
from some location (e.g., a file store, via a bittorrent tracker).
Each loader can treat the base_directory given as its own namespace and place directories in there however it wants.
'''
from yarom.utils import FCN
from os.path import exists, isdir, join as pth_join, isabs, realpath
import six
class DataSourceDirLoader(object):
    '''
    Loads data files for a DataSource

    The loader is expected to organize files for each data source within the given
    base directory.

    Sub-classes implement `load` (and usually `can_load`); callers invoke the loader
    instance itself, which validates whatever `load` returns.
    '''

    def __init__(self, base_directory=None):
        '''
        Parameters
        ----------
        base_directory : str, optional
            Directory under which this loader keeps its files. It is resolved with
            ``realpath``. When omitted, `base_directory` is set to None and has to be
            assigned before the loader is called.
        '''
        # Always define the attribute so that a loader created without a base directory
        # fails with a LoadFailed in __call__ rather than an AttributeError
        self.base_directory = realpath(base_directory) if base_directory else None
        self.directory_key = FCN(type(self))

    def __call__(self, data_source):
        '''
        Load the data source

        Parameters
        ----------
        data_source : PyOpenWorm.datasource.DataSource
            The data source to load data for

        Returns
        -------
        A path to the loaded resource: the resolved (``realpath``) path of an existing
        directory located at or below the base directory

        Raises
        ------
        LoadFailed
            If no base directory is set, or if `load` returns an empty value, a path
            outside of the base directory, a path that does not exist, or a path that
            is not a directory
        '''
        # getattr: a sub-class may override __init__ without ever setting the attribute
        base_directory = getattr(self, 'base_directory', None)
        if not base_directory:
            raise LoadFailed(data_source, self, 'Loader has no base directory set')
        # The base directory may have been assigned after construction, so resolve it
        # here too: it is compared against a resolved path below
        base_directory = realpath(base_directory)

        s = self.load(data_source)
        if not s:
            raise LoadFailed(data_source, self, 'Loader returned an empty string')

        # N.B.: This logic is NOT intended as a security measure against directory
        # traversal: it is only to make the interface both flexible and unambiguous
        # for implementers

        # Relative paths are allowed; they are taken relative to the base directory
        if not isabs(s):
            s = pth_join(base_directory, s)

        # Make sure the loader isn't doing some nonsense with symlinks or non-portable paths
        rpath = realpath(s)

        # Compare whole path components: joining with '' appends a trailing separator,
        # so that "/x/base_other" is not mistaken for a child of "/x/base"
        if rpath != base_directory and not rpath.startswith(pth_join(base_directory, '')):
            msg = 'Loader returned a file path outside of the base directory, {}'.format(base_directory)
            raise LoadFailed(data_source, self, msg)

        if not exists(rpath):
            msg = 'Loader returned a non-existant file {}'.format(rpath)
            raise LoadFailed(data_source, self, msg)

        if not isdir(rpath):
            msg = 'Loader did not return a directory, but returned {}'.format(rpath)
            raise LoadFailed(data_source, self, msg)

        return rpath

    def load(self, data_source):
        '''
        Retrieve the data for `data_source` and return the directory holding it

        Must be implemented by sub-classes. The returned path may be absolute or
        relative to the base directory; `__call__` validates it.
        '''
        raise NotImplementedError()

    def can_load(self, data_source):
        '''
        Return True if this loader can load `data_source`. The base implementation
        always returns False.
        '''
        return False

    def __str__(self):
        return FCN(type(self)) + '()'
class LoadFailed(Exception):
    '''
    Raised when a loader fails to load the data for a data source

    Parameters
    ----------
    data_source : object
        The data source that was being loaded. Formatted into the message
    loader : object
        The loader that failed. Formatted into the message
    *args
        Optional. The first argument, when given and truthy, is appended to the
        message as the reason for the failure. Any remaining arguments are passed
        through to `Exception` after the message
    '''

    def __init__(self, data_source, loader, *args):
        # The reason is optional: LoadFailed(data_source, loader) is a valid call
        msg = args[0] if args else None
        mmsg = 'Failed to load {} data with loader {}'.format(data_source, loader)
        if msg:
            # format() rather than '+' so that a non-string reason (e.g., another
            # exception) does not raise a TypeError here
            mmsg += ': {}'.format(msg)
        super(LoadFailed, self).__init__(mmsg, *args[1:])