# Source code for fastr.data.url

# Copyright 2011-2014 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Module providing tools to parse and create valid urls and paths.

usage example:

When the ``data`` mount in the mounts section of fastr.config is set to
/media/data, you will get the following:

.. code-block:: python

  >>> from fastr.data.url import get_path_from_url
  >>> get_path_from_url('vfs://data/temp/blaat1.png')
  '/media/data/temp/blaat1.png'
"""

import re
import os.path
import urllib.parse as up
import posixpath

from .. import resources
from fastr.exceptions import FastrUnknownURLSchemeError


def register_url_scheme(scheme):
    """ Register a custom scheme to behave http like. This is needed to parse
    all things properly.

    The scheme is added to every ``uses_*`` list of :mod:`urllib.parse`
    (imported in this module as ``up``). Registering the same scheme more
    than once is harmless: it is only appended when not already present.

    :param str scheme: the url scheme to register (e.g. ``'vfs'``)
    """
    for name in dir(up):
        if not name.startswith('uses_'):
            continue

        known_schemes = getattr(up, name)
        # Avoid piling up duplicate entries on repeated registration
        if scheme not in known_schemes:
            known_schemes.append(scheme)


def get_url_scheme(url):
    """
    Get the scheme of the url

    The argument is converted with ``str()`` before parsing. When the
    string has no scheme part, an empty string is returned.

    :param str url: url to extract scheme from
    :return: the url scheme
    :rtype: str
    """
    return up.urlparse(str(url)).scheme


def get_path_from_url(url):
    """ Get the path to a file from a url.

    The scheme of the url selects the ioplugin (looked up in
    ``resources.ioplugins``) whose ``url_to_path`` method performs the
    translation, e.g. the ``vfs://`` and ``file://`` schemes.

    Examples:

    .. code-block:: python

      >>> url.get_path_from_url('vfs://neurodata/user/project/file.ext')
      'Y:\\neuro3\\user\\project\\file.ext'


      >>> url.get_path_from_url('file:///d:/data/project/file.ext')
      'd:\\data\\project\\file.ext'

    .. warning::

      file:// will not function cross platform and is mainly for testing

    :param str url: the url to translate
    :return: the result of the ioplugin's ``url_to_path`` for the url
    :raises FastrUnknownURLSchemeError: if the url has no scheme, or if no
                                        ioplugin is registered under its
                                        scheme
    """
    # Translate properly depending on the scheme being used
    scheme = get_url_scheme(url)

    # Make the paths and vfs not go via ioplugins, but shortcut it
    if scheme == '':
        # This is not a URL, but a path, throw error
        raise FastrUnknownURLSchemeError('URL using no url scheme in {}, so it must be a path! '.format(url))

    if scheme == 'vfs':
        # Directly call vfs
        return resources.ioplugins['vfs'].url_to_path(url)

    # Dispatch to ioplugin via the ioplugin manager
    try:
        ioplugin = resources.ioplugins[scheme]
    except KeyError:
        raise FastrUnknownURLSchemeError('URL using an unknown scheme in {}'
                                         ' ({} not in {})'.format(url,
                                                                  scheme,
                                                                  list(resources.ioplugins.keys())))

    return ioplugin.url_to_path(url)


def isurl(string):
    """
    Check if string is a valid url

    A value counts as a url when it is a ``str`` that parses with a scheme
    of more than one character. Single-character schemes (such as the ``d``
    in ``d:/data``) are rejected. Input that ``urlparse`` refuses to parse
    is reported as not being a url instead of raising.

    :param str string: potential url
    :return: flag indicating if string is a valid url
    :rtype: bool
    """
    if not isinstance(string, str):
        return False

    try:
        scheme = up.urlparse(str(string)).scheme
    except ValueError:
        # urlparse raises ValueError on malformed input (e.g. an unbalanced
        # IPv6 bracket in the netloc); that is simply not a valid url
        return False

    return len(scheme) > 1


def basename(url):
    """
    Get basename of url

    Only the path component of the url is considered; it is split using
    posix path rules, so a path ending in ``/`` has an empty basename.

    :param str url: the url
    :return: the basename of the path in the url
    :rtype: str
    """
    url_path = up.urlparse(str(url)).path
    return posixpath.basename(url_path)


def dirname(url):
    """
    Get the dirname of the url

    Only the path component of the url is considered; the result is a
    path (using posix path rules), not a url.

    :param str url: the url
    :return: the dirname of the path in the url
    :rtype: str
    """
    url_path = up.urlparse(str(url)).path
    return posixpath.dirname(url_path)


def dirurl(url):
    """
    Get a new url that only has the dirname as the path

    This is the first element of the tuple returned by :func:`split`.

    :param str url: the url
    :return: the modified url with only dirname as path
    :rtype: str
    """
    dirname_url, _ = split(url)
    return dirname_url


def split(url):
    """
    Split a url in a url with the dirname and the basename part of the path of
    the url

    All other components of the url (scheme, netloc, params, query and
    fragment) are carried over unchanged into the returned dirname url.

    :param str url: the url
    :return: a tuple with (dirname_url, basename)
    :rtype: tuple
    """
    # pylint: disable=protected-access
    parsed_url = up.urlparse(str(url))
    head, tail = posixpath.split(parsed_url.path)
    # _replace is the namedtuple way to swap a single field
    return up.urlunparse(parsed_url._replace(path=head)), tail


def join(url, *p):
    """
    Join the path in the url with p

    The parts are combined with ``posixpath.join``, so an absolute part
    (one starting with ``/``) replaces everything that came before it in
    the path. The other components of the url are left untouched.

    :param str url: the base url to join with
    :param p: additional parts of the path
    :return: the url with the parts added to the path
    :rtype: str
    """
    # pylint: disable=protected-access
    parsed_url = up.urlparse(str(url))
    joined_path = posixpath.join(parsed_url.path, *p)
    return up.urlunparse(parsed_url._replace(path=joined_path))


def normurl(url):
    """
    Normalize the path of the url

    The path component is normalized with ``posixpath.normpath``; the other
    components of the url are left untouched. A url without a path is
    returned with its path still empty.

    :param str url: the url
    :return: the normalized url
    :rtype: str
    """
    # pylint: disable=protected-access
    parsed_url = up.urlparse(str(url))

    # normpath('') returns '.', which would turn 'vfs://data' into
    # 'vfs://data/.', so only normalize when there is a path
    if parsed_url.path:
        parsed_url = parsed_url._replace(path=posixpath.normpath(parsed_url.path))

    return up.urlunparse(parsed_url)


def create_vfs_url(mountpoint, path):
    """
    Construct an url from a given mount point and a relative path to the mount point.

    Leading and trailing ``/`` characters are stripped from the path before
    it is placed behind the mountpoint.

    :param str mountpoint: the name of the mountpoint
    :param str path: relative path from the mountpoint
    :return: the created vfs url
    :rtype: str
    """
    relative_path = path.strip('/')
    return 'vfs://{}/{}'.format(mountpoint, relative_path)


def full_split(urlpath):
    """
    Split the path in the url in a list of parts

    The path is taken apart with repeated ``posixpath.split`` calls. A
    leading root (``/``) is kept as the first part, and a path ending in
    ``/`` yields an empty string as its last part.

    :param urlpath: the url path
    :return: a list of parts
    :rtype: list
    """
    parts = []
    while True:
        head, tail = posixpath.split(urlpath)
        if head == urlpath:
            # Nothing left to split off: what remains is either the root
            # or an empty string (posixpath.split gives an empty tail here)
            if urlpath:
                parts.append(urlpath)
            break

        parts.append(tail)
        urlpath = head

    # Parts were collected from the end of the path towards the start
    parts.reverse()
    return parts


def _correct_separators(path):
    """
    Translate the URL separator '/' into the appropriate separator for the OS

    Leading and trailing '/' characters are removed before the translation.

    :param str path: the path to correct
    :return: path with consistent separators
    """
    trimmed = path.strip('/')
    return trimmed.replace('/', os.path.sep)