Source code for panoptes_client.exportable

from __future__ import absolute_import, division, print_function

import csv
import datetime
import functools
import time

import requests

from panoptes_client.panoptes import (
    PanoptesAPIException,
    Talk,
)


# Export types that are requested through the Talk client below instead of
# the resource's own ``<id>/<type>_export`` endpoint.
TALK_EXPORT_TYPES = ('talk_comments', 'talk_tags')

# Talk client created once at import time; every Talk export request made by
# this module goes through it.
talk = Talk()


class Exportable(object):
    """
    Abstract class containing methods for generating and downloading data
    exports.

    The methods below rely on ``self.id``, ``self.http_get`` and
    ``self.http_post``, which are not defined here and must be supplied by
    the concrete class.
    """

    def get_export(
        self,
        export_type,
        generate=False,
        wait=False,
        wait_timeout=None,
    ):
        """
        Downloads a data export over HTTP. Returns a `Requests Response
        <http://docs.python-requests.org/en/master/api/#requests.Response>`_
        object containing the content of the export.

        - **export_type** is a string specifying which type of export should be
          downloaded.
        - **generate** is a boolean specifying whether to generate a new export
          and wait for it to be ready, or to just download the latest export.
        - **wait** is a boolean specifying whether to wait for an in-progress
          export to finish, if there is one. Has no effect if ``generate`` is
          ``True``, because generating always waits.
        - **wait_timeout** is the maximum number of seconds to wait whenever
          waiting happens, i.e. if ``generate`` or ``wait`` is ``True``.
          ``None`` (the default) waits indefinitely. Has no effect if both
          ``generate`` and ``wait`` are ``False``.

        If waiting times out, :py:class:`.PanoptesAPIException` is raised by
        :py:meth:`wait_export`.

        The returned :py:class:`.Response` object has two additional attributes
        as a convenience for working with the CSV content; **csv_reader** and
        **csv_dictreader**, which are wrappers for :py:meth:`.csv.reader`
        and :py:class:`csv.DictReader` respectively. These wrappers take care
        of correctly decoding the export content for the CSV parser.

        Example::

            classification_export = Project(1234).get_export('classifications')
            for row in classification_export.csv_reader():
                print(row)

            classification_export = Project(1234).get_export('classifications')
            for row in classification_export.csv_dictreader():
                print(row)
        """

        if generate:
            self.generate_export(export_type)

        if generate or wait:
            export = self.wait_export(export_type, wait_timeout)
        else:
            export = self.describe_export(export_type)

        # Talk exports and regular exports keep the download URL in
        # different places of the description.
        if export_type in TALK_EXPORT_TYPES:
            media_url = export['data_requests'][0]['url']
        else:
            media_url = export['media'][0]['src']

        # stream=True so the export body is read lazily, line by line.
        response = requests.get(media_url, stream=True)

        # iter_lines() is called here, so each wrapper is bound to a line
        # iterator created at this point rather than on every call.
        # NOTE(review): both iterators read the same streamed body, so
        # presumably only one of the two wrappers can be consumed per
        # response -- confirm against the requests documentation.
        response.csv_reader = functools.partial(
            csv.reader,
            response.iter_lines(decode_unicode=True),
        )
        response.csv_dictreader = functools.partial(
            csv.DictReader,
            response.iter_lines(decode_unicode=True),
        )
        return response

    def wait_export(
        self,
        export_type,
        timeout=None,
    ):
        """
        Blocks until an in-progress export is ready.

        - **export_type** is a string specifying which type of export to wait
          for.
        - **timeout** is the maximum number of seconds to wait. ``None`` (the
          default) waits indefinitely. ``0`` checks the export exactly once
          without sleeping.

        The export is always checked at least once, and once more when the
        time limit is reached. Returns the :py:class:`dict` describing the
        export as soon as its state is ``ready`` or ``finished``.

        If ``timeout`` is given and the export is not ready by the time limit,
        :py:class:`.PanoptesAPIException` is raised.
        """

        poll_interval = 2

        # Prefer a monotonic clock so wall-clock adjustments cannot stretch
        # or cut short the wait; Python 2 has no time.monotonic.
        clock = getattr(time, 'monotonic', time.time)

        # Compare against None explicitly: a timeout of 0 is a real (already
        # expired) limit, not "wait forever".
        deadline = None if timeout is None else clock() + timeout

        while True:
            export_description = self.describe_export(
                export_type,
            )

            # Talk exports carry their state directly on the data request;
            # regular exports carry it in the media metadata.
            if export_type in TALK_EXPORT_TYPES:
                export_metadata = export_description['data_requests'][0]
            else:
                export_metadata = export_description['media'][0]['metadata']

            if export_metadata.get('state', '') in ('ready', 'finished'):
                return export_description

            delay = poll_interval
            if deadline is not None:
                remaining = deadline - clock()
                if remaining <= 0:
                    raise PanoptesAPIException(
                        '{}_export not ready within {} seconds'.format(
                            export_type,
                            timeout
                        )
                    )
                # Never sleep past the deadline.
                delay = min(poll_interval, remaining)

            time.sleep(delay)

    def generate_export(self, export_type):
        """
        Start a new export.

        - **export_type** is a string specifying which type of export to start.

        Returns a :py:class:`dict` containing metadata for the new export.
        """

        if export_type in TALK_EXPORT_TYPES:
            # Talk exports are requested through the Talk client, addressed
            # by "project-<id>" and the export type without its "talk_"
            # prefix.
            return talk.post_data_request(
                'project-{}'.format(self.id),
                export_type.replace('talk_', '')
            )

        # http_post returns a sequence; the first item is what is returned.
        return self.http_post(
            self._export_path(export_type),
            json={"media": {"content_type": "text/csv"}},
        )[0]

    def describe_export(self, export_type):
        """
        Fetch metadata for an export.

        - **export_type** is a string specifying which type of export to look
          up.

        Returns a :py:class:`dict` containing metadata for the export.
        """
        if export_type in TALK_EXPORT_TYPES:
            # Same addressing scheme as in generate_export.
            return talk.get_data_request(
                'project-{}'.format(self.id),
                export_type.replace('talk_', '')
            )[0]

        return self.http_get(
            self._export_path(export_type),
        )[0]

    def _export_path(self, export_type):
        """
        Build the relative API path ``<id>/<export_type>_export`` used for
        non-Talk exports.
        """
        return '{}/{}_export'.format(self.id, export_type)