Source code for panoptes_client.exportable
from __future__ import absolute_import, division, print_function
import csv
import datetime
import functools
import time
import requests
from panoptes_client.panoptes import (
PanoptesAPIException,
Talk,
)
# Export types that the Exportable methods route through the module-level
# ``talk`` client (data requests) instead of the object's own
# ``http_get`` / ``http_post`` export endpoints.
TALK_EXPORT_TYPES = (
'talk_comments',
'talk_tags',
)
# Single Talk client instance, created at import time and shared by every
# Exportable when generating or describing Talk exports.
talk = Talk()
class Exportable(object):
    """
    Abstract class containing methods for generating and downloading data
    exports.

    The methods below rely on the concrete class providing an ``id``
    attribute and ``http_get`` / ``http_post`` methods.
    """

    def get_export(
        self,
        export_type,
        generate=False,
        wait=False,
        wait_timeout=None,
    ):
        """
        Downloads a data export over HTTP. Returns a `Requests Response
        <http://docs.python-requests.org/en/master/api/#requests.Response>`_
        object containing the content of the export.

        - **export_type** is a string specifying which type of export should be
          downloaded.
        - **generate** is a boolean specifying whether to generate a new export
          and wait for it to be ready, or to just download the latest export.
        - **wait** is a boolean specifying whether to wait for an in-progress
          export to finish, if there is one. Has no effect if ``generate`` is
          ``True`` (generating always waits).
        - **wait_timeout** is the maximum number of seconds to wait whenever
          waiting takes place, i.e. if either ``generate`` or ``wait`` is
          ``True``. Has no effect if both are ``False``.

        If waiting times out, :py:class:`.PanoptesAPIException` is raised (see
        :py:meth:`wait_export`).

        The returned :py:class:`.Response` object has two additional attributes
        as a convenience for working with the CSV content; **csv_reader** and
        **csv_dictreader**, which are wrappers for :py:meth:`.csv.reader`
        and :py:class:`csv.DictReader` respectively. These wrappers take care
        of correctly decoding the export content for the CSV parser.

        Example::

            classification_export = Project(1234).get_export('classifications')
            for row in classification_export.csv_reader():
                print(row)

            classification_export = Project(1234).get_export('classifications')
            for row in classification_export.csv_dictreader():
                print(row)
        """
        if generate:
            self.generate_export(export_type)

        if generate or wait:
            export = self.wait_export(export_type, wait_timeout)
        else:
            export = self.describe_export(export_type)

        # Talk exports and regular exports keep the download URL in
        # different places in their metadata.
        if export_type in TALK_EXPORT_TYPES:
            media_url = export['data_requests'][0]['url']
        else:
            media_url = export['media'][0]['src']

        response = requests.get(media_url, stream=True)
        # Each wrapper is bound to its own line iterator over the response
        # body; calling the wrapper builds the CSV parser on top of it.
        response.csv_reader = functools.partial(
            csv.reader,
            response.iter_lines(decode_unicode=True),
        )
        response.csv_dictreader = functools.partial(
            csv.DictReader,
            response.iter_lines(decode_unicode=True),
        )
        return response

    def wait_export(
        self,
        export_type,
        timeout=None,
        poll_interval=2,
    ):
        """
        Blocks until an in-progress export is ready.

        - **export_type** is a string specifying which type of export to wait
          for.
        - **timeout** is the maximum number of seconds to wait. A falsy value
          (``None`` or ``0``) means wait indefinitely.
        - **poll_interval** is the number of seconds to sleep between checks
          of the export's state.

        The export's state is always checked at least once. The sleep between
        checks never extends past the time limit, and the state is checked one
        final time when the limit is reached.

        If ``timeout`` is given and the export is not ready by the time limit,
        :py:class:`.PanoptesAPIException` is raised.

        Returns the export metadata :py:class:`dict` from the check that found
        the export ready.
        """
        deadline = None
        if timeout:
            deadline = datetime.datetime.now() + datetime.timedelta(
                seconds=timeout
            )

        while True:
            export_description = self.describe_export(
                export_type,
            )

            if export_type in TALK_EXPORT_TYPES:
                export_metadata = export_description['data_requests'][0]
            else:
                export_metadata = export_description['media'][0]['metadata']

            if export_metadata.get('state', '') in ('ready', 'finished'):
                return export_description

            if deadline is None:
                time.sleep(poll_interval)
                continue

            remaining = (
                deadline - datetime.datetime.now()
            ).total_seconds()
            if remaining <= 0:
                break
            # Never sleep past the deadline, so the timeout is honoured and
            # the last poll happens right at the time limit.
            time.sleep(min(poll_interval, remaining))

        raise PanoptesAPIException(
            '{}_export not ready within {} seconds'.format(
                export_type,
                timeout
            )
        )

    def generate_export(self, export_type):
        """
        Start a new export.

        - **export_type** is a string specifying which type of export to start.

        Returns a :py:class:`dict` containing metadata for the new export.
        """
        if export_type in TALK_EXPORT_TYPES:
            # Talk data requests are keyed by project section and by the
            # export kind without its ``talk_`` prefix.
            return talk.post_data_request(
                'project-{}'.format(self.id),
                export_type.replace('talk_', '')
            )

        return self.http_post(
            self._export_path(export_type),
            json={"media": {"content_type": "text/csv"}},
        )[0]

    def describe_export(self, export_type):
        """
        Fetch metadata for an export.

        - **export_type** is a string specifying which type of export to look
          up.

        Returns a :py:class:`dict` containing metadata for the export.
        """
        if export_type in TALK_EXPORT_TYPES:
            return talk.get_data_request(
                'project-{}'.format(self.id),
                export_type.replace('talk_', '')
            )[0]

        return self.http_get(
            self._export_path(export_type),
        )[0]

    def _export_path(self, export_type):
        """
        Build the path of the export endpoint for this object, in the form
        ``<id>/<export_type>_export``.
        """
        return '{}/{}_export'.format(self.id, export_type)