Source code for mezzanine.blog.management.commands.import_tumblr

from __future__ import print_function
from __future__ import unicode_literals
from future.builtins import int

from datetime import datetime
from json import loads
from time import sleep

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

from django.core.management.base import CommandError
from django.utils.html import strip_tags

from mezzanine.blog.management.base import BaseImporterCommand


# Max number of posts Tumblr API will return per call. Also used as the
# paging step for the ``start`` query parameter, and a page shorter than
# this ends the import loop.
MAX_POSTS_PER_CALL = 20
# Max times to retry API call after failing.
MAX_RETRIES_PER_CALL = 3
# Seconds to pause for between retries.
SLEEP_PER_RETRY = 3


def title_from_content(content):
    """
    Try and extract the first sentence from a block of text to use as
    a title.

    The content is cut just after whichever sentence terminator
    (``". "``, ``"?"``, ``"!"``, ``"<br />"``, a newline or ``"</p>"``)
    occurs earliest in it; if none is present the whole content is used.
    HTML tags are stripped from the result with ``strip_tags``.
    """
    terminators = (". ", "?", "!", "<br />", "\n", "</p>")
    # Locate every terminator that is present, then cut at the one that
    # appears first in the text rather than the first one listed above,
    # so that "Hi! Foo. bar" yields "Hi!" and not "Hi! Foo. ".
    found = [(content.find(end), end) for end in terminators
             if end in content]
    if found:
        index, end = min(found)
        content = content[:index + len(end)]
    return strip_tags(content)
class Command(BaseImporterCommand):
    """
    Import Tumblr blog posts into the blog app.

    Posts are read page by page from the blog's JSON feed at
    ``http://<user>.tumblr.com/api/read/json``. Each post is passed to
    the ``handle_<type>_post`` method matching its ``type``, which
    returns a ``(title, content)`` pair that is handed to ``add_post``.
    """

    help = "Import Tumblr blog posts into the blog app."

    def add_arguments(self, parser):
        """
        Add the ``-t`` / ``--tumblr-user`` option to the inherited ones.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "-t", "--tumblr-user", dest="tumblr_user",
            help="Tumblr username")

    def handle_import(self, options):
        """
        Page through the Tumblr feed and add every supported post.

        Raises ``CommandError`` when no Tumblr user is given, when the
        user is unknown (HTTP 404), when the API is still unavailable
        (HTTP 503) after ``MAX_RETRIES_PER_CALL`` attempts, or when any
        other communication error occurs.
        """
        tumblr_user = options.get("tumblr_user")
        if tumblr_user is None:
            raise CommandError("Usage is import_tumblr %s" % self.args)
        verbosity = int(options.get("verbosity", 1))
        json_url = "http://%s.tumblr.com/api/read/json" % tumblr_user
        json_start = "var tumblr_api_read ="
        date_format = "%a, %d %b %Y %H:%M:%S"
        start_index = 0

        while True:
            call_url = "%s?start=%s" % (json_url, start_index)
            if verbosity >= 2:
                print("Calling %s" % call_url)
            data = self._call_api(call_url)
            # The feed is a JavaScript assignment; keep only the JSON
            # object between the variable name and the closing ";".
            feed = loads(data.split(json_start, 1)[1].strip().rstrip(";"))
            posts = feed["posts"]
            start_index += MAX_POSTS_PER_CALL

            for post in posts:
                handler_name = "handle_%s_post" % post["type"]
                handler = getattr(self, handler_name, None)
                if handler is None:
                    # No handler for this post type, skip it rather
                    # than abort the whole import.
                    continue
                title, content = handler(post)
                pub_date = datetime.strptime(post["date"], date_format)
                self.add_post(title=title, content=content,
                              pub_date=pub_date, tags=post.get("tags"),
                              old_url=post["url-with-slug"])
            # A short page means there is nothing left to fetch.
            if len(posts) < MAX_POSTS_PER_CALL:
                break

    def _call_api(self, call_url):
        """
        Return the body of ``call_url`` as text.

        A 503 response is retried up to ``MAX_RETRIES_PER_CALL`` times in
        total, pausing ``SLEEP_PER_RETRY`` seconds between attempts.
        Raises ``CommandError`` for a 404, for a persistent 503, and for
        any other failure to communicate with the API.
        """
        retries = MAX_RETRIES_PER_CALL
        while True:
            data = None
            try:
                response = urlopen(call_url)
                try:
                    status = response.getcode()
                    if status == 200:
                        data = response.read()
                finally:
                    response.close()
            except IOError as e:
                # Python 3's urlopen raises HTTPError (an IOError that
                # carries the status in ``code``) for error responses
                # instead of returning them, so recover the status here.
                status = getattr(e, "code", None)
                if status is None:
                    error = "Error communicating with Tumblr API (%s)" % e
                    raise CommandError(error)

            if status == 404:
                raise CommandError("Invalid Tumblr user.")
            elif status == 503:
                # The Tumblr API is frequently unavailable so make a
                # few tries, pausing between each.
                retries -= 1
                if retries <= 0:
                    error = "Tumblr API unavailable, try again shortly."
                    raise CommandError(error)
                sleep(SLEEP_PER_RETRY)
                continue
            elif status != 200:
                error = ("Error communicating with Tumblr API (%s)"
                         % ("HTTP status %s" % status))
                raise CommandError(error)

            # Python 3 returns bytes; decode so the text marker used by
            # the caller can be split out of the response.
            if isinstance(data, bytes):
                data = data.decode("utf-8")
            return data

    def handle_regular_post(self, post):
        """
        Return the title and body of a regular (text) post.
        """
        return post["regular-title"], post["regular-body"]

    def handle_link_post(self, post):
        """
        Return the link text as title and a linked paragraph followed
        by the link description as content.
        """
        title = post["link-text"]
        content = ('<p><a href="%(link-url)s">%(link-text)s</a></p>'
                   '%(link-description)s') % post
        return title, content

    def handle_quote_post(self, post):
        """
        Return the quote text as title and the quote in a blockquote
        followed by its source as content.
        """
        title = post["quote-text"]
        content = ("<blockquote>%(quote-text)s</blockquote>"
                   "<p>%(quote-source)s</p>") % post
        return title, content

    def handle_photo_post(self, post):
        """
        Return a title taken from the caption and the 400px photo
        followed by the caption as content.
        """
        title = title_from_content(post["photo-caption"])
        content = '<p><img src="%(photo-url-400)s"></p>%(photo-caption)s'
        content = content % post
        return title, content

    def handle_conversation_post(self, post):
        """
        Return the conversation title and its text as one paragraph
        with newlines turned into line breaks.
        """
        title = post["conversation-title"]
        content = post["conversation-text"].replace("\n", "<br />")
        content = "<p>%s</p>" % content
        return title, content

    def handle_video_post(self, post):
        """
        Return a title taken from the caption and the video player
        markup as content.
        """
        title = title_from_content(post["video-caption"])
        content = "<p>%(video-player)s</p>" % post
        return title, content

    def handle_audio_post(self, post):
        """
        Return the title and content for an audio post.

        With an ID3 title, that is the title and the caption precedes
        the player in the content. Without one, the title is taken from
        the caption and the content is the player alone.
        """
        title = post.get("id3-title")
        if title:
            content = "%(audio-caption)s<p>%(audio-player)s</p>" % post
        else:
            title = title_from_content(post["audio-caption"])
            content = "<p>%(audio-player)s</p>" % post
        return title, content

    def handle_answer_post(self, post):
        """
        Return the question as title and the answer as content.
        """
        return post["question"], post["answer"]