Source code for mezzanine.blog.management.commands.import_tumblr
from __future__ import print_function
from __future__ import unicode_literals
from future.builtins import int
from datetime import datetime
from json import loads
from time import sleep
try:
from urllib.request import urlopen
except ImportError:
from urllib import urlopen
from django.core.management.base import CommandError
from django.utils.html import strip_tags
from mezzanine.blog.management.base import BaseImporterCommand
# Max number of posts the Tumblr API will return per call. The importer also
# uses it as the paging step and to detect the last (short) page.
MAX_POSTS_PER_CALL = 20
# Max times to retry an API call after failing (the importer counts this
# down when the API answers with HTTP 503).
MAX_RETRIES_PER_CALL = 3
# Seconds to pause for between retries.
SLEEP_PER_RETRY = 3
def title_from_content(content):
    """
    Try and extract the first sentence from a block of text to use as a
    title.

    The text is cut at whichever sentence terminator (``". "``, ``"?"``,
    ``"!"``, ``"<br />"``, a newline or ``"</p>"``) occurs *earliest* in
    ``content``; the terminator itself is kept. If none occurs, the whole
    text is used. HTML tags are stripped from the result.
    """
    terminators = (". ", "?", "!", "<br />", "\n", "</p>")
    # Collect (position, terminator) for every terminator that is present.
    # Choosing by position rather than by tuple order ensures that, e.g.,
    # "What? Yes. No" yields "What?" and not "What? Yes. ".
    found = [(content.find(end), end) for end in terminators]
    found = [pair for pair in found if pair[0] != -1]
    if found:
        # No two terminators can start at the same index, so the minimum
        # is decided by position alone.
        index, end = min(found)
        content = content[:index] + end
    return strip_tags(content)
class Command(BaseImporterCommand):
    """
    Import Tumblr blog posts into the blog app.

    Posts are read page by page from
    ``http://<user>.tumblr.com/api/read/json``. Each post is converted to
    a ``(title, content)`` pair by the ``handle_<type>_post`` method that
    matches its ``type`` field and is then passed to ``add_post``.
    """

    help = "Import Tumblr blog posts into the blog app."

    def add_arguments(self, parser):
        """
        Add the ``-t`` / ``--tumblr-user`` option to the inherited options.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "-t", "--tumblr-user", dest="tumblr_user",
            help="Tumblr username")

    def handle_import(self, options):
        """
        Fetch every page of the given user's Tumblr feed and add each post.

        Reads ``tumblr_user`` and ``verbosity`` from ``options``. Posts
        whose type has no matching ``handle_<type>_post`` method are
        skipped.

        Raises ``CommandError`` when no Tumblr user is given or when the
        API cannot be read (see ``_fetch_posts``).
        """
        tumblr_user = options.get("tumblr_user")
        if tumblr_user is None:
            # NOTE(review): ``self.args`` is not defined in this class;
            # presumably it is inherited - confirm for the Django version
            # in use.
            raise CommandError("Usage is import_tumblr %s" % self.args)
        verbosity = int(options.get("verbosity", 1))
        json_url = "http://%s.tumblr.com/api/read/json" % tumblr_user
        date_format = "%a, %d %b %Y %H:%M:%S"
        start_index = 0
        while True:
            call_url = "%s?start=%s" % (json_url, start_index)
            posts = self._fetch_posts(call_url, verbosity)
            start_index += MAX_POSTS_PER_CALL
            for post in posts:
                # Default of None so that an unknown post type is skipped
                # instead of raising AttributeError.
                handler = getattr(
                    self, "handle_%s_post" % post["type"], None)
                if handler is None:
                    continue
                title, content = handler(post)
                pub_date = datetime.strptime(post["date"], date_format)
                self.add_post(title=title, content=content,
                              pub_date=pub_date, tags=post.get("tags"),
                              old_url=post["url-with-slug"])
            # A short page means there is nothing further to request.
            if len(posts) < MAX_POSTS_PER_CALL:
                break

    def _fetch_posts(self, call_url, verbosity=1):
        """
        Return the list of posts found at ``call_url``.

        A 503 response is retried, pausing ``SLEEP_PER_RETRY`` seconds
        between attempts, until ``MAX_RETRIES_PER_CALL`` attempts have
        failed.

        Raises ``CommandError`` for a 404 (unknown user), for any other
        non-200 status, for connection errors, when retries run out, or
        when the response body cannot be parsed.
        """
        json_start = "var tumblr_api_read ="
        # The counter lives outside the retry loop so that repeated 503s
        # really do exhaust it.
        retries = MAX_RETRIES_PER_CALL
        while True:
            if verbosity >= 2:
                print("Calling %s" % call_url)
            try:
                response = urlopen(call_url)
            except IOError as e:
                # Python 3's urlopen raises HTTPError (an IOError carrying
                # the status in ``code``) for 4xx/5xx responses; treat it
                # like a returned status. Anything without a status is a
                # genuine communication failure.
                status = getattr(e, "code", None)
                if status is None:
                    error = "Error communicating with Tumblr API (%s)" % e
                    raise CommandError(error)
            else:
                try:
                    status = response.code
                    if status == 200:
                        data = response.read()
                finally:
                    response.close()
            if status == 200:
                break
            if status == 404:
                raise CommandError("Invalid Tumblr user.")
            if status != 503:
                error = ("Error communicating with Tumblr API "
                         "(HTTP status %s)" % status)
                raise CommandError(error)
            # The Tumblr API is frequently unavailable so make a
            # few tries, pausing between each.
            retries -= 1
            if not retries:
                error = "Tumblr API unavailable, try again shortly."
                raise CommandError(error)
            sleep(SLEEP_PER_RETRY)
        try:
            # The body is bytes on Python 3 (and a byte string on
            # Python 2); decode before searching it with a text marker.
            if isinstance(data, bytes):
                data = data.decode("utf-8")
            # The payload is a JavaScript assignment: drop the
            # "var ... =" prefix and the trailing ";" to get plain JSON.
            _, marker, payload = data.partition(json_start)
            if not marker:
                raise ValueError("missing '%s'" % json_start)
            feed = loads(payload.strip().rstrip(";"))
        except ValueError as e:
            error = "Unexpected response from Tumblr API (%s)" % e
            raise CommandError(error)
        return feed["posts"]

    def handle_regular_post(self, post):
        """
        Return the title and body of a ``regular`` post as they are.
        """
        return post["regular-title"], post["regular-body"]

    def handle_link_post(self, post):
        """
        Return the link text as title and a linked paragraph followed by
        the link description as content.
        """
        title = post["link-text"]
        content = ('<p><a href="%(link-url)s">%(link-text)s</a></p>'
                   '%(link-description)s') % post
        return title, content

    def handle_quote_post(self, post):
        """
        Return the quote text as title and a blockquote followed by the
        quote source as content.
        """
        title = post["quote-text"]
        content = ("<blockquote>%(quote-text)s</blockquote>"
                   "<p>%(quote-source)s</p>") % post
        return title, content

    def handle_photo_post(self, post):
        """
        Return a title derived from the caption and the 400px photo
        followed by the caption as content.
        """
        title = title_from_content(post["photo-caption"])
        content = '<p><img src="%(photo-url-400)s"></p>%(photo-caption)s'
        content = content % post
        return title, content

    def handle_conversation_post(self, post):
        """
        Return the conversation title and the conversation text as one
        paragraph, with newlines turned into ``<br />``.
        """
        title = post["conversation-title"]
        content = post["conversation-text"].replace("\n", "<br />")
        content = "<p>%s</p>" % content
        return title, content

    def handle_video_post(self, post):
        """
        Return a title derived from the caption and the video player
        markup as content.
        """
        title = title_from_content(post["video-caption"])
        content = "<p>%(video-player)s</p>" % post
        return title, content

    def handle_audio_post(self, post):
        """
        Return the ID3 title with caption plus player as content; when
        there is no ID3 title, derive the title from the caption and use
        only the player as content.
        """
        title = post.get("id3-title")
        if title:
            content = "%(audio-caption)s<p>%(audio-player)s</p>" % post
        else:
            title = title_from_content(post["audio-caption"])
            content = "<p>%(audio-player)s</p>" % post
        return title, content

    def handle_answer_post(self, post):
        """
        Return the question as title and the answer as content.
        """
        return post["question"], post["answer"]