Source code for mezzanine.blog.management.commands.import_rss

from __future__ import unicode_literals

from datetime import timedelta
from time import timezone
try:
    from urllib.request import urlopen
    from urllib.parse import urljoin
except ImportError:
    from urllib import urlopen
    from urlparse import urljoin

from django.core.management.base import CommandError

from mezzanine.blog.management.base import BaseImporterCommand


class Command(BaseImporterCommand):
    """
    Import an RSS feed into the blog app.

    The feed is located either directly via ``--rss-url`` or by scanning
    the ``<link>`` tags of the web page given with ``--page-url``. Every
    entry of the feed is handed to ``self.add_post``.
    """

    help = ("Import an RSS feed into the blog app. Requires the "
            "dateutil and feedparser packages installed, and also "
            "BeautifulSoup if using the --page-url option.")

    def add_arguments(self, parser):
        """
        Register the ``--rss-url`` and ``--page-url`` options on top of
        whatever arguments the base importer command defines.
        """
        super(Command, self).add_arguments(parser)
        parser.add_argument(
            "-r", "--rss-url", dest="rss_url",
            help="RSS feed URL")
        parser.add_argument(
            "-p", "--page-url", dest="page_url",
            help="URL for a web page containing the RSS link")

    def handle_import(self, options):
        """
        Fetch the feed and call ``self.add_post`` once per feed entry.

        ``options`` is a mapping that is read for the ``rss_url`` and
        ``page_url`` keys; at least one of them must be set. When only
        ``page_url`` is given, the page is downloaded and the first
        ``<link>`` tag whose ``type`` mentions ``application/rss`` or
        ``application/atom`` supplies the feed URL.

        Raises ``CommandError`` when neither URL is supplied, when a
        required third-party package cannot be imported, or when no feed
        link can be found on the page.
        """
        rss_url = options.get("rss_url")
        page_url = options.get("page_url")
        if not (page_url or rss_url):
            raise CommandError("Either --rss-url or --page-url option "
                               "must be specified")

        # Optional dependencies are imported lazily so that a missing
        # package is reported as a readable command error.
        try:
            from dateutil import parser as date_parser
        except ImportError:
            raise CommandError("dateutil package is required")
        try:
            from feedparser import parse
        except ImportError:
            raise CommandError("feedparser package is required")

        if not rss_url and page_url:
            # Allow bare host names such as "example.com/blog".
            if "://" not in page_url:
                page_url = "http://%s" % page_url
            try:
                from BeautifulSoup import BeautifulSoup
            except ImportError:
                raise CommandError("BeautifulSoup package is required")
            from contextlib import closing

            # closing() guarantees the HTTP response is released on both
            # Python 2 and 3, even if reading the page fails.
            with closing(urlopen(page_url)) as response:
                page_html = response.read()

            for link in BeautifulSoup(page_html).findAll("link"):
                link_type = link.get("type", "")
                if ("application/rss" in link_type or
                        "application/atom" in link_type):
                    # The href may be relative to the page it came from.
                    rss_url = urljoin(page_url, link["href"])
                    break
            else:
                # The loop finished without finding a feed link.
                raise CommandError("Could not parse RSS link from the page")

        posts = parse(rss_url)["entries"]
        for post in posts:
            # Prefer the full content of an entry over its summary.
            if hasattr(post, 'content'):
                content = post.content[0]["value"]
            else:
                content = post.summary
            tags = [tag["term"] for tag in getattr(post, 'tags', [])]

            # Look the two date fields up one after the other. Passing
            # ``post.updated`` as the getattr() default would evaluate it
            # eagerly and discard a valid ``published`` date whenever
            # ``updated`` is missing.
            date_string = (getattr(post, "published", None) or
                           getattr(post, "updated", None))
            pub_date = None
            if date_string:
                try:
                    # time.timezone is the local non-DST offset from UTC
                    # in seconds.
                    # NOTE(review): this ignores DST and any offset the
                    # feed date itself carries - confirm intent.
                    pub_date = (date_parser.parse(date_string) -
                                timedelta(seconds=timezone))
                except AttributeError:
                    # Same tolerance as before: an AttributeError while
                    # parsing leaves the post without a date.
                    pub_date = None

            self.add_post(title=post.title, content=content,
                          pub_date=pub_date, tags=tags, old_url=None)