#!/usr/bin/env python
# -*- coding: utf-8 -*-

import errno
import logging
import os
import re
import urllib
import urlparse
import xml.etree.ElementTree as ET

# URL prefix to which '<name>.xml' is appended when fetching a slideshow's
# XML description (see get_slideshow_xml).
aws_base_url = 'http://s3.amazonaws.com/slideshare/'
# Logger shared by every function in this module.
logger = logging.getLogger('slideshare-dl')

def get_slideshow_pagecontent(url):
    """Fetch *url* and return the raw body of the response.

    The response handle is closed even if reading the body fails.
    """
    logger.info('Fetching slideshow page: <%s>.', url)
    page = urllib.urlopen(url)
    try:
        content = page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()
    logger.debug('content = %r', content)
    return content

def get_slideshow_name(data):
    """Return the slideshow name embedded in *data*.

    The name is the run of word characters and hyphens that follows the
    first ``doc=`` found in the text.

    Raises:
        ValueError: if *data* contains no ``doc=<name>`` token.
    """
    match = re.search(r'doc=([\w-]+)', data)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no 'doc=<name>' token found in slideshow page")
    return match.group(1)

def get_slideshow_xml(name):
    """Fetch and return the XML description of the slideshow *name*.

    The document is read from ``aws_base_url + name + '.xml'``. The
    response handle is closed even if reading the body fails.
    """
    url = aws_base_url + name + '.xml'
    logger.info('Fetching slideshow XML: <%s>.', url)
    page = urllib.urlopen(url)
    try:
        xml = page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()
    logger.debug('xml = %r', xml)
    return xml

def get_slideurls(show_xml):
    """Return the ``Src`` attribute of every ``Slide`` element in *show_xml*.

    Args:
        show_xml: XML document as a string.

    Returns:
        list: the slide URLs in document order.

    Raises:
        KeyError: if a ``Slide`` element has no ``Src`` attribute.
    """
    logger.info('Extracting slide URLs')
    show_tree = ET.fromstring(show_xml)
    # Element.getiterator() is deprecated and was removed in Python 3.9;
    # prefer Element.iter() and fall back only where it does not exist.
    iter_elems = getattr(show_tree, 'iter', None)
    if iter_elems is None:
        iter_elems = show_tree.getiterator
    slide_urls = [elem.attrib['Src'] for elem in iter_elems('Slide')]
    logger.debug('slide_urls = %r', slide_urls)
    return slide_urls

def write_slides(urls, path):
    """Download every slide in *urls* into the directory *path*.

    The directory, including any missing parents, is created when absent.
    Each slide is saved under the last component of its URL path.

    Args:
        urls: iterable of slide URLs.
        path: output directory.

    Returns:
        list: the local file names reported by ``urllib.urlretrieve``,
        one per URL, in input order.

    Raises:
        OSError: if the output directory cannot be created.
    """
    logger.info('Writing slides...')
    try:
        os.makedirs(path)
    except OSError as err:
        # An already existing directory is fine; anything else (permission
        # denied, a regular file in the way, ...) is a real failure.
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise
    filenames = []
    for url in urls:
        parsed_url = urlparse.urlparse(url)
        slide_name = parsed_url.path.rsplit('/', 1)[1]
        slide_path = os.path.join(path, slide_name)
        logger.info('  %s', slide_path)
        filename, _headers = urllib.urlretrieve(url, slide_path)
        # append(), not +=: extending a list with a string would add its
        # individual characters instead of the file name.
        filenames.append(filename)
    return filenames

def download_slideshow(url, path=None):
    """Download all slides of the slideshow page at *url*.

    Slides are written to *path*. When *path* is falsy they go to a
    directory named after the slideshow, under the current directory.
    """
    page_data = get_slideshow_pagecontent(url)
    name = get_slideshow_name(page_data)
    urls = get_slideurls(get_slideshow_xml(name))
    if path:
        target_dir = path
    else:
        target_dir = os.path.join(os.curdir, name)
    write_slides(urls, target_dir)


if __name__ == '__main__':
    # Command-line entry point: parse options, configure logging, then
    # download the slideshow named by the single positional URL argument.
    import sys
    from optparse import OptionParser

    # Option parsing stays outside the try block below: optparse signals
    # --help and usage errors by raising SystemExit, which must propagate
    # untouched so the process ends with the exit status optparse chose.
    usage = 'usage: %prog [OPTION]... URL'
    parser = OptionParser(usage=usage)
    parser.add_option('-d', '--output-directory', dest='directory',
                      help='write slides to files in DIR',
                      metavar='DIR')
    parser.add_option('-v', '--verbose', action='count', dest='verbosity',
                      help=('explain what is being done (use twice for '
                           'greater effect)'))
    parser.set_defaults(verbosity=0)
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error('Incorrect number of arguments.')

    # Map the number of -v flags to a log level.
    if options.verbosity == 0:
        loglevel = logging.WARN
    elif options.verbosity == 1:
        loglevel = logging.INFO
    else:
        loglevel = logging.DEBUG

    logging.basicConfig(level=loglevel)

    url = args[0]
    try:
        download_slideshow(url, options.directory)
    except KeyboardInterrupt:
        logger.warning('Program interrupted by user.')
        # Conventional shell status for termination by SIGINT (128 + 2).
        sys.exit(130)
    except Exception as e:
        # Log the traceback and report the failure through the exit status.
        logger.exception(e)
        sys.exit(1)

