1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#! /usr/bin/env python
'''
tou.py -- automate the download of a broadcast from http://www.tou.tv/.

1) Put it somewhere in your $PATH, alongside rtmpdump.
2) Make executable (chmod +x ...).
3) Invoke from a terminal.

Usage example:

tou.py http://www.tou.tv/en-audition-avec-simon/S01E01

See "tou.py --help" to consult available options.

It requires Python >= 2.5 (note: it is incompatible with version 3 of the
language, i.e. Python >= 3.0.0). You also need to have rtmpdump available in
your $PATH (at least revision r141).

Written by Sylvain Fourmanoit <fourmanoit@gmail.com>, based on a script from
Guillaume L\'Heureux <glheureux.projects@gmail.com>.

History
-------
2012-12-15: DEPRECATED. No longer works at the time of writing. If you fix it,
            drop me a line.
2012-06-21: change PID to idMedia in first phase (thanks to Sebastien Audet
            for spotting and fixing it).
2010-11-20: adapt to PID case change on tou.tv (thanks to Carl Quirion 
            for the reminder).
2010-11-02: for increased portability, use subprocess module instead of 
            os.system to call rtmpdump (confirmed to run on Windows 
            by Benoit Poirier).
2010-02-01: initial release.
'''
#-------------------------------------------------------------------------------
# Imports
#
import os, logging, subprocess, shlex
import urllib, re, xml.etree.ElementTree

#-------------------------------------------------------------------------------
# Core logic
#        
def fetch_url(url):
    '''
    Open `url` with urllib and return the resulting file-like object.

    When the returned object exposes getcode(), any status other than 200
    raises RuntimeError. When it does not, the status cannot be checked: a
    warning is logged and the object is returned as-is.
    '''
    logging.debug('fetching "%s"...' % url)
    response = urllib.urlopen(url)

    # Older urllib response objects have no getcode(); nothing to verify.
    if not hasattr(response, 'getcode'):
        logging.warning(
            'version of urllib cannot check return code of HTTP GET '
            'operation, get newer version of python?')
        return response

    status = response.getcode()
    if status != 200:
        raise RuntimeError('error fetching "%s" (error %d)' % (url, status))
    return response

def get_rtmp(url):
    '''
    Resolve the RTMP path of the broadcast published at `url`.

    The page at `url` is fetched and the identifier following `idMedia":"`
    is extracted from it (case-insensitively). That identifier is then
    submitted to release.theplatform.com, and the text of the first <url>
    element starting with "rtmp" in the returned XML document is returned.

    Raises ValueError when either the identifier or an RTMP <url> element
    cannot be found. Errors raised by fetch_url() propagate unchanged.
    '''
    page = fetch_url(url)
    try:
        data = page.read()
    finally:
        page.close()

    logging.debug('extracting PID...')
    match = re.search(r'(?<=idMedia":")\w+', data, re.I)
    if match is None:
        # Without this check the failure would surface as an opaque
        # AttributeError on None.
        raise ValueError('no idMedia identifier found in "%s"' % url)
    pid = match.group(0)
    logging.info('PID is "%s"' % pid)

    smil = fetch_url(
        'http://release.theplatform.com/content.select?pid=%s' % pid)
    try:
        tree = xml.etree.ElementTree.parse(smil)
    finally:
        smil.close()

    logging.debug('extracting RTMP path...')
    # './/url' selects the same elements as the absolute '//url' form, which
    # ElementTree only accepts by rewriting it (with a FutureWarning).
    for node in tree.findall('.//url'):
        # Empty <url/> elements have text set to None: skip them.
        if node.text and node.text.startswith('rtmp'):
            break
    else:
        raise ValueError('no RTMP path in SMIL-like document')

    logging.info('RTMP path is "%s"' % node.text)
    return node.text

def find_free_filename(url, max_slots = 10):
    '''
    Derive an output filename from `url` that does not exist yet.

    The base name is built from the last two '/'-separated components of
    `url`, lowercased, with every run of characters outside [a-z0-9_]
    replaced by '_', and '.flv' appended. The base name itself is tried
    first, then the base name followed by '.1', '.2', ... up to
    '.<max_slots - 1>'. Existence is checked relative to the current
    working directory.

    Returns the first candidate for which os.path.exists() is false.
    Raises RuntimeError when all `max_slots` candidates already exist.
    '''
    logging.debug('finding unused filename...')
    # Padding with ['out', ''] guarantees two components even when the url
    # contains no '/'.
    parts = (['out', ''] + url.split('/'))[-2:]
    basename = re.sub('[^a-z0-9_]+', '_', '_'.join(parts).lower()) + '.flv'

    # range() rather than xrange(): the slot count is tiny, and range() is
    # available on both Python 2 and Python 3.
    for slot in range(max_slots):
        if slot > 0:
            name = '%s.%d' % (basename, slot)
        else:
            name = basename
        if not os.path.exists(name):
            logging.info('output filename is "%s"' % name)
            return name
        logging.debug('filename "%s" taken, skipping' % name)

    raise RuntimeError(
        'could not find a free filename to download "%s" '
        '(basename "%s"), %d slots occupied' % (url, basename, max_slots))

def _shell_quote(value):
    '''
    Wrap `value` in single quotes so that shlex.split() gives it back as
    exactly one token, embedded single quotes included.
    '''
    return "'" + value.replace("'", "'\"'\"'") + "'"

def build_commandline(path, filename = 'out.flv', 
                      verbose = False, quiet = False):
    '''
    Build the rtmpdump command line used to download a clip.

    `path` is the RTMP path as returned by get_rtmp(): the RTMP url and the
    play path, separated by the literal marker '<break>'. The RTMP url must
    carry an 'auth=' parameter. `filename` is the output file handed to
    rtmpdump's -o option. `verbose` and `quiet` add the matching rtmpdump
    switch and are mutually exclusive.

    Returns the command line as a single string, meant to be tokenized with
    shlex.split(). Every value, the output filename included, is quoted so
    that spaces or quotes in it cannot break tokenization.

    Raises ValueError when both `verbose` and `quiet` are set, when `path`
    does not contain exactly one '<break>' marker, or when the RTMP url has
    no 'auth=' parameter.
    '''
    logging.debug('building command line...')
    # Explicit check rather than assert, which is stripped under python -O.
    if verbose and quiet:
        raise ValueError('verbose and quiet cannot both be set')

    if path.count('<break>') != 1:
        raise ValueError(
            'expected exactly one "<break>" marker in RTMP path "%s"' % path)
    rtmp, playpath = path.split('<break>')

    # The application name is everything from the last "ondemand/" onward.
    app = re.sub('^.*(?=ondemand/)', '', rtmp)
    auth_match = re.search('(?<=auth=)[^&]*', rtmp)
    if auth_match is None:
        raise ValueError('no auth parameter in RTMP url "%s"' % rtmp)
    auth = auth_match.group(0)

    options = [
        ('--app', app),
        ('--flashVer', 'WIN 10,0,22,87'),
        ('--swfVfy',
         'http://static.tou.tv/lib/ThePlatform/4.1.2/swf/flvPlayer.swf'),
        ('--auth', auth),
        ('--tcUrl', rtmp),
        ('--rtmp', rtmp),
        ('--playpath', playpath),
    ]

    words = ['rtmpdump']
    for flag, value in options:
        words.extend([flag, _shell_quote(value)])
    if verbose:
        words.append('--verbose')
    if quiet:
        words.append('--quiet')
    words.extend(['-o', _shell_quote(filename)])
    cmdline = ' '.join(words)

    logging.info('command line is "%s"' % cmdline)
    return cmdline

#-------------------------------------------------------------------------------
# Sample command line when invoked as script
if __name__ == '__main__':
    # Script entry point: parse the command line, resolve the RTMP path of
    # the requested broadcast, then hand the download over to rtmpdump.
    # Exit status is 0 on success, 1 on any failure, 130 when interrupted
    # (optparse itself exits with 2 on usage errors).
    import sys, optparse

    # Build and parse a command line
    p = optparse.OptionParser(
        usage = ('%prog [options] url\n\n'
                 'Download a clip from tou.tv. You need to have rtmpdump '
                 'in your $PATH.\n\n'
                 'Usage Example:\n'
                 '  %prog http://www.tou.tv/en-audition-avec-simon/S01E01'))
    p.add_option('-o', '--output', dest = 'output', default = None,
                 help = ('force output file (default is to construct it '
                         'from given url)'))
    p.add_option('-n', '--dry-run', dest = 'dry_run', 
                 action = 'store_true', default = False,
                 help = ('perform the full operation, but do not invoke '
                         'rtmpdump'))
    p.add_option('-q', '--quiet', dest = 'quiet',
                 action = 'store_true', default = False,
                 help = 'suppress all output from terminal but critical errors')
    p.add_option('-v', '--verbose', dest = 'verbose',
                 action = 'store_true', default = False,
                 help = 'make script extra-chatty')

    opts, args = p.parse_args()
    if opts.verbose and opts.quiet:
        p.error('--verbose and --quiet switches invoked at the same time.')
    if not args:
        p.error('url not given.')
    # p.error() does not return, so a url is available from here on.
    url = args[0]

    # Setup logging according to specified verbosity
    logging.basicConfig(format = '%(asctime)-15s (%(levelname)s) %(message)s',
                        level = (logging.DEBUG if opts.verbose else
                                 (logging.INFO if not opts.quiet else
                                  logging.CRITICAL)))

    # Fire in the hole!
    try:
        filename = (find_free_filename(url) if opts.output is None
                    else opts.output)
        cmdline = build_commandline(get_rtmp(url), filename, 
                                    opts.verbose, opts.quiet)
        if opts.dry_run:
            logging.debug('dry run, no execution occurred')
        else:
            # On Posix, we could simply use os.system(cmdline)
            status = subprocess.call(shlex.split(cmdline))
            # Only claim success when rtmpdump itself reports it.
            if status != 0:
                raise RuntimeError('rtmpdump exited with status %d' % status)
            logging.info('clip downloaded to "%s"' % filename)
    except KeyboardInterrupt:
        logging.fatal('interrupted by user')
        sys.exit(130)
    except Exception:
        # "except Exception" lets SystemExit through, unlike a bare except.
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the syntax stays valid on Python 2.5.
        typ, val = sys.exc_info()[:2]
        logging.fatal('%s, %s' % (val, typ.__name__))
        logging.info('aborting operation...')
        sys.exit(1)

#-------------------------------------------------------------------------------