Alexandre Lision | 0e14301 | 2014-01-22 11:02:46 -0500 | [diff] [blame] | 1 | import urllib2 |
| 2 | import sys |
| 3 | |
def fetch_rst(url):
    """Download one page and save its reST payload as ``<name>.rst``.

    The response body is trimmed in three independent steps, each applied
    only when its marker is present:

    1. everything up to and including the first ``{{{`` plus the one
       character that follows it is dropped;
    2. everything from the first remaining ``}}}`` onwards is dropped;
    3. everything up to and including ``#!rst`` plus the one character
       that follows it is dropped.

    The output file name is the last ``/``-separated component of *url*
    with any ``?query`` part removed and ``.rst`` appended.  It is written
    to the current working directory and overwrites an existing file.

    :param url: address of the page to download.
    :raises: whatever ``urllib2.urlopen`` or ``open`` raise; nothing is
        caught here.
    """
    print('Fetching %s..' % url)
    req = urllib2.Request(url)

    # try/finally rather than ``with``: the urllib2 response object does not
    # implement the context-manager protocol, and it must be closed even if
    # read() fails.
    fd = urllib2.urlopen(req, timeout=30)
    try:
        body = fd.read()
    finally:
        fd.close()

    # "+ 4" skips the three braces and the single character after them
    # (presumably the newline that ends the marker line).
    pos = body.find("{{{")
    if pos >= 0:
        body = body[pos + 4:]

    pos = body.find("}}}")
    if pos >= 0:
        body = body[:pos]

    # "+ 6" skips the five-character "#!rst" tag and the character after it.
    pos = body.find("#!rst")
    if pos >= 0:
        body = body[pos + 6:]

    # Last path component, without the query string.
    filename = url.rsplit("/", 1)[-1].split("?", 1)[0] + ".rst"

    # ``with`` guarantees the file is closed even when write() raises.
    with open(filename, 'w') as f:
        f.write(body)
| 37 | |
| 38 | |
def process_index(index):
    """Return the entries of the first ``toctree::`` directive in an index.

    The file ``<index>.rst`` is read and scanned for the first line that
    contains ``toctree::``.  After that line, any directive option lines
    (lines whose stripped text starts with ``:``) and any blank lines are
    skipped; the following run of non-blank lines, each stripped of
    surrounding whitespace, is returned.

    :param index: path of the index file without its ``.rst`` extension.
    :returns: list of entry names in file order; an empty list when the
        file has no ``toctree::`` line or the directive has no entries.
    :raises IOError: if ``<index>.rst`` cannot be opened.
    """
    # Read everything up front so the file is closed on every return path
    # and the scans below are bounded by the number of lines (a readline()
    # loop cannot tell a blank line from end-of-file once stripped).
    with open(index + '.rst', 'r') as f:
        lines = [raw.strip() for raw in f]

    total = len(lines)

    # Find the line following the toctree directive.
    pos = 0
    while pos < total and 'toctree::' not in lines[pos]:
        pos += 1
    if pos >= total:
        return []
    pos += 1

    # Skip directive options such as ":maxdepth: 2".
    while pos < total and lines[pos].startswith(':'):
        pos += 1

    # Skip the blank line(s) separating the options from the entries.
    while pos < total and not lines[pos]:
        pos += 1

    # Collect entries until the next blank line or the end of the file.
    pages = []
    while pos < total and lines[pos]:
        pages.append(lines[pos])
        pos += 1

    return pages
| 67 | |
| 68 | |
| 69 | if __name__ == '__main__': |
| 70 | print "** Warning: This will overwrite ALL RST files in current directory. Continue? [n] ", |
| 71 | if sys.stdin.readline().strip() != 'y': |
| 72 | sys.exit(0) |
| 73 | |
| 74 | url_format = 'http://trac.pjsip.org/repos/wiki/pjsip-doc/%s?format=txt' |
| 75 | |
| 76 | index = url_format % ('index') |
| 77 | fetch_rst(index) |
| 78 | |
| 79 | pages = process_index('index') |
| 80 | for page in pages: |
| 81 | url = url_format % (page) |
| 82 | fetch_rst(url) |
| 83 | |
| 84 | print 'Done.' |
| 85 | |