|
|
Line 5: |
Line 5: |
| == How to Get the Script Extractor == | | == How to Get the Script Extractor == |
|
| |
|
| Download the script extraction code. | | Download the script extraction code. It's linked right here: [[File:extractCASAscript.py.txt]]. Right-click and save the file as '''extractCASAscript.py'''. Copy the file to your data directory, and make sure you set the script to be executable. |
|
| |
|
| <source lang="bash"> | | <source lang="bash"> |
|
| |
|
| # in bash | | # in bash |
| ftp ftp.cv.nrao.edu
| | chmod u+x extractCASAscript.py |
| # log in anonymously with e-mail as password
| |
| cd NRAO-staff/jgallimo
| |
| get extractCASAscript.py
| |
| </source>
| |
| | |
| Wget may be even simpler if you have it installed.
| |
| | |
| <source lang="bash">
| |
| # in bash
| |
| wget ftp://ftp.cv.nrao.edu:/NRAO-staff/jgallimo/extractCASAscript.py
| |
| </source>
| |
| | |
| === FTP didn't work? ===
| |
| | |
| Typical. Here's a copy of the source code, dated 30 Oct 2009.
| |
| | |
| <source lang="python">
| |
| #!/usr/bin/env python
| |
| #
| |
| # python script intended to extract CASA code from the CASA guide
| |
| # Wiki and collect it into a (CASA-executable) script.
| |
| # Jack Gallimore 8/10/09
| |
| # Updated to accommodate new web host 10/30/09.
| |
| | |
| | |
| import urllib
| |
| import urllib2
| |
| import sys
| |
| import codecs
| |
| import re
| |
| | |
# globals

# Formatting junk that loseTheJunk() deletes verbatim from every HTML line.
# Opening <span class=...> tags are handled separately by the regex substr1.
#
# NOTE(review): this listing was recovered from a rendering that had decoded
# the HTML entities inside the string literals (quote1 had become an
# unterminated triple quote, paren1/paren2 had become no-op replacements and
# the last junk entry had become a plain space, which would strip every space
# from the extracted code).  The entities below are reconstructed from how
# loseTheJunk() uses these names -- confirm against the original script file.
junkStr = [
    "<div dir=\"ltr\" style=\"text-align: left;\">",
    "<div class=\"source-python\" style=\"font-family: monospace;\">",
    "<pre>",
    "</span>",
    "</pre></div></div>",
    # NOTE(review): these four entries delete encoded brackets/braces outright
    # instead of translating them back to "]", "[", "{", "}" -- presumably a
    # limitation of this version of the script; verify before relying on it.
    "&#93;",
    "&#91;",
    "&#123;",
    "&#125;",
    "&nbsp;",
]

# Encoded characters that loseTheJunk() translates back to "(", ")" and '"'.
paren1 = "&#40;"
paren2 = "&#41;"
quote1 = "&quot;"

# Regex matching the opening <span> tags wrapped around highlighted tokens.
substr1 = r"<span class=[^>]*>"

# define casa code blocks
# A line containing beginBlock starts a code block; endBlock closes it.
beginBlock = "class=\"source-python\""
endBlock = "</pre></div></div>"
| |
| | |
| | |
| # function to clean up html strings (convert html markup to executable python)
| |
def loseTheJunk(line):
    """Convert one line of wiki HTML into a line of CASA script.

    Opening ``<span class=...>`` tags (the ``substr1`` pattern) and every
    literal fragment listed in ``junkStr`` are removed, and the encoded
    forms held in ``quote1``, ``paren1`` and ``paren2`` are replaced by
    ``"``, ``(`` and ``)``.

    Interactive-session commands are then rewritten as function calls,
    because scripts need call syntax: ``tget X`` -> ``tget(X)``,
    ``default X`` -> ``default(X)`` and ``go`` -> ``go()``.

    Parameters:
        line: one line of the downloaded page, as a string.

    Returns:
        The cleaned-up line, as a string.
    """
    cleaned = re.sub(substr1, r'', line)
    for junk in junkStr:
        cleaned = cleaned.replace(junk, "")
    cleaned = cleaned.replace(quote1, "\"")
    cleaned = cleaned.replace(paren1, "(")
    cleaned = cleaned.replace(paren2, ")")

    # Rewrite a command only when it is the whole statement.  A plain
    # substring replace would also mangle unrelated text that merely
    # contains the letters (e.g. "ngc_good" or an existing "go()" call).
    command = re.match(r'^(\s*)(tget|default)\s+([\'"]?\w+[\'"]?)\s*$', cleaned)
    if command:
        return '%s%s(%s)' % command.groups()

    bare_go = re.match(r'^(\s*)go\s*$', cleaned)
    if bare_go:
        # Keep the leading indentation so nested commands stay nested.
        return bare_go.group(1) + 'go()'

    return cleaned
| |
|
| |
| | |
| # start of main code
| |
| | |
def main():
    """Fetch the wiki page named on the command line and save its CASA code.

    The URL is read from ``sys.argv[1]``.  The output file name is the last
    path component of the URL with "index.php?title=", ":" and "_" removed
    and ".py" appended; it is written in the current directory.  Every line
    from one containing ``beginBlock`` through one containing ``endBlock``
    is passed through ``loseTheJunk`` and written to that file.

    Exits with status 2 when no URL argument is given.
    """
    try:
        baseURL = sys.argv[1]
    except IndexError:
        # Single-argument print(...) is valid in both Python 2 and 3.
        print('No argument given.')
        print('Syntax: extractCASAscript.py \'http:blah.blah.edu/web_site/\'')
        sys.exit(2)

    print("Rest assured. I'm trying to get " + baseURL + " for you now.")
    outFile = baseURL.split('/')[-1] + '.py'
    for unwanted in ("index.php?title=", ":", "_"):
        outFile = outFile.replace(unwanted, "")

    # Download before opening the output file, so that a failed request
    # does not leave an empty script behind.
    req = urllib2.Request(baseURL)
    response = urllib2.urlopen(req)
    try:
        raw_page = response.read()
    finally:
        response.close()

    # Decode up front: handing undecoded bytes to the UTF-8 codecs writer
    # fails on any non-ASCII character.
    # NOTE(review): assumes the wiki serves UTF-8 -- confirm; undecodable
    # bytes are replaced rather than aborting the extraction.
    the_page = raw_page.decode('utf-8', 'replace').split("\n")

    print("Things are going well. Let me clean out some of that html markup.")
    f = codecs.open(outFile, 'w', 'utf-8')
    try:
        inBlock = False
        for line in the_page:
            if not inBlock:
                # see if this line begins a python code block
                if beginBlock not in line:
                    continue
                inBlock = True
            f.write(loseTheJunk(line) + "\n")
            # the endBlock may sit on the same line as the beginBlock
            if endBlock in line:
                inBlock = False
    finally:
        f.close()

    print("Great. I think I just wrote the file " + outFile + " in the current directory. No promises.")


if __name__ == "__main__":
    main()
| |
| | |
| </source> | | </source> |
|
| |
|