|
|
Line 21: |
Line 21: |
| # in bash | | # in bash |
| wget ftp://ftp.cv.nrao.edu:/NRAO-staff/jgallimo/extractCASAscript.py | | wget ftp://ftp.cv.nrao.edu:/NRAO-staff/jgallimo/extractCASAscript.py |
| </source>
| |
|
| |
| === FTP didn't work? ===
| |
|
| |
| Typical. Here's a copy of the source code, dated 30 Oct 2009.
| |
|
| |
| <source lang="python">
| |
| #!/usr/bin/env python
| |
| #
| |
| # python script intended to extract CASA code from the CASA guide
| |
| # Wiki and collect it into a (CASA-executable) script.
| |
| # Jack Gallimore 8/10/09
| |
| # Updated to accommodate new web host 10/30/09.
| |
|
| |
|
| |
| import urllib
| |
| import urllib2
| |
| import sys
| |
| import codecs
| |
| import re
| |
|
| |
# globals

# Markup emitted around highlighted code that carries no code of its own and
# is simply deleted from every line.  "&nbsp;" is spelled as an entity on
# purpose: a literal space here would delete every space in the script.
junkStr = [
    "<div dir=\"ltr\" style=\"text-align: left;\">",
    "<div class=\"source-python\" style=\"font-family: monospace;\">",
    "<pre>",
    "</span>",
    "</pre></div></div>",
    "&nbsp;",
]

# HTML entities standing in for characters of the Python code.  They must be
# written as entities; a literal "(" or '"' would turn the replacement into a
# no-op (and a bare '"' between quotes opens a triple-quoted string).
paren1 = "&#40;"
paren2 = "&#41;"
quote1 = "&quot;"
substr1 = r"<span class=[^>]*>"

# (entity, character) pairs decoded by loseTheJunk.  Brackets and braces are
# decoded rather than deleted, since deleting them strips list and dict syntax
# from the extracted script.  "&amp;" comes last so that text such as
# "&amp;lt;" is not decoded twice.
_entityChars = [
    (quote1, "\""),
    (paren1, "("),
    (paren2, ")"),
    ("&#91;", "["),
    ("&#93;", "]"),
    ("&#123;", "{"),
    ("&#125;", "}"),
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&amp;", "&"),
]

# define casa code blocks
beginBlock = "class=\"source-python\""
endBlock = "</pre></div></div>"

# Pre-compiled patterns used by loseTheJunk.
_spanTag = re.compile(substr1)
# A whole line of the form "tget <task>" or "default <task>", optionally
# followed by a comment.
_taskCommand = re.compile(r"^(\s*)(tget|default)\s+([\w'\"]+)(\s*#.*)?\s*$")
# A whole line consisting of "go", optionally followed by a comment.
_goCommand = re.compile(r"^(\s*)go(\s*#.*)?\s*$")


# function to clean up html strings (convert html markup to executable python)
def loseTheJunk(line):
    """Convert one line of highlighted wiki HTML into plain Python.

    Opening <span class=...> tags and the fragments listed in junkStr are
    removed, then the HTML entities in _entityChars are decoded.  Finally the
    interactive-only commands are rewritten as calls, because scripts need
    call syntax where an interactive session accepts bare words:

        tget clean     ->  tget(clean)
        default clean  ->  default(clean)
        go             ->  go()

    The rewrite applies only when the command is the whole line (leading
    indentation and a trailing comment are kept), so "go" inside a file name
    or "default " inside a comment is left untouched.

    line -- one line of the downloaded page, without its newline.
    Returns the cleaned line.
    """
    outline = _spanTag.sub('', line)
    for junk in junkStr:
        outline = outline.replace(junk, "")
    for entity, char in _entityChars:
        outline = outline.replace(entity, char)

    # some additional parsing -- scripting has slightly different
    # syntax than interactive session for tget, default, and go
    match = _taskCommand.match(outline)
    if match:
        indent, command, task, comment = match.groups()
        return indent + command + '(' + task + ')' + (comment or '')

    match = _goCommand.match(outline)
    if match:
        indent, comment = match.groups()
        return indent + 'go()' + (comment or '')

    return outline
| |
|
| |
|
| |
| # start of main code
| |
|
| |
def main():
    """Download a wiki page and write its Python code blocks to a script.

    The page URL is read from the first command-line argument.  The output
    file name is the last path component of the URL with "index.php?title=",
    ":" and "_" removed and ".py" appended; it is created relative to the
    current working directory.

    A code block starts on a line containing beginBlock and ends on a line
    containing endBlock (both inclusive).  Every line of a block is passed
    through loseTheJunk and written exactly once.

    Exits with status 2 when no URL argument is given.
    """
    try:
        baseURL = sys.argv[1]
    except IndexError:
        print('No argument given.')
        print('Syntax: extractCASAscript.py \'http:blah.blah.edu/web_site/\'')
        sys.exit(2)

    print("Rest assured. I'm trying to get " + baseURL + " for you now.")
    outFile = baseURL.split('/')[-1] + '.py'
    outFile = outFile.replace("index.php?title=", "")
    outFile = outFile.replace(":", "")
    outFile = outFile.replace("_", "")

    # Download before creating the output file, so a failed request does not
    # leave an empty script behind.
    response = urllib2.urlopen(urllib2.Request(baseURL))
    try:
        # Decode once here: handing raw bytes to the UTF-8 codecs writer makes
        # it decode them as ASCII first, which fails on any non-ASCII page.
        the_page = response.read().decode('utf-8', 'replace').split("\n")
    finally:
        response.close()

    print("Things are going well. Let me clean out some of that html markup.")
    f = codecs.open(outFile, 'w', 'utf-8')
    try:
        inBlock = False
        for line in the_page:
            # see if this line begins a python code block
            if not inBlock and line.find(beginBlock) > -1:
                inBlock = True
            if inBlock:
                f.write(loseTheJunk(line) + "\n")
                # the endBlock may sit on the same line as the beginBlock
                if line.find(endBlock) > -1:
                    inBlock = False
    finally:
        f.close()
    print("Great. I think I just wrote the file " + outFile + " in the current directory. No promises.")


if __name__ == "__main__":
    main()
| |
|
| |
| </source> | | </source> |
|
| |
|
Hopefully the scripts contained in this documentation (a) are useful and (b) work. They were, however, developed while CASA was still a work in progress, so scripts may break as commands, arguments, and keywords change. We developed this script extractor to allow us to easily extract scripts from these pages and run them. Feel free to try it!
Download the script extraction code.
# in bash
ftp ftp.cv.nrao.edu
# log in anonymously with e-mail as password
cd NRAO-staff/jgallimo
get extractCASAscript.py
Wget may be even simpler if you have it installed.
# in bash
wget ftp://ftp.cv.nrao.edu:/NRAO-staff/jgallimo/extractCASAscript.py
Make your newly acquired python script executable.
# in bash
chmod u+x extractCASAscript.py
To run it, issue the python script name and give the URL as the argument. For example:
# in bash
extractCASAscript.py http://casaguides.nrao.edu/index.php?title=Calibrating_a_VLA_5_GHz_continuum_survey
This command will automatically generate a script called "CalibratingaVLA5GHzcontinuumsurvey.py" in the current directory.
--Jack Gallimore 21:13, 3 November 2009 (UTC)