#!/bin/env python ''' A script to transform CASA Guide webpages and casapy scripts to casapy scripts suited for interactive or non-interactive use. This script can be run from the command line. Use the '-h' option to obtain a help message explaining the command line parameters and options. The script runs in one of three modes: 1) interactive (default): Generates a casapy script that requests user input when interactive GUIs are invoked. 2) non-interactive: Generates a casapy script that sleeps for 60s when an interactive GUI is invoked. (Note: 60s may not be enough time for some plots.) 3) benchmark: Generates a casapy script that makes all tasks noninteractive or removes their invocation all together. The output script imports and makes extensive use of module *casa_call.py* to keep track of the start and stop time of tasks. *casa_call.py* must be in the casapy Python path when the casapy script is run. To work properly, the list of casapy tasks hard coded below must be consistent with the tasks available in the version of casa being tests. To check for consistency or update the hardcoded list, use the function listCASATasks() below (see the function's docstring for instructions). The intended functionality of the benchmarking mode is (1) to allow easy assessment of whether the scripts are working and (2) to produce useful benchmarks of performance, e.g., as a function of machine. ''' # ===================== # IMPORTS # ===================== import urllib import urllib2 import sys import codecs import re import string import os, os.path from optparse import OptionParser # ===================== # DEFINITIONS # ===================== # Expression that define the beginning and end of CASA code blocks in the HTML beginBlock = "class=\"python source-python\"" endBlock = "" # interactives interactive=re.compile("[\s;]*(plotxy|plotcal|plotms|viewer|plotants)") # CASA task list (used for benchmarking markup, else ignored) # Check and update this list using function listCASATasks(), below. # for CASA 3.4... casa_tasks = ['accum', 'applycal', 'asap_init', 'asdmsummar', 'bandpass', 'blcal', 'boxi', 'browsetable', 'calstat', 'caltabconvert', 'clean', 'clearcal', 'clearplot', 'clearstat', 'concat', 'conjugatevis', 'csvclea', 'cvel', 'deconvolve', 'delmod', 'exportasdm', 'exportfits', 'exportuvfits', 'feather', 'find', 'fixplanets', 'fixvis', 'flagautocorr', 'flagcmd', 'flagdata', 'flagmanager', 'fluxscale', 'ft', 'gaincal', 'gencal', 'hanningsmooth', 'help par.parametername', 'help taskname', 'imcollapse', 'imcontsub', 'imfit', 'imhead', 'immath', 'immoments', 'impbcor', 'importasdm', 'importevl', 'importfits', 'importfitsidi', 'importgmr', 'importuvfits', 'importvla', 'imregrid', 'imsmooth', 'imstat', 'imtrans', 'imval', 'imview', 'listcal', 'listfits', 'listhistory', 'listobs', 'listsd', 'listvis', 'mosai', 'msmoments', 'msview', 'oldflagcmd', 'plotants', 'plotcal', 'plotms', 'plotuv', 'plotweathe', 'plotxy', 'polcal', 'predictcomp', 'rmtables', 'sdbaseline', 'sdcal', 'sdcoadd', 'sdfit', 'sdflag', 'sdflagmanager', 'sdgrid', 'sdimaging', 'sdimprocess', 'sdlist', 'sdmath', 'sdplot', 'sdreduce', 'sdsave', 'sdscale', 'sdsmooth', 'sdstat', 'sdtpimaging', 'setjy', 'simanalyze', 'simdata', 'simobserve', 'slsearch', 'smoothcal', 'specfi', 'splattotable', 'split', 'startup', 'statw', 'taskhelp', 'tasklist', 'testautofla', 'testconcat', 'tflagdata', 'toolhelp', 'uvcontsub', 'uvmodelfit', 'uvsub', 'viewer', 'vishead', 'visstat', 'widefiel', 'wvrgcal'] # # CASA 3.3... # casa_tasks = ['accum', 'applycal', 'asap_init', 'bandpass', 'blcal', 'boxi', # 'browsetable', 'calstat', 'clean', 'clearcal', 'clearplot', 'clearstat', # 'concat', 'conjugatevis', 'csvclea', 'cvel', 'deconvolve', 'exportasd', # 'exportfits', 'exportuvfits', 'feather', 'find', 'fixplanets', 'fixvis', # 'flagautocorr', 'flagcmd', 'flagdata', 'flagmanager', 'fluxscale', 'ft', # 'gaincal', 'gencal', 'hanningsmooth', 'help par.parametername', 'help # taskname', 'imcollapse', 'imcontsub', 'imfit', 'imhead', 'immath', 'immoments', # 'impbcor', 'importaipscaltable', 'importasdm', 'importevl', 'importfits', # 'importfitsidi', 'importgmr', 'importoldasd', 'importuvfits', 'importvla', # 'imregrid', 'imsmooth', 'imstat', 'imtrans', 'imval', 'imview', 'listcal', # 'listhistory', 'listobs', 'listsd', 'listvis', 'mosai', 'msmoments', 'msview', # 'plotants', 'plotcal', 'plotms', 'plotuv', 'plotxy', 'polcal', 'rmtables', # 'sdaverage', 'sdbaseline', 'sdcal', 'sdcoadd', 'sdfit', 'sdflag', # 'sdflagmanager', 'sdimaging', 'sdimprocess', 'sdlist', 'sdmath', 'sdplot', # 'sdsave', 'sdscale', 'sdsmooth', 'sdstat', 'sdtpimaging', 'setjy', # 'sim_analyze', 'sim_observe', 'simdata', 'slsearch', 'smoothcal', 'specfi', # 'splattotable', 'split', 'startup', 'taskhelp', 'tasklist', 'testautofla', # 'testconcat', 'toolhelp', 'uvcontsub', 'uvmodelfit', 'uvsub', 'viewer', # 'vishead', 'visstat', 'widefiel'] # define formatting junk that needs to be filtered # JFG comments that regular expressions might clean this up junkStr = ["
"] junkStr = junkStr + ["
"]
junkStr = junkStr + ["
"]
junkStr = junkStr + [""]
junkStr = junkStr + ["
"] junkStr = junkStr + [" "] paren1 = "(" paren2 = ")" brack2 = "]" brack1 = "[" sqirl1 = "{" sqirl2 = "}" quote1 = """ lessthan = "<" greaterthan = ">" ampersand = "&" substr1 = r"]*>" nbsp = " " # tasks to suppress in benchmarking run # plotms -- produces table locks; must be run synchronously # plotants -- produces a table lock that causes wvrgcal to fail tasks_to_suppress = ["plotms", "plotants"] # ===================== # FUNCTIONS # ===================== def countParen(line): """ Returns the net open and closed parentheses. """ pcount = 0 for char in line: if char == '(': pcount += 1 if char == ')': pcount -= 1 return pcount def isInput(line): """ Tests if a line is waiting for user input. """ temp = line.find("raw_input") return temp > -1 def extract_task(line): """ Tries to return the task name being called in the line. """ stripped = line.lstrip() temp = stripped.find("(") if temp == -1: return None return stripped[0:temp] def is_task_call(line): """ Tests if the line is a task call. """ if extract_task(line) in casa_tasks: return True return False def indentation(line): spaces = 0 active = True for char in line: if active == False: continue if char == " ": spaces += 1 else: active=False return spaces def add_benchmarking(line,tasknum=0): this_task = extract_task(line) indents = indentation(line) pre_string = "" for i in range(indents): pre_string+=" " before = pre_string+"this_call = casa_call.Call('"+this_task+"','"+str(tasknum)+"')\n" after = "\n"+pre_string+"this_call.end(out_file)" return before+line+after def suppress_for_benchmark(line): if is_task_call(line) == False: return False if extract_task(line) in tasks_to_suppress: return True return False def make_func_noninteractive(line): """ Make calls to specific tasks/functions non interactive. Set argument interactive to false for specific tasks and functions. """ # First, check to see if clean is called with function arguments. funcName = extract_task(line) if funcName == "clean" or funcName == "au.plotbandpass": # Set argument interactive to false pattern = r'''interactive\ *=\ *(True|T|true)''' new_line = re.sub( pattern, 'interactive = False', line ) if funcName == "clean": # Remove clean mask parameter if it exists pattern = r'''mask\ *=\ *['"].*['"].*,?''' new_line = re.sub( pattern, '', new_line ) return new_line # Second, check for variables in local namespace else: pattern = r'''^[ \t]*interactive\ *=\ *(True|T|true)''' # If variable interactive is being set, make sure it is set to false. if re.match( pattern, line ): pattern2 = r'''\ *(True|T|true)''' new_line = re.sub( pattern2, ' False', line ) return new_line return line def suppress_gui( line ): """ Suppress GUIs. Return modified line. * line = a python statement """ if is_task_call(line): # Plotcal if extract_task(line) == "plotcal": # if showgui is specified, make sure it is false pattern = r'''showgui\ *=\ *(True|T|true|False|F|false)''' new_line = re.sub( pattern, 'showgui = False', line ) if new_line == line: # no substituion made # add showgui=False to parameter list pattern = r'''\(''' new_line = re.sub( pattern, '( showgui=False, ', line, count=1 ) return new_line # Suppress GUIs for other tasks here... return line def turnPlotmsOff( line ): """ Turn off plotms calls. """ if is_task_call(line): if extract_task(line) == "plotms": line = ' '*indentation(line) + "print 'Turned PLOTMS off'" return(line) # function to clean up html strings (convert html markup to executable python) def loseTheJunk(line): """ Strip garbage from line. """ outline = line # this should be function'd -- need to replace tgets etc as in first version of script outline = re.sub(substr1, r'', outline) for junk in junkStr: outline = outline.replace(junk, "") outline = outline.replace(quote1, "\"") outline = outline.replace(paren1, "(") outline = outline.replace(paren2, ")") outline = outline.replace(brack1, "[") outline = outline.replace(brack2, "]") outline = outline.replace(sqirl1, "{") outline = outline.replace(sqirl2, "}") outline = outline.replace(lessthan, "<") outline = outline.replace(greaterthan, ">") outline = outline.replace(ampersand, "&") outline = outline.replace(nbsp, " ") #some additional parsing -- scripting has slightly different #syntax than interactive session for tget, default, and go #(presumably among others). newline = outline newline = newline.replace(r'tget ', r'tget(') newline = newline.replace(r'default ', r'default(') if newline == 'go': newline = 'go(' if newline != outline: newline = newline + ')' outline = newline return outline def addInteractivePause(outline): newoutline = outline indent = " "*indentation(outline) # Use a raw string for the print statement because outline could contain # single and/or double quotes. newoutline += "\n" + indent + "print r'''Command: " + outline.lstrip().replace('\n','\\n') + "'''" newoutline += "\n" + indent + "user_check=raw_input('When you are done with the window, close it and press enter to continue:')" return newoutline def addNonInteractivePause(outline): newoutline = outline newoutline += "\ninp()\nprint('Pausing for 60 seconds...')\n" newoutline += "time.sleep(60)\n" newoutline = string.replace(newoutline, '\n', '\n'+' '*indentation(outline)) return newoutline # Return the pre-material needed to set up benchmarking def benchmark_header( scriptName='script' ): """ Write the header of the benchmarking script. * scriptName = Name of the benchmarking script """ out_file = scriptName.replace('.py','.benchmark.txt') lines = [] lines.append("### Begin Benchmarking Material") lines.append("import casa_call") lines.append("try:") lines.append(" out_file") lines.append("except NameError:") lines.append(" out_file = '" + out_file + "'") lines.append("if os.path.exists(out_file):") lines.append(" counter = 1") lines.append(" while os.path.exists(out_file+'.'+str(counter)):") lines.append(" counter += 1") lines.append(" os.system('mv '+out_file+' '+out_file+'.'+str(counter))") lines.append("os.system('rm -rf '+out_file)") lines.append("### End Benchmarking Material") return lines def pythonize_shell_commands( line ): """ Make casapy-friendly shell commands Python compatible. Unix shell commands included below in the 'commands' list can be called directly from the casapy prompt. These commands cannot be called from inside a Python script. To run these commands in a script, place them in an os.system call. * line = a python statement """ commands = [ 'ls', 'pwd', 'less', 'pwd', 'cd', 'cat' ] firstWord = line.split(' ')[0] if firstWord in commands: line = 'os.system("' + line + '")' return line def make_system_call_noninteractive( line ): """ Make calls to os.system non-interactive. Return non-interacive line. Some shell commands called via os.system require user interaction, such as more. Make these shell calls noninteractive. Replacements: 1) Replace more with cat, 2) [Add other replacements here...] * line = a python statement """ command = '' newCommand = '' # Extract the system command from the os.system statement pattern = r'''os.system\ *\(\ *(['"])(.*)\1\ *\)''' matchObj = re.match( pattern, line ) if matchObj: command = matchObj.group(2) command = command.strip() # Replace more with cat pattern2 = r'''^more\ ''' newCommand = re.sub( pattern2, 'cat ', command ) # Add additional substutions here... newLine = line.replace( command, newCommand, 1 ) return newLine def exclude_raw_input( line ): """ Exclude raw_input calls from non-interactive scripts. * line = a python statement """ pattern = r'''raw_input\ *\(''' if re.search( pattern, line ): newline = ' ' * indentation(line) + '#' + line newline += '\n' + ' '*indentation(line) + 'pass\n' line = newline return line # start of main code def make_noninteractive( line ): """ Make *line* non-interactive. * line = a python statement """ line = make_func_noninteractive(line) line = make_system_call_noninteractive(line) line = exclude_raw_input(line) return line def listCASATasks(): """ Return a list of all the CASA tasks. Also report the difference between the task list in this module and the task list obtained from CASA. Note that the appropriate list will vary with the version of CASA. This function requires casapy modlue *tasks*. In casapy: >>> import extractCASAscript >>> taskList = extractCASAscript.listCASATasks() Review the difference between the task lists here... To update the task list in this module, >>> print taskList Then, copy-paste the output list into this module. THIS COULD BE AUTOMATED IF THE SCRIPT EXTRACTOR WAS RUN WITHIN CASAPY! THE CURRENT DESIGN ASSUMES THIS IS NOT THE CASE. """ from tasks import allcat all_tasks=[] for key in allcat.keys(): for taskname in allcat[key]: if (taskname[0] == '(') or (taskname[0]=='{'): length = len(taskname) taskname = taskname[1:length-2] if (taskname in all_tasks) == False: all_tasks.append(taskname) all_tasks.sort() all_tasks_set = set(all_tasks) casa_tasks_set = set(casa_tasks) print "Tasks in casapy but not in this module: " + str(all_tasks_set.difference(casa_tasks_set)) print "Tasks in this module but not in casapy: " + str(casa_tasks_set.difference(all_tasks_set)) return all_tasks # ===================== # MAIN PROGRAM # ===================== def main( URL, options ): """ Create a Python script from a CASA Guide or existing Python script. * URL = URL to a CASA Guide web page (HTML) or a Python script. * options = options object created by optparse. If options.benchmark is true, output a Python benchmarking script. If URL to a CASA Guide, extract the Python from the guide and create an executable Python script. If options.benchmark is true, produce a benchmark test from the CASA Guide or existing Python script. If URL to a Python script, convert the script to a benchmarking script. """ # Determine if the input file is a Python script pyInput = False if ( URL[-3:].upper() == '.PY' ): pyInput = True # Pull the input file across the web or get it from local network responseLines = [] outFile = '' if ( URL[:4].upper() == 'HTTP' ): print "Acquiring " + URL req = urllib2.Request(URL) response = urllib2.urlopen(req) responseLines = response.read().split("\n") # Clean up the output file name outFile = URL.split('/')[-1] if not pyInput: outFile += '.py' outFile = outFile.replace("index.php?title=","") outFile = outFile.replace(":","") outFile = outFile.replace("_","") else: print "Copying " + URL + " to CWD." os.system('scp elwood:'+URL+' ./') outFile = os.path.basename(URL) localFile = open( outFile , 'r' ) responseLines = localFile.read().split("\n") # Initialize the parser and output line list readingCode = False lineList = [] if pyInput: lineList = responseLines else: # Loop over the lines read from the web page for line in responseLines: # If we are not currently reading code, see if this line # begins a python code block. if (readingCode == False): if beginBlock in line: readingCode = True outline = loseTheJunk(line) lineList += [outline] if endBlock in line: readingCode = False else: outline = loseTheJunk(line) lineList += [outline] if endBlock in line: readingCode = False # The python code is now loaded into a list of lines. Now compress the # lines into individual commands, allowing for commands to span multiple # lines. Lines are grouped by closed parentheses. compressedList = [] iline = 0 while iline < len(lineList): line = lineList[iline] pcount = countParen(line) while(pcount > 0): line += '\n' iline += 1 line += lineList[iline] pcount = countParen(line) line = string.expandtabs(line) compressedList += [line] iline += 1 print str(len(lineList))+" total lines become" print str(len(compressedList))+" compressed lines" # All modes for i,line in enumerate(compressedList): compressedList[i] = pythonize_shell_commands( compressedList[i] ) # Prepare for benchmark and noninteractive modes if options.benchmark or options.noninteractive: for i,line in enumerate(compressedList): compressedList[i] = make_noninteractive( compressedList[i] ) # Write script for benchmark mode if options.benchmark: task_list = [] task_nums = [] print "Writing file for execution in benchmarking mode." tasknum = 0 f = codecs.open(outFile, 'w','utf-8') header = benchmark_header( scriptName = outFile ) for line in header: print >>f, line for line in compressedList: if suppress_for_benchmark(line): print >>f, ' ' * indentation(line) + 'pass #' + \ line.replace('\n','') else: line = suppress_gui(line) if is_task_call(line): this_task = extract_task(line) print "I found a task call for ", this_task tasknum += 1 line = add_benchmarking(line,tasknum) task_list.append(this_task) task_nums.append(tasknum) print >>f, line print >>f, 'casa_call.summarize_bench( out_file, out_file+".summary" )' f.close() # Write task list to expectation file exp_file = outFile+'.expected' print "I am writing the expected flow to a file called "+exp_file f = codecs.open(exp_file, 'w','utf-8') for i in range(len(task_list)): print >>f, task_list[i], task_nums[i] f.close() else: # Write script for interactive and noninteractive modes f = codecs.open(outFile, 'w','utf-8') for line in compressedList: if options.plotmsoff: line = turnPlotmsOff(line) if options.noninteractive: if extract_task(line) == 'plotms': line = addNonInteractivePause(line) else: #interactive line = exclude_raw_input(line) mtch = interactive.match(line) if (mtch and not ("showgui=F" in line)): line = addInteractivePause(line) print >>f, line f.close() print "New file " + outFile + " written to current directory." print "In casapy, run the file using ", print 'execfile("' + outFile + '")' if __name__ == "__main__": usage = \ """ %prog [options] URL *URL* should point to a CASA Guide webpage or to a Python script. *URL* can also be a local file system path.""" parser = OptionParser( usage=usage ) parser.add_option( '-b', '--benchmark', action="store_true", default=False, help="produce benchmark test script" ) parser.add_option( '-n', '--noninteractive', action="store_true", default=False, help="make script non-interactive (non-benchmark mode only)") parser.add_option( '-p', '--plotmsoff', action="store_true", help="turn off all plotms commands") (options, args) = parser.parse_args() if len(args) != 1: parser.print_help() #raise ValueError("") sys.exit(1) main(args[0], options)