# Difference between revisions of "Fit an arbitrary sky (image) model to an existing MS"

Line 12: | Line 12: | ||

− | First suppose we have a calibrated ALMA 7m MS called | + | First suppose we have a calibrated ALMA 7m MS called <b>calibrated_final_cont.ms</b> and a sky model image in a FITS file called <b>myskymap.fits</b>. We need to import it into CASA as a CASA image and ensure it has the proper FREQUENCY and STOKES axes, which in our example we assume is not yet the case. There are two ways to do this: 1) the CASA importfits() task will fill these in for you if you set *defaultaxes=False* with the appropriate sub-parameters; 2) the simutil() module will allow you to do it. We illustrate the second case: |

<pre> | <pre> | ||

Line 46: | Line 46: | ||

</pre> | </pre> | ||

− | |||

+ | Currently there is a mismatch in the way real ALMA data are written and how the CASA simulator identifies antennas, with the result that we need to manually set the telescope name in the MS <b>for 7m data (only)</b>: for most other telescopes this will not be the case. We do this using the table toolkit, which exists in your casa environment as "tb"-- | ||

<pre> | <pre> | ||

# | # | ||

Line 63: | Line 63: | ||

</pre> | </pre> | ||

+ | Next we use the simulator toolkit--- which is instantiated in your CASA environment as "sm"--- to generate predicted model visibilities from the model image. Only three calls to sim are needed: one to open the MS; one to set the primary beam (which is done from the telescope name); and one to generate the predicted visibilities. | ||

<pre> | <pre> | ||

sm.openfromms(msname) | sm.openfromms(msname) | ||

− | + | # this sets the primary beam ("voltage pattern")- | |

− | |||

− | # this sets the primary beam | ||

sm.setvp() | sm.setvp() | ||

+ | # check sanity- | ||

sm.summary() | sm.summary() | ||

Line 75: | Line 75: | ||

# ***NOTE: the result has gone in the data column, not the model column | # ***NOTE: the result has gone in the data column, not the model column | ||

− | # check | + | # check sanity again - |

plotms(vis=msname,xaxis='uvwave',yaxis='amp',ydatacolumn='data',avgtime='1e8') | plotms(vis=msname,xaxis='uvwave',yaxis='amp',ydatacolumn='data',avgtime='1e8') | ||

− | # plot what the observed model would look like, sans noise- | + | # plot what the observed model would look like, sans noise (dirty map) - |

clean(vis=msname,niter=0,cell='2.5arcsec',imsize=[128,128],imagename='model7m') | clean(vis=msname,niter=0,cell='2.5arcsec',imsize=[128,128],imagename='model7m') | ||

+ | </pre> | ||

− | |||

− | |||

− | |||

− | |||

− | |||

− | |||

− | |||

− | |||

− | |||

− | + | Now we wish to extract the complex visibilities and their associated weights into python to see how well they match up (i.e., compute Chi-squared) and estimate a scale factor for the model from the data. We do this using the MS tool, which is instantiated in your CASA environment as "ms". | |

− | + | <pre> | |

− | |||

ms.open(msname) | ms.open(msname) | ||

# the selection can be tricky, but it seems to work fine with | # the selection can be tricky, but it seems to work fine with | ||

# the default parameters using the SPLIT() strategy we have | # the default parameters using the SPLIT() strategy we have | ||

− | # done above- | + | # done above- ***YMMV, BE SURE TO UNDERSTAND THE DATA SELECTION HERE CAREFULLY*** |

ms.selectinit() | ms.selectinit() | ||

− | # ms.getdata() returns a dictionary, each element | + | # ms.getdata() returns a dictionary, each element of which is a numpy array |

− | |||

mydat=ms.getdata(['real','imaginary','u','v','weight']) | mydat=ms.getdata(['real','imaginary','u','v','weight']) | ||

uu=mydat['u'] | uu=mydat['u'] | ||

Line 107: | Line 97: | ||

rr=mydat['real'] | rr=mydat['real'] | ||

ii=mydat['imaginary'] | ii=mydat['imaginary'] | ||

− | # | + | # The weights for the model and the data should be identical |

− | # | + | # and equal to 1/error^2 for each visibility. |

wt=mydat['weight'] | wt=mydat['weight'] | ||

ms.close() | ms.close() | ||

Line 123: | Line 113: | ||

r2.shape | r2.shape | ||

− | # built-in sqrt | + | # built-in sqrt method does not work elementwise so import numpy's version - |

− | |||

import numpy as np | import numpy as np | ||

uvrad=np.sqrt(uu**2+vv**2) | uvrad=np.sqrt(uu**2+vv**2) | ||

Line 130: | Line 119: | ||

a2=np.sqrt(r2**2+i2**2) | a2=np.sqrt(r2**2+i2**2) | ||

− | # first index indexes polarization (there are two --XX, YY) | + | # first index indexes polarization (there are two --XX, YY); second is null (probably SPW or IF, nominally)- |

− | |||

pl.plot(uvrad,ampl[0,0,:],'.') | pl.plot(uvrad,ampl[0,0,:],'.') | ||

− | # | + | # the above plot should match up with the PLOTMS() plot we made just after sm.predict() |

# calculate chi2- | # calculate chi2- | ||

Line 141: | Line 129: | ||

# total chi2 per DOF- | # total chi2 per DOF- | ||

c2.sum()/c2.size | c2.sum()/c2.size | ||

− | # = 2.31 | + | # = 2.31 for this dataset |

− | + | # ***NOTE that the absolute normalization of the weights may or may not be correct, so take the | |

− | # | + | # chi-squared values with a grain of salt. |

− | # | ||

# simple single-parameter linear least squares fit of the model to the data- | # simple single-parameter linear least squares fit of the model to the data- | ||

numerator = r2[:,0,:] * rr[:,0,:] * wt + i2[:,0,:]*ii[:,0,:] * wt | numerator = r2[:,0,:] * rr[:,0,:] * wt + i2[:,0,:]*ii[:,0,:] * wt | ||

denominator = (rr[:,0,:])**2 * wt + (ii[:,0,:])**2 * wt | denominator = (rr[:,0,:])**2 * wt + (ii[:,0,:])**2 * wt | ||

− | # | + | # linear least-squares scale factor between data and model - |

scalefac= numerator.sum() / denominator.sum() | scalefac= numerator.sum() / denominator.sum() | ||

− | # uncertainty in scalefac, if the weights are correct | + | # uncertainty in scalefac, if the weights are correct in an absolute sense - |

− | |||

escalefac=sqrt(1.0/denominator.sum()) | escalefac=sqrt(1.0/denominator.sum()) | ||

− | |||

</pre> | </pre> |

## Revision as of 12:36, 20 May 2015

**Fitting a Sky Model to an Existing Measurement Set in the UV Domain**

*version 20may2015, written with CASA v4.3.1*

This casaguide illustrates how to fit an arbitrary sky image model to an existing MS. Along the way it illustrates how to "get your hands" on the raw visibility data in the python interface and how to compute the Chi-squared of the visibility data to your given model. This could be useful if you have a set of source models you would like to compare in the UV domain, but the models are too complicated to be handled by the set of models provided in CASA's uvmodelfit() task. The methods illustrated here can be used to estimate the *scaling* and *goodness-of-fit* of your model to the MS. They could also be the starting point for a more comprehensive and sophisticated exploration of a set of possible models using the Chi-squared statistic, for instance.

The example assumes we have an ALMA 7m MS; we are interested in the continuum data only; and we have a sky model as a FITS image which we wish to compare with the UV data (i.e., the MS). As written, this tutorial requires you to process one spectral window (SPW) at a time.

First suppose we have a calibrated ALMA 7m MS called **calibrated_final_cont.ms** and a sky model image in a FITS file called **myskymap.fits**. We need to import it into CASA as a CASA image and ensure it has the proper FREQUENCY and STOKES axes, which in our example we assume is not yet the case. There are two ways to do this: 1) the CASA importfits() task will fill these in for you if you set *defaultaxes=False* with the appropriate sub-parameters; 2) the simutil() module will allow you to do it. We illustrate the second case:

default importfits fitsimage='myskymap.fits' imagename='myskymap.image' overwrite=True defaultaxes=False inp go from simutil import simutil util=simutil() # Set the INCENTER and INWIDTH for the SPW of interest from the listobs() util.modifymodel(inimage='myskymap.image', outimage='myskymapFixed.image', inbright='',indirection='',incell='',incenter='85.015425GHz', inwidth='1937.5MHz',innchan=1)

Next we split out the SPW and field(s) of interest --- in this example there happens to be only a single field, but the method would work with a mosaic--- and do some time averaging to reduce the data volume (the averaging by default will be limited by scan and field ID). Two copies of the MS are made; one with the split data, the other will be overwritten to contain the calculated visibilities for the model:

# split out one SPW and the field(s) of interest- # make two copies, one of which will get overwritten by the MOCK spwid='0' realmsname='calibrated_final_cont_spw'+spwid+'_AVG.ms' split(vis='calibrated_final_cont.ms',outputvis=msname,field='4', spw=spwid,combine='scan',timebin='1e8') msname='calibrated_final_cont_spw'+spwid+'_AVG_MOCK.ms' split(vis='calibrated_final_cont.ms',outputvis=msname,field='4', spw=spwid,combine='scan',timebin='1e8')

Currently there is a mismatch in the way real ALMA data are written and how the CASA simulator identifies antennas, with the result that we need to manually set the telescope name in the MS **for 7m data (only)**: for most other telescopes this will not be the case. We do this using the table toolkit, which exists in your casa environment as "tb"--

# # set the telescope name manually- the need for this is probably a defect - # (sim tool infers primary beam from telescope name) # ***DO NOT DO THIS FOR THE 12M DATA!!!*** # tb.open(msname+'/OBSERVATION',nomodify=False) tel=tb.getcol("TELESCOPE_NAME") for i in range(len(tel)): tel[i]="ACA" tb.putcol("TELESCOPE_NAME",tel) tb.done()

Next we use the simulator toolkit--- which is instantiated in your CASA environment as "sm"--- to generate predicted model visibilities from the model image. Only three calls to sim are needed: one to open the MS; one to set the primary beam (which is done from the telescope name); and one to generate the predicted visibilities.

sm.openfromms(msname) # this sets the primary beam ("voltage pattern")- sm.setvp() # check sanity- sm.summary() sm.predict(imagename='lyszmapjyperpixRightSignFixed.image') # ***NOTE: the result has gone in the data column, not the model column # check sanity again - plotms(vis=msname,xaxis='uvwave',yaxis='amp',ydatacolumn='data',avgtime='1e8') # plot what the observed model would look like, sans noise (dirty map) - clean(vis=msname,niter=0,cell='2.5arcsec',imsize=[128,128],imagename='model7m')

Now we wish to extract the complex visibilities and their associated weights into python to see how well they match up (i.e., compute Chi-squared) and estimate a scale factor for the model from the data. We do this using the MS tool, which is instantiated in your CASA environment as "ms".

ms.open(msname) # the selection can be tricky, but it seems to work fine with # the default parameters using the SPLIT() strategy we have # done above- ***YMMV, BE SURE TO UNDERSTAND THE DATA SELECTION HERE CAREFULLY*** ms.selectinit() # ms.getdata() returns a dictionary, each element of which is a numpy array mydat=ms.getdata(['real','imaginary','u','v','weight']) uu=mydat['u'] vv=mydat['v'] rr=mydat['real'] ii=mydat['imaginary'] # The weights for the model and the data should be identical # and equal to 1/error^2 for each visibility. wt=mydat['weight'] ms.close() # now get visibility values from the actual (not mock) data- ms.open(realmsname) ms.selectinit() realdat=ms.getdata(['real','imaginary','u','v','weight']) r2=realdat['real'] i2=realdat['imaginary'] ms.close() # what are the dims of the data? r2.shape # built-in sqrt method does not work elementwise so import numpy's version - import numpy as np uvrad=np.sqrt(uu**2+vv**2) ampl=np.sqrt(rr**2+ii**2) a2=np.sqrt(r2**2+i2**2) # first index indexes polarization (there are two --XX, YY); second is null (probably SPW or IF, nominally)- pl.plot(uvrad,ampl[0,0,:],'.') # the above plot should match up with the PLOTMS() plot we made just after sm.predict() # calculate chi2- c2= (r2[:,0,:]-rr[:,0,:])**2 * wt + (i2[:,0,:]-ii[:,0,:])**2 * wt # typical data point contributes 1.6 to chi2- np.median(c2) # total chi2 per DOF- c2.sum()/c2.size # = 2.31 for this dataset # ***NOTE that the absolute normalization of the weights may or may not be correct, so take the # chi-squared values with a grain of salt. # simple single-parameter linear least squares fit of the model to the data- numerator = r2[:,0,:] * rr[:,0,:] * wt + i2[:,0,:]*ii[:,0,:] * wt denominator = (rr[:,0,:])**2 * wt + (ii[:,0,:])**2 * wt # linear least-squares scale factor between data and model - scalefac= numerator.sum() / denominator.sum() # uncertainty in scalefac, if the weights are correct in an absolute sense - escalefac=sqrt(1.0/denominator.sum())