ProFit: Galaxy Fitting Example

Aaron Robotham & Dan Taranu

2019-07-23

Get the latest version of ProFit:

library(devtools)
install_github('ICRAR/ProFit')

First load the libraries we need:

library(ProFit)
## Loading required package: FITSio
## Loading required package: magicaxis

Prepare the test data

Next we load a table of data describing GAMA galaxies:

data('ExampleInit', package="ProFit")
head(ExampleInit, 10)
##    CATAID sersic.xcen1 sersic.ycen1 sersic.mag1 sersic.mag2 sersic.re1
## 1  265769     122.8069      87.3328    17.08907    17.08907    8.71420
## 2  265911      84.8832      94.5951    16.83217    16.83217    7.05740
## 3  265940      80.3975      52.2173    18.04857    18.04857    5.96290
## 4  265943      53.7013      83.6956    17.70547    17.70547    6.42060
## 5  265981     148.8727     106.4069    16.85617    16.85617   11.00100
## 6  265986      78.1780      79.9999    18.97747    18.97747    6.80575
## 7  265985      51.3467      73.0023    19.03897    19.03897    5.77065
## 8  266033      66.0826      79.0872    17.39137    17.39137    6.25480
## 9  266035     105.1495      78.5289    18.18017    18.18017   11.38780
## 10 266105     264.8947     219.5667    15.45027    15.45027   36.68265
##    sersic.re2 sersic.nser1 sersic.ang2 sersic.axrat2
## 1     17.4284       3.4292    105.8498        0.1541
## 2     14.1148       4.3776    140.8191        0.4891
## 3     11.9258       4.6010    112.2746        0.4875
## 4     12.8412       4.5086    168.2385        0.4179
## 5     22.0020       2.9434     59.6690        0.2621
## 6     13.6115       2.0082    103.2134        0.0423
## 7     11.5413       2.7402      0.7028        0.5400
## 8     12.5096       4.9178     39.5336        0.3336
## 9     22.7756       2.9550     56.3237        0.6222
## 10    73.3653       3.9704     94.4409        0.3477

There are 2 data source options: KiDS or SDSS (the galaxies are the same)

datasource='KiDS'

Now we can extract out the example files we have available for fitting by checking the contents of the directory containing the example FITS files:

ExampleFiles=list.files(system.file("extdata",datasource,package="ProFit"))
ExampleIDs=unlist(strsplit(ExampleFiles[grep('fitim',ExampleFiles)],'fitim.fits'))
ExampleIDs
##  [1] "G265911" "G265940" "G266033" "G266035" "G266662" "G267199" "G267489"
##  [8] "G267525" "G278109" "G279148"

There are 10 example galaxies included. Here we run example 1:

useID=ExampleIDs[1]
image=readFITS(system.file("extdata", paste(datasource,'/',useID,'fitim.fits',sep=''),package="ProFit"))$imDat
sigma=readFITS(system.file("extdata", paste(datasource,'/',useID,'sigma.fits',sep=''),package="ProFit"))$imDat
segim=readFITS(system.file("extdata", paste(datasource,'/',useID,'segim.fits',sep=''),package="ProFit"))$imDat
psf=readFITS(system.file("extdata", paste(datasource,'/',useID,'psfim.fits',sep=''),package="ProFit"))$imDat

Next we extract parameters for a very rough model (not meant to look too good yet):

useIDnum=as.integer(strsplit(useID,'G')[[1]][2])
useloc=which(ExampleInit$CATAID==useIDnum)

Setup the fitting data structures

For our initial model we treat component 1 as the putative bulge and component 2 as the putative disk. We are going to attempt a fit where the disk is forced to have nser=1 and the bulge has an axial ratio of 1.

modellist=list(
  sersic=list(
    xcen= c(dim(image)[1]/2, dim(image)[1]/2),
    ycen= c(dim(image)[2]/2, dim(image)[2]/2),
    mag= c(ExampleInit$sersic.mag1[useloc], ExampleInit$sersic.mag2[useloc]),
    re= c(ExampleInit$sersic.re1[useloc], ExampleInit$sersic.re2[useloc])*
      if(datasource=='KiDS'){1}else{0.2/0.339},
    nser= c(ExampleInit$sersic.nser1[useloc], 1),  #Disk is initially nser=1
    ang= c(ExampleInit$sersic.ang2[useloc], ExampleInit$sersic.ang2[useloc]),
    axrat= c(1, ExampleInit$sersic.axrat2[useloc]),  #Bulge is initially axrat=1
    box=c(0, 0)
  )
)
modellist
## $sersic
## $sersic$xcen
## [1] 84.5 84.5
## 
## $sersic$ycen
## [1] 93.5 93.5
## 
## $sersic$mag
## [1] 16.83217 16.83217
## 
## $sersic$re
## [1]  7.0574 14.1148
## 
## $sersic$nser
## [1] 4.3776 1.0000
## 
## $sersic$ang
## [1] 140.8191 140.8191
## 
## $sersic$axrat
## [1] 1.0000 0.4891
## 
## $sersic$box
## [1] 0 0

The pure model (no PSF):

magimage(profitMakeModel(modellist,dim=dim(image)))

The original image:

magimage(image)

The convolved model (with PSF):

magimage(profitMakeModel(modellist,dim=dim(image),psf=psf))

Next we define our list of what we want to fit (where TRUE means we will fit it later):

tofit=list(
  sersic=list(
    xcen= c(TRUE,NA), #We fit for xcen and tie the two togther
    ycen= c(TRUE,NA), #We fit for ycen and tie the two togther
    mag= c(TRUE,TRUE), #Fit for both
    re= c(TRUE,TRUE), #Fit for both
    nser= c(TRUE,FALSE), #Fit for bulge
    ang= c(FALSE,TRUE), #Fit for disk
    axrat= c(FALSE,TRUE), #Fit for disk
    box= c(FALSE,FALSE) #Fit for neither
  )
)

Now we define what parameters should be fitted in log space:

tolog=list(
  sersic=list(
    xcen= c(FALSE,FALSE),
    ycen= c(FALSE,FALSE),
    mag= c(FALSE,FALSE),
    re= c(TRUE,TRUE), #re is best fit in log space
    nser= c(TRUE,TRUE), #nser is best fit in log space
    ang= c(FALSE,FALSE),
    axrat= c(TRUE,TRUE), #axrat is best fit in log space
    box= c(FALSE,FALSE)
  )
)

Now we specify the priors object. The priors object is really a function that takes modellist as an input and returns an addative log-likelihood sum that increments the total log-likelihood for the current realisation of the model.

The elements you access within the modellist structure must really exist or ProFit will stop with an error. The modellist used internally will be the linear (un-logged version), so logging must be made within the function if, e.g., log-normal distributions are required. You do not have to apply priors to each parameter- notice below a few are left out.

priors=function(new,init,sigmas=c(2,2,2,2,5,5,1,1,1,1,30,30,0.3,0.3)){
  LL=sum(
      dnorm(new$sersic$xcen,init$sersic$xcen,sigmas[1:2],log=TRUE),
      dnorm(new$sersic$ycen,init$sersic$ycen,sigmas[3:4],log=TRUE),
      dnorm(new$sersic$mag,init$sersic$mag,sigmas[5:6],log=TRUE),
      dnorm(log10(new$sersic$re),log10(init$sersic$re),sigmas[7:8],log=TRUE),
      dnorm(log10(new$sersic$nser),log10(init$sersic$nser),sigmas[9:10],log=TRUE),
      dnorm(log10(new$sersic$axrat),log10(init$sersic$axrat),sigmas[13:14],log=TRUE)
  )
  return(LL)
}
print(priors(modellist,modellist))
## [1] -14.61078

Notice that the values for sigmas were defined along with the function definition. In practice, you may want to define them separately as such:

sigmavec=c(2,2,5,1,1,30,0.3,0.2)
priors=function(new,init,sigmas=sigmavec){
  return(sum(2*dnorm(log10(new$sersic$re),log10(init$sersic$re),sigmas[4],log=TRUE)))
}
print(priors(modellist,modellist))
## [1] -3.675754

Unfortunately, R does not store the provided values of sigmas, but simply the name of a variable sigma, so if you happen to redefine sigma later on, the prior function will change (or fail entirely if you delete sigma or save and then load a fit):

sigmavec=sigmavec/2
print(priors(modellist,modellist))
## [1] -0.9031654
print(priors)
## function(new,init,sigmas=sigmavec){
##   return(sum(2*dnorm(log10(new$sersic$re),log10(init$sersic$re),sigmas[4],log=TRUE)))
## }
## <bytecode: 0x7f9975f19eb0>

Because of this issue, we provide a utility function to create a valid prior function, saving the values of the prior distributions:

sigmas=c(2,2,5,1,1,30,0.3,Inf)

sigmas=list(
  sersic=list(
    xcen= numeric(2)+sigmas[1],
    ycen= numeric(2)+sigmas[2],
    mag= numeric(2)+sigmas[3],
    re= numeric(2)+sigmas[4],
    nser= numeric(2)+sigmas[5],
    ang= numeric(2)+sigmas[6],
    axrat= numeric(2)+sigmas[7],
    box= numeric(2)+sigmas[8]
  )
)

priors=profitMakePriors(modellist, sigmas, tolog, allowflat=TRUE)
print(priors(modellist,modellist))
## [1] -23.25105
print(priors)
## function (new, init, sigmas = c(sersic.xcen1 = 2, sersic.xcen2 = 2, 
## sersic.ycen1 = 2, sersic.ycen2 = 2, sersic.mag1 = 5, sersic.mag2 = 5, 
## sersic.re1 = 1, sersic.re2 = 1, sersic.nser1 = 1, sersic.nser2 = 1, 
## sersic.ang1 = 30, sersic.ang2 = 30, sersic.axrat1 = 0.3, sersic.axrat2 = 0.3, 
## sersic.box1 = Inf, sersic.box2 = Inf), tolog = c(sersic.xcen1 = FALSE, 
## sersic.xcen2 = FALSE, sersic.ycen1 = FALSE, sersic.ycen2 = FALSE, 
## sersic.mag1 = FALSE, sersic.mag2 = FALSE, sersic.re1 = TRUE, 
## sersic.re2 = TRUE, sersic.nser1 = TRUE, sersic.nser2 = TRUE, 
## sersic.ang1 = FALSE, sersic.ang2 = FALSE, sersic.axrat1 = TRUE, 
## sersic.axrat2 = TRUE, sersic.box1 = FALSE, sersic.box2 = FALSE
## ), tofit = NULL, means = unlist(init), allowflat = TRUE) 
## {
##     LL = 0
##     parms = unlist(new)
##     if (!is.null(tofit)) 
##         ps = which(tofit)
##     else ps = 1:length(parms)
##     for (p in ps) {
##         if (!(allowflat && (sigmas[p] == Inf))) {
##             parm = parms[[p]]
##             mean = means[[p]]
##             if (tolog[p]) {
##                 parm = log10(parm)
##                 mean = log10(mean)
##             }
##             LL = LL + dnorm(parm, mean, sigmas[p], log = TRUE)
##         }
##     }
##     return = LL
## }
## <bytecode: 0x7f9977431898>

The hard intervals should also be specified in linear space:

intervals=list(
  sersic=list(
    xcen=list(lim=c(0,300),lim=c(0,300)),
    ycen=list(lim=c(0,300),lim=c(0,300)),
    mag=list(lim=c(10,30),lim=c(10,30)),
    re=list(lim=c(1,100),lim=c(1,100)),
    nser=list(lim=c(0.5,20),lim=c(0.5,20)),
    ang=list(lim=c(-180,360),lim=c(-180,360)),
    axrat=list(lim=c(0.1,1),lim=c(0.1,1)),
    box=list(lim=c(-1,1),lim=c(-1,1))
  )
)

It is not a requirement to have a constraints object that do more complex manipulations of the modellist, but it is useful to stop undesirable (perhaps unphysical) exploration.

The constraint object is really a function that takes modellist as an input and returns a modifed (but identical in skeleton structure) modellist output. For this reason it is generally best to make manipulations on the modellist directly rather than attempting to unlist (collapse) it and rebuild it etc (this can be done, but it is prone to coding error).

The elements you access within the modellist structure must really exist or ProFit will stop with an error. All constraints manipulations happen internally to the model in linear (un-logged) form.

Setup the data structure we need for optimisation, taking a few seconds to find the optimal convolution method:

Data=profitSetupData(image=image, sigma=sigma, segim=segim, psf=psf,
                     modellist=modellist, tofit=tofit, tolog=tolog, priors=priors,
                     intervals=intervals,magzero=0, algo.func='optim', like.func="t",
                     verbose=TRUE)

This produces a fairly complex R object, but with all the bits we need for fitting, e.g. (notice the tolog parameteres are now logged):

Data$init
##  sersic.xcen1  sersic.ycen1   sersic.mag1   sersic.mag2    sersic.re1 
##    84.5000000    93.5000000    16.8321750    16.8321750     0.8486447 
##    sersic.re2  sersic.nser1   sersic.ang2 sersic.axrat2 
##     1.1496747     0.6412361   140.8191000    -0.3106023

These are the parameters we wish to fit for, and we take the initial guesses from the model list we provided before.

We can test how things currently look (we get an output because we set verbose=TRUE earlier):

profitLikeModel(parm=Data$init, Data=Data, makeplots=TRUE)
## [1] "Summary of used sample:"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -76.6588  -0.4123   0.4049   0.2498   1.2617  23.7657 
## [1] "sd / 1-sig / 2-sig range:"
## [1] 3.558640 1.280227 4.606598
## [1] "Using 16777 out of 16777"

##  sersic.xcen1  sersic.ycen1   sersic.mag1   sersic.mag2    sersic.re1 
##    8.4500e+01    9.3500e+01    1.6832e+01    1.6832e+01    8.4864e-01 
##    sersic.re2  sersic.nser1   sersic.ang2 sersic.axrat2               
##    1.1497e+00    6.4124e-01    1.4082e+02   -3.1060e-01   -3.3484e+04

Do some fitting

First we will try optim BFGS.

sigmas=c(2,2,2,2,5,5,1,1,1,1,30,30,0.3,0.3)
print(system.time({optimfit = optim(Data$init, profitLikeModel, method='BFGS', Data=Data,
  control=list(fnscale=-1,parscale=sigmas[which(unlist(tofit))]))}))
print(optimfit$value)

The best optim BFGS fit is given by:

optimfit$par

Check it out:

profitLikeModel(optimfit$par,Data,makeplots=TRUE,whichcomponents=list(sersic=1))
profitLikeModel(optimfit$par,Data,makeplots=TRUE,whichcomponents=list(sersic=2))
profitLikeModel(optimfit$par,Data,makeplots=TRUE,whichcomponents=list(sersic='all'))
modeloptim=profitRemakeModellist(parm=optimfit$par, Data=Data)$modellist
profitEllipsePlot(Data,modeloptim,pixscale=0.2,FWHM=0.5,SBlim=26)

Using constraints it is possible to force relative sizes between components etc. Here we do not want the bulge Re to become larger than the disk Re. We make the function and then put it in the Data list manually. You cannot properly use constraints using R’s optim function, but you can use LaplaceApproximation and LaplacesDemon because they receive modifications to the input parameters.

constraints=function(modellist){
  if(modellist$sersic$re[1]>modellist$sersic$re[2]){
    modellist$sersic$re[1]=modellist$sersic$re[2]
  }
  return=modellist
}
Data$constraints=constraints

Another useful library for non-linear optimization is nloptr. nloptr is an interface to a C library with numerous optimizers. The required C library nlopt-devel should be available to install from your favourite package manager. For optimal performance, we’ll set up a vector of upper and lower bounds for the parameters. These can also be used in optim with the L-BFGS-B optimizer.

if(isTRUE("nloptr" %in% rownames(installed.packages())))
{
  library(nloptr)
  intervals = unlist(Data$intervals)
  tofit = unlist(Data$tofit)
  npar = length(tofit)
  
  whichfit = which(tofit)
  whichfitlog = which(unlist(Data$tolog)[whichfit])
  lower = intervals[2*(1:npar)-1][whichfit]
  upper = intervals[2*(1:npar)][whichfit]
  lower[whichfitlog]=log10(lower[whichfitlog])
  upper[whichfitlog]=log10(upper[whichfitlog])
  
  system.time({nloptfit = nloptr(
    x0 = Data$init, eval_f=function(x,Data) { return(-profitLikeModel(x,Data=Data))}, lb = lower, ub = upper,
    opts=list(algorithm=paste0("NLOPT_LN_NELDERMEAD"),xtol_rel=0,ftol_abs=1e-3,maxeval=2e3), Data=Data)})
  print(-nloptfit$objective)
}

One important consideration is how to determine convergence. Most optimizers test for convergence based on absolute and/or relative changes in the likelihood and/or parameters, but can also stop after a given elapsed time or number of iterations. It is important to check why the optimization stopped, especially before running MCMC, which can be slower to converge to the global minimum.

Now we can try a LaplaceApproximation LM fit. This should take a few minutes:

if(isTRUE("LaplacesDemon" %in% rownames(installed.packages())))
{
  library(LaplacesDemon)
  Data$algo.func="LA"
  system.time({LAfit = LaplaceApproximation(profitLikeModel, parm=Data$init, Data=Data, Iterations=1e3,
    Method='LM', CovEst='Identity', sir=FALSE)})
  print(LAfit$LP.Final)
  system.time({LAfitnm = LaplaceApproximation(profitLikeModel, parm=Data$init, Data=Data, Iterations=1e3,
    Method='NM', CovEst='Identity', sir=FALSE)})
  print(LAfitnm$LP.Final)
}

Note that LaplaceApproximation is typically slower than nlopt/optim, but it can be used to estimate uncertainties more quickly than by doing MCMC. The best LA LM fit is given by:

LAfit$Summary1[,1]

Check it out:

profitLikeModel(LAfit$Summary1[,1],Data,makeplots=TRUE,whichcomponents=list(sersic=1))
profitLikeModel(LAfit$Summary1[,1],Data,makeplots=TRUE,whichcomponents=list(sersic=2))
profitLikeModel(LAfit$Summary1[,1],Data,makeplots=TRUE,whichcomponents=list(sersic='all'))

modelLA=profitRemakeModellist(LAfit$Summary1[,1],Data$modellist,Data$tofit,Data$tolog)$modellist
profitEllipsePlot(Data,modelLA,pixscale=0.2,FWHM=0.5,SBlim=26)

Other optimizers can be used. One particularly effective algorithm is CMA-ES (Covariance Matrix Adaptation - Evolutionary Strategy). CMA-ES samples multiple points (members of a population) from the supplied priors, and then adapts the priors each iteration, shrinking the parameter space that points are sampled from to converge on the best fit. It is a popular optimizer as it is fairly robust (but not immune) to becoming trapped in local minima while still fairly quick to converge.

First make sure that the cmaeshpc package is installed:

library(devtools)
install_github('taranu/cmaeshpc')

It is recommended to use narrower priors than the very broad ones specified above to speed up convergence:

if(isTRUE("cmaeshpc" %in% rownames(installed.packages())))
{
  library(cmaeshpc)
  Data$algo.func="CMA"
  sigmas=c(2,2,2,2,5,5,1,1,1,1,30,30,0.3,0.3)
  cmasigma=sigmas[which(unlist(tofit) == TRUE)]/3
  cmafit=cmaeshpc(Data$init, profitLikeModel, Data=Data, control=list(maxit=1e3,
    fnscale=-1.0, sigma=cmasigma, diag.sigma=TRUE, diag.eigen=TRUE, diag.pop=TRUE,
    diag.value=TRUE, maxwalltime=Inf, trace=TRUE, stopfitness=0, stop.tolx=1e-3*cmasigma))
  profitLikeModel(cmafit$par,Data,makeplots=TRUE)
}

CMA-ES sometimes takes longer than LaplaceApproximation - depending on the convergence criterion specified by stop.tolx - but it usually finds a better fit, and can be run many times to avoid becoming trapped in local minima. Alternately, you may wish to use the faster LaplaceApproximation first, redefine your priors, and then run CMA-ES to search around the LaplaceApproximation best fit.

Now we can try a LaplacesDemon fit (this will take about an hour):

Data$algo.func="LD"

LDfit=LaplacesDemon(profitLikeModel, Initial.Values=LAfit$Summary1[,1], Data=Data,
  Iterations=1e4, Algorithm='CHARM', Thinning=1, Specs=list(alpha.star=0.44))

If it has converged well you will have a Summary2 structure using the ESS:

LDfit$Summary2

If not you can still check Summary1:

LDfit$Summary1

The global fit should be close to the initial LA fit (shown in blue in the following figures).

With any luck you have enough stationary samples to run:

BestLD=magtri(LDfit$Posterior2, samples=500, samptype='ran')

Otherwise try:

BestLD=magtri(LDfit$Posterior1, samples=1000, samptype='end')

We can now check our final fit:

profitLikeModel(BestLD,Data,makeplots=TRUE,whichcomponents=list(sersic=1))
profitLikeModel(BestLD,Data,makeplots=TRUE,whichcomponents=list(sersic=2))
profitLikeModel(BestLD,Data,makeplots=TRUE,whichcomponents=list(sersic='all'))

modelLD=profitRemakeModellist(BestLD,Data$modellist,Data$tofit,Data$tolog)$modellist
profitEllipsePlot(Data,modelLD,pixscale=0.2,FWHM=0.5,SBlim=26)

In the previous examples, the resolution of the convolved model was limited by the size of the pixels, since convolution can only spread flux from pixel-to-pixel. We can do better by sampling the model and PSF on a finer grid than the pixel scale (“fine-sampling”), through interpolation for empirical PSFs.

To test this, set up the data again. This time we will fine-sample the model and PSF by a factor of 3, taking a minute to benchmark convolution methods:

Dataf=profitSetupData(image=image, sigma=sigma, segim=segim, psf=psf,
  modellist=modellist, tofit=tofit, tolog=tolog, priors=priors, intervals=intervals,
  magzero=0, algo.func='LD', verbose=TRUE, nbenchmark=3L, finesample=3L)

Note that profitSetupData automagically fine-sampled the PSF by interpolation. Usually, brute-force convolution is faster than an FFT (which requires 2x padding to avoid artifacts), but it scales as finesample^4, so FFT is often faster with large images and/or PSFs.

Let’s check to see how the fine-sampled model looks:

profitLikeModel(BestLD,Dataf,makeplots=TRUE,whichcomponents=list(sersic='all'))

That doesn’t look so different, but let’s run LaplaceApproximation again to see how the best-fit parameters changed:

Dataf$algo.func="LA"
LAfitf=LaplaceApproximation(profitLikeModel, parm=LAfit$Summary1[,1], Data=Dataf, Iterations=1e3,
  Method='BFGS', CovEst='Identity', sir=FALSE)

Does the new best fit look slightly better? It should:

profitLikeModel(LAfitf$Summary1[,1],Dataf,makeplots=TRUE)

Now run LaplacesDemon again, with fewer iterations to begin with (as it’s slower to convolve):

Dataf$algo.func="LD"

LDfitf=LaplacesDemon(profitLikeModel, Initial.Values=LAfitf$Summary1[,1], Data=Dataf,
  Iterations=1e3, Algorithm='CHARM', Thinning=1, Specs=list(alpha.star=0.44))

If you run the above for 1e4 iterations (will take several hours), try comparing posteriors:

LDfit$Summary2
LDfitf$Summary2

BestLDf=magtri(LDfit$Posterior1, samptype='end')
profitLikeModel(BestLDf,Dataf,makeplots=TRUE)