ctools  2.0.0
 All Classes Namespaces Files Functions Variables Macros Pages
cspull.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 # ==========================================================================
3 # This script generates the pull distribution for all model parameters.
4 #
5 # Copyright (C) 2011-2022 Juergen Knoedlseder
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #
20 # ==========================================================================
21 import sys
22 import gammalib
23 import ctools
24 from cscripts import obsutils
25 from cscripts import ioutils
26 from cscripts import mputils
27 
28 
29 # ============ #
30 # cspull class #
31 # ============ #
32 class cspull(ctools.csobservation):
33  """
34  Generates pull distributions for a model
35  """
36  # Constructor
37  def __init__(self, *argv):
38  """
39  Constructor
40  """
41  # Initialise application by calling the appropriate class constructor
42  self._init_csobservation(self.__class__.__name__, ctools.__version__, argv)
43 
44  # Initialise class members
45  self._fits = None
46  self._nthreads = 0
47 
48  # Return
49  return
50 
51  # State methods por pickling
52  def __getstate__(self):
53  """
54  Extend ctools.csobservation getstate method to include some members
55 
56  Returns
57  -------
58  state : dict
59  Pickled instance
60  """
61  # Set pickled dictionary
62  state = {'base' : ctools.csobservation.__getstate__(self),
63  'fits' : self._fits,
64  'nthreads' : self._nthreads}
65 
66  # Return pickled dictionary
67  return state
68 
69  def __setstate__(self, state):
70  """
71  Extend ctools.csobservation setstate method to include some members
72 
73  Parameters
74  ----------
75  state : dict
76  Pickled instance
77  """
78  ctools.csobservation.__setstate__(self, state['base'])
79  self._fits = state['fits']
80  self._nthreads = state['nthreads']
81 
82  # Return
83  return
84 
85 
86  # Private methods
87  def _get_parameters(self):
88  """
89  Get parameters from parfile
90  """
91  # If there are no observations in container then get some ...
92  if self.obs().is_empty():
93  self.obs(self._get_observations())
94 
95  # ... otherwise add response information and energy boundaries
96  # in case they are missing
97  else:
98  self._setup_observations(self.obs())
99 
100  # Set observation statistic
101  self._set_obs_statistic(gammalib.toupper(self['statistic'].string()))
102 
103  # Get number of energy bins
104  enumbins = self['enumbins'].integer()
105 
106  # Query parameters for On/Off observation
107  if gammalib.toupper(self['onsrc'].string()) != 'NONE':
108  self['onrad'].real()
109 
110  # Query parameters for binned if requested
111  elif enumbins > 0:
112  self['npix'].integer()
113  self['binsz'].real()
114  self['coordsys'].string()
115  self['proj'].string()
116 
117  # Set models if we have none
118  if self.obs().models().is_empty():
119  self.obs().models(self['inmodel'].filename())
120 
121  # Query other parameters
122  self['ntrials'].integer()
123  self['edisp'].boolean()
124  self['seed'].integer()
125  self['chatter'].integer()
126 
127  # Query some parameters
128  self['profile'].boolean()
129 
130  # Read ahead output parameters
131  if self._read_ahead():
132  self['outfile'].filename()
133 
134  # Write input parameters into logger
135  self._log_parameters(gammalib.TERSE)
136 
137  # Set number of processes for multiprocessing
138  self._nthreads = mputils.nthreads(self)
139 
140  # Return
141  return
142 
143  def _obs_string(self, obs):
144  """
145  Generate summary string for observation
146 
147  Parameters
148  ----------
149  obs : `~gammalib.GCTAObservation`
150  Observation
151 
152  Returns
153  -------
154  text : str
155  Summary string
156  """
157  # Extract information from observation
158  if obs.classname() == 'GCTAOnOffObservation':
159  emin = obs.on_spec().ebounds().emin().TeV()
160  emax = obs.on_spec().ebounds().emax().TeV()
161  mode = 'On/Off'
162  else:
163  emin = obs.events().ebounds().emin().TeV()
164  emax = obs.events().ebounds().emax().TeV()
165  binned = (obs.events().classname() == 'GCTAEventCube')
166  if binned:
167  mode = 'binned'
168  else:
169  mode = 'unbinned'
170  events = obs.nobserved()
171 
172  # Compose summary string
173  if events > 0:
174  text = '%d events, %.3f-%.3f TeV, %s' % (events, emin, emax, mode)
175  else:
176  text = '%.3f-%.3f TeV, %s' % (emin, emax, mode)
177 
178  # Return summary string
179  return text
180 
181  def _trial(self, seed):
182  """
183  Compute the pull for a single trial
184 
185  Parameters
186  ----------
187  seed : int
188  Random number generator seed
189 
190  Returns
191  -------
192  result : dict
193  Dictionary of results
194  """
195  # Write header
196  self._log_header2(gammalib.NORMAL, 'Trial %d' %
197  (seed-self['seed'].integer()+1))
198 
199  # Get number of energy bins and On source name and initialise
200  # some parameters
201  nbins = self['enumbins'].integer()
202  onsrc = self['onsrc'].string()
203  edisp = self['edisp'].boolean()
204  statistic = self['statistic'].string()
205  emin = None
206  emax = None
207  binsz = 0.0
208  npix = 0
209  proj = 'TAN'
210  coordsys = 'CEL'
211 
212  # If we have a On source name then set On region radius
213  if gammalib.toupper(onsrc) != 'NONE':
214  onrad = self['onrad'].real()
215  emin = self['emin'].real()
216  emax = self['emax'].real()
217  edisp = True # Use always energy dispersion for On/Off
218  else:
219 
220  # Reset On region source name and radius
221  onrad = 0.0
222  onsrc = None
223 
224  # If we have a binned obeservation then specify the lower and
225  # upper energy limit in TeV
226  if nbins > 0:
227  emin = self['emin'].real()
228  emax = self['emax'].real()
229  binsz = self['binsz'].real()
230  npix = self['npix'].integer()
231  proj = self['proj'].string()
232  coordsys = self['coordsys'].string()
233 
234  # Simulate events
235  obs = obsutils.sim(self.obs(),
236  emin=emin, emax=emax, nbins=nbins,
237  onsrc=onsrc, onrad=onrad,
238  addbounds=True, seed=seed,
239  binsz=binsz, npix=npix, proj=proj, coord=coordsys,
240  edisp=edisp, nthreads=1, log=False,
241  debug=self._logDebug(), chatter=self['chatter'].integer())
242 
243  # Determine number of events in simulation
244  nevents = 0.0
245  for run in obs:
246  nevents += run.nobserved()
247 
248  # Write simulation results
249  self._log_header3(gammalib.NORMAL, 'Simulation')
250  for run in self.obs():
251  self._log_value(gammalib.NORMAL, 'Input observation %s' % run.id(),
252  self._obs_string(run))
253  for run in obs:
254  self._log_value(gammalib.NORMAL, 'Output observation %s' % run.id(),
255  self._obs_string(run))
256  self._log_value(gammalib.NORMAL, 'Number of simulated events', nevents)
257 
258  # Fit model
259  if self['profile'].boolean():
260  models = self.obs().models()
261  for model in models:
262  like = ctools.cterror(obs)
263  like['srcname'] = model.name()
264  like['edisp'] = edisp
265  like['statistic'] = statistic
266  like['nthreads'] = 1 # Avoids OpenMP conflict
267  like['debug'] = self._logDebug()
268  like['chatter'] = self['chatter'].integer()
269  like.run()
270  else:
271  like = ctools.ctlike(obs)
272  like['edisp'] = edisp
273  like['statistic'] = statistic
274  like['nthreads'] = 1 # Avoids OpenMP conflict
275  like['debug'] = self._logDebug()
276  like['chatter'] = self['chatter'].integer()
277  like.run()
278 
279  # Store results
280  logL = like.opt().value()
281  npred = like.obs().npred()
282  models = like.obs().models()
283 
284  # Write result header
285  self._log_header3(gammalib.NORMAL, 'Pulls')
286 
287  # Gather results in form of a list of result columns and a
288  # dictionary containing the results. The result contains the
289  # log-likelihood, the number of simulated events, the number of
290  # predicted events and for each fitted parameter the fitted value,
291  # the pull and the fit error.
292  #
293  # Note that we do not use the model and parameter iterators
294  # because we need the indices to get the true (or real) parameter
295  # values from the input models.
296  colnames = ['LogL', 'Sim_Events', 'Npred_Events']
297  values = {'LogL': logL, 'Sim_Events': nevents, 'Npred_Events': npred}
298  for i in range(models.size()):
299  model = models[i]
300  for k in range(model.size()):
301  par = model[k]
302  if par.is_free():
303 
304  # Set name as a combination of model name and parameter
305  # name separated by an underscore. In that way each
306  # parameter has a unique name.
307  name = model.name() + '_' + par.name()
308 
309  # Set "Parameter", "Parameter_error" and "Parameter_Pull"
310  # column names
311  col_par = name
312  col_par_error = name+'_error'
313  col_par_pull = name+'_pull'
314 
315  # Append column names
316  colnames.append(col_par)
317  colnames.append(col_par_error)
318  colnames.append(col_par_pull)
319 
320  # Compute pull for this parameter as the difference
321  # (fitted - true) / error
322  # In case that the error is 0 the pull is set to 99
323  fitted_value = par.value()
324  real_value = self.obs().models()[i][k].value()
325  error = par.error()
326  if error != 0.0:
327  pull = (fitted_value - real_value) / error
328  else:
329  pull = 99.0
330 
331  # Store results in dictionary
332  values[col_par] = fitted_value
333  values[col_par_error] = error
334  values[col_par_pull] = pull
335 
336  # Write results into logger
337  value = '%.4f (%e +/- %e)' % (pull, fitted_value, error)
338  self._log_value(gammalib.NORMAL, name, value)
339 
340  # Bundle together results in a dictionary
341  result = {'colnames': colnames, 'values': values}
342 
343  # Return
344  return result
345 
346  def _create_fits(self, results):
347  """
348  Create FITS file from results
349 
350  Parameters
351  ----------
352  results : list of dict
353  List of result dictionaries
354  """
355  # Gather headers for parameter columns
356  headers = []
357  for colname in results[0]['colnames']:
358  if colname != 'LogL' and colname != 'Sim_Events' and \
359  colname != 'Npred_Events':
360  headers.append(colname)
361 
362  # Create FITS table columns
363  nrows = len(results)
364  logl = gammalib.GFitsTableDoubleCol('LOGL', nrows)
365  nevents = gammalib.GFitsTableDoubleCol('NEVENTS', nrows)
366  npred = gammalib.GFitsTableDoubleCol('NPRED', nrows)
367  logl.unit('')
368  nevents.unit('counts')
369  npred.unit('counts')
370  columns = []
371  for header in headers:
372  name = gammalib.toupper(header)
373  column = gammalib.GFitsTableDoubleCol(name, nrows)
374  column.unit('')
375  columns.append(column)
376 
377  # Fill FITS table columns
378  for i, result in enumerate(results):
379  logl[i] = result['values']['LogL']
380  nevents[i] = result['values']['Sim_Events']
381  npred[i] = result['values']['Npred_Events']
382  for k, column in enumerate(columns):
383  column[i] = result['values'][headers[k]]
384 
385  # Initialise FITS Table with extension "PULL_DISTRIBUTION"
386  table = gammalib.GFitsBinTable(nrows)
387  table.extname('PULL_DISTRIBUTION')
388 
389  # Add keywords for compatibility with gammalib.GMWLSpectrum
390  table.card('INSTRUME', 'CTA', 'Name of Instrument')
391  table.card('TELESCOP', 'CTA', 'Name of Telescope')
392 
393  # Stamp header
394  self._stamp(table)
395 
396  # Set optional keyword values
397  if gammalib.toupper(self['onsrc'].string()) != 'NONE':
398  onrad = self['onrad'].real()
399  else:
400  onrad = 0.0
401  if self['enumbins'].integer() > 0:
402  npix = self['npix'].integer()
403  binsz = self['binsz'].real()
404  coordsys = self['coordsys'].string()
405  proj = self['proj'].string()
406  else:
407  npix = 0
408  binsz = 0.0
409  coordsys = ''
410  proj = ''
411 
412  # Add script keywords
413  table.card('NPULLS', self['ntrials'].integer(), 'Number of pulls')
414  table.card('SEED', self['seed'].integer(), 'Seed value for pulls')
415  table.card('ONSRC', self['onsrc'].string(), 'Name of On surce for On/Off analysis')
416  table.card('ONRAD', onrad, '[deg] Radius of On region')
417  table.card('ENUMBINS', self['enumbins'].integer(), 'Number of energy bins')
418  table.card('NPIX', npix, 'Number of pixels for binned analysis')
419  table.card('BINSZ', binsz, 'Pixel size for binned analysis')
420  table.card('COORDSYS', coordsys, 'Coordinate system for binned analysis')
421  table.card('PROJ', proj, 'Projection for binned analysis')
422  table.card('STAT', self['statistic'].string(), 'Optimization statistic')
423  table.card('EDISP', self['edisp'].boolean(), 'Use energy dispersion?')
424  table.card('PROFILE', self['profile'].boolean(), 'Use likelihood profile method for errors?')
425 
426  # Append filled columns to fits table
427  table.append(logl)
428  table.append(nevents)
429  table.append(npred)
430  for column in columns:
431  table.append(column)
432 
433  # Create the FITS file now
434  self._fits = gammalib.GFits()
435  self._fits.append(table)
436 
437  # Return
438  return
439 
440 
441  # Public methods
442  def process(self):
443  """
444  Process the script
445  """
446  # Get parameters
447  self._get_parameters()
448 
449  # Write observation into logger
450  self._log_observations(gammalib.NORMAL, self.obs(), 'Input observation')
451 
452  # Write header
453  self._log_header1(gammalib.TERSE, 'Generate pull distribution')
454 
455  # Get number of trials
456  ntrials = self['ntrials'].integer()
457 
458  # Get seed value
459  seed = self['seed'].integer()
460 
461  # Initialise results
462  results = []
463 
464  # If more than a single thread is requested then use multiprocessing
465  if self._nthreads > 1:
466  args = [(self, '_trial', i + seed) for i in range(ntrials)]
467  poolresults = mputils.process(self._nthreads, mputils.mpfunc, args)
468 
469  # Continue with regular processing
470  for i in range(ntrials):
471 
472  # If multiprocessing was used then recover results and put them
473  # into the log file
474  if self._nthreads > 1:
475  results.append(poolresults[i][0])
476  self._log_string(gammalib.TERSE, poolresults[i][1]['log'], False)
477 
478  # ... otherwise make a trial now
479  else:
480 
481  # Run trial
482  result = self._trial(i + seed)
483 
484  # Append results
485  results.append(result)
486 
487  # Create FITS file
488  self._create_fits(results)
489 
490  # Return
491  return
492 
493  def models(self, models):
494  """
495  Set model
496 
497  Parameters
498  ----------
499  models : `~gammalib.GModels`
500  Set model container
501  """
502  # Set model container
503  self.obs().models(models)
504 
505  # Return
506  return
507 
508  def save(self):
509  """
510  Save pull FITS file
511  """
512  # Write header
513  self._log_header1(gammalib.TERSE, 'Save pull distribution')
514 
515  # Continue only if FITS file is valid
516  if self._fits != None:
517 
518  # Get outmap parameter
519  outfile = self['outfile'].filename()
520 
521  # Log file name
522  self._log_value(gammalib.NORMAL, 'Pull distribution file', outfile.url())
523 
524  # Save pull distribution
525  self._fits.saveto(outfile, self['clobber'].boolean())
526 
527  # Return
528  return
529 
530  def pull_distribution(self):
531  """
532  Return pull distribution FITS file
533 
534  Returns:
535  FITS file containing pull distribution
536  """
537  # Return
538  return self._fits
539 
540 
541 # ======================== #
542 # Main routine entry point #
543 # ======================== #
544 if __name__ == '__main__':
545 
546  # Create instance of application
547  app = cspull(sys.argv)
548 
549  # Execute application
550  app.execute()