casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SparseDiff.h
Go to the documentation of this file.
1 //# SparseDiff.h: An automatic differentiating class for functions
2 //# Copyright (C) 2007,2008
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //#
27 //# $Id: SparseDiff.h,v 1.3 2008/01/10 12:00:42 wbrouw Exp $
28 
29 #ifndef SCIMATH_SPARSEDIFF_H
30 #define SCIMATH_SPARSEDIFF_H
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
36 #include <casacore/casa/vector.h>
37 #include <utility>
38 
39 // Using
40 using std::pair;
41 
42 namespace casacore { //# NAMESPACE CASACORE - BEGIN
43 
44  //# Forward declarations
45  template <class T> class SparseDiff;
46 
47  // <summary>
48  // Class that computes partial derivatives by automatic differentiation.
49  // </summary>
50  //
51  // <use visibility=export>
52  //
53  // <reviewed reviewer="UNKNOWN" date="" tests="tSparseDiff.cc" demos="dSparseDiff.cc">
54  // </reviewed>
55  //
56  // <prerequisite>
57  // <li> <linkto class=AutoDiff>AutoDiff</linkto> class
58  // </prerequisite>
59  //
60  // <etymology>
61  // Class that computes partial derivatives for some parameters by automatic
62  // differentiation, thus SparseDiff.
63  // </etymology>
64  //
65  // <synopsis>
66  // Class that computes partial derivatives by automatic differentiation.
67  // It does this by storing the value of a function and the values of its first
68  // derivatives with respect to some of its independent parameters.
69  // When a mathematical
70  // operation is applied to a SparseDiff object, the derivative values of the
71  // resulting new object are computed according to the chain rules
72  // of differentiation. SparseDiff operates like the
73  // <linkto class=AutoDiff>AutoDiff</linkto> class, but only determines the
74  // derivatives with respect to the actual dependent variables.
75  //
76  // Suppose we have a function f(x0,x1,...,xn) and its differential is
77  // <srcblock>
78  // df = (df/dx0)*dx0 + (df/dx1)*dx1 + ... + (df/dxn)*dxn
79  // </srcblock>
80  // We can build a class that has the value of the function,
81  // f(x0,x1,...,xn), and the values of the derivatives, (df/dx0), (df/dx1),
82  // ..., (df/dxn) at (x0,x1,...,xn), as class members.
83  //
84  // Now if we have another function, g(x0,x1,...,xn) and its differential is
85  // dg = (dg/dx0)*dx0 + (dg/dx1)*dx1 + ... + (dg/dxn)*dxn,
86  // since
87  // <srcblock>
88  // d(f+g) = df + dg,
89  // d(f*g) = g*df + f*dg,
90  // d(f/g) = df/g - fdg/g^2,
91  // dsin(f) = cos(f)df,
92  // ...,
93  // </srcblock>
94  // we can calculate
95  // <srcblock>
96  // d(f+g), d(f*g), ...,
97  // </srcblock> based on our information on
98  // <srcblock>
99  // df/dx0, df/dx1, ..., dg/dx0, dg/dx1, ..., dg/dxn.
100  // </srcblock>
101  // All we need to do is to define the operators and derivatives of common
102  // mathematical functions.
103  //
104  // To be able to use the class as an automatic differentiator of a function
105  // object, we need a templated function object, i.e. an object with:
106  // <ul>
107  // <li> a <src> template <class T> T operator()(const T)</src>
108  // <li> or multiple variable input like:
109  // <src> template <class T> T operator()(const Vector<T> &)</src>
110  // <li> all dependent variables used in the calculation of the function
111  // value should have been typed with T.
112  // </ul>
113  // A simple example of such a function object could be:
114  // <srcblock>
115  // template <class T> f {
116  // public:
117  // T operator()(const T &x, const T &a, const T &b) {
118  // return a*b*x; }
119  // };
120  // // Instantiate the following versions:
121  // template class f<Double>;
122  // template class f<SparseDiff<Double> >;
123  // </srcblock>
124  // A call with values will produce the function value:
125  // <srcblock>
126  // cout << f(7.0, 2.0, 3.0) << endl;
127  // // will produce the value at x=7 for a=2; b=3:
128  // 42
129  // // But a call indicating that we want derivatives to a and b:
130  // cout << f(SparseDiff<Double>(7.0), SparseDiff<Double>(2.0, 0),
131  // SparseDiff<Double>(3.0, 1)) << endl;
132  // // will produce the value at x=7 for a=2; b=3:
133  // // and the partial derivatives wrt a and b at x=7:
134  // (42, [21, 14])
135  // // The following will calculate the derivate wrt x:
136  // cout << f(SparseDiff<Double>(7.0, 0), SparseDiff<Double>(2.0),
137  // SparseDiff<Double>(3.0)) << endl;
138  // (42,[6])
139  // </srcblock>
140  // Note that in practice constants may be given as Double constants.
141  // In actual practice, there are a few rules to obey for the structure of
142  // the function object if you want to use the function object and its
143  // derivatives in least squares fitting procedures in the Fitting
144  // module. The major one is to view the function object having 'fixed' and
145  // 'variable' parameters. I.e., rather than viewing the function as
146  // depending on parameters <em>a, b, x</em> (<src>f(a,b,x)</src>), the
147  // function is considered to be <src>f(x; a,b)</src>, where <em>a, b</em>
148  // are 'fixed' parameters, and <em>x</em> a variable parameter.
149  // Fixed parameters should be contained in a
150  // <linkto class=FunctionParam>FunctionParam</linkto> container object;
151  // while the variable parameter(s) are given in the function
152  // <src>operator()</src>. See <linkto class=Function>Function</linkto> class
153  // for details.
154  //
155  // A Gaussian spectral profile would in general have the center frequency,
156  // the width and the amplitude as fixed parameters, and the frequency as
157  // a variable. Given a spectrum, you would solve for the fixed parameters,
158  // given spectrum values. However, in other cases the role of the
159  // parameters could be reversed. An example could be a whole stack of
160  // observed (in the laboratory) spectra at different temperatures at
161  // one frequency. In that case the width would be the variable parameter,
162  // and the frequency one of the fixed (and to be solved for)parameters.
163  //
164  // Since the calculation of the derivatives is done with simple overloading,
165  // the calculation of second (and higher) derivatives is easy. It should be
166  // noted that higher deivatives are inefficient in the current incarnation
167  // (there is no knowledge e.g. about symmetry in the Jacobian). However,
168  // it is a very good way to get the correct answers of the derivatives. In
169  // practice actual production code will be better off with specialization
170  // of the <src>f<SparseDiff<> ></src> implementation.
171  //
172  // The <src>SparseDiff</src> class is the class the user communicates with.
173  // Alias classes (<linkto class=SparseDiffA>SparseDiffA</linkto> and
174  // <linkto class=SparseDiffA>SparseDiffX</linkto>) exist
175  // to make it possible to have different incarnations of a templated
176  // method (e.g. a generic one and a specialized one). See the
177  // <src>dSparseDiff</src> demo for an example of its use.
178  //
179  // All operators and functions are declared in <linkto file=SparseDiffMath.h>
180  // SparseDiffMath</linkto>. The output operator in
181  // <linkto file=SparseDiffIO.h>SparseDiffIO</linkto>.
182  // The actual structure of the
183  // data block used by <src>SparseDiff</src> is described in
184  // <linkto class=SparseDiffRep>SparseDiffRep</linkto>.
185  //
186  // A SparseDiff can be constructed from an AutoDiff.
187  // <em>toAutoDiff(n)</em> can convert it to an AutoDiff.
188  // </synopsis>
189  //
190  // <example>
191  // <srcblock>
192  // // First a simple example.
193  // // We have a function of the form f(x,y,z); and want to know the
194  // // value of the function for x=10; y=20; z=30; and for
195  // // the derivatives at those points.
196  // // Specify the values; and indicate the parameter dependence:
197  // SparseDiff<Double> x(10.0, 0);
198  // SparseDiff<Double> y(20.0, 1);
199  // SparseDiff<Double> z(30.0, 2);
200  // // The result will be:
201  // SparseDiff<Double> result = x*y + sin(z);
202  // cout << result.value() << endl;
203  // // 199.012
204  // cout << result.derivatives() << endl;
205  // // [20, 10, 0.154251]
206  // // Note: sin(30) = -0.988; cos(30) = 0.154251;
207  // </srcblock>
208  //
209  // See for an extensive example the demo program dSparseDiff. It is
210  // based on the example given above, and shows also the use of second
211  // derivatives (which is just using <src>SparseDiff<SparseDiff<Double> ></src>
212  // as template argument).
213  // <srcblock>
214  // // The function, with fixed parameters a,b:
215  // template <class T> class f {
216  // public:
217  // T operator()(const T& x) { return a_p*a_p*a_p*b_p*b_p*x; }
218  // void set(const T& a, const T& b) { a_p = a; b_p = b; }
219  // private:
220  // T a_p;
221  // T b_p;
222  // };
223  // // Call it with different template arguments:
224  // Double a0(2), b0(3), x0(7);
225  // f<Double> f0; f0.set(a0, b0);
226  // cout << "Value: " << f0(x0) << endl;
227  //
228  // SparseDiff<Double> a1(2,0), b1(3,1), x1(7);
229  // f<SparseDiff<Double> > f1; f1.set(a1, b1);
230  // cout << "Diff a,b: " << f1(x1) << endl;
231  //
232  // SparseDiff<Double> a2(2), b2(3), x2(7,0);
233  // f<SparseDiff<Double> > f2; f2.set(a2, b2);
234  // cout << "Diff x: " << f2(x2) << endl;
235  //
236  // SparseDiff<SparseDiff<Double> > a3(SparseDiff<Double>(2,0),0),
237  // b3(SparseDiff<Double>(3,1),1), x3(SparseDiff<Double>(7));
238  // f<SparseDiff<SparseDiff<Double> > > f3; f3.set(a3, b3);
239  // cout << "Diff2 a,b: " << f3(x3) << endl;
240  //
241  // SparseDiff<SparseDiff<Double> > a4(SparseDiff<Double>(2)),
242  // b4(SparseDiff<Double>(3)),
243  // x4(SparseDiff<Double>(7,0),0);
244  // f<SparseDiff<SparseDiff<Double> > > f4; f4.set(a4, b4);
245  // cout << "Diff2 x: " << f4(x4) << endl;
246  //
247  // // Result will be:
248  // // Value: 504
249  // // Diff a,b: (504, [756, 336])
250  // // Diff x: (504, [72])
251  // // Diff2 a,b: ((504, [756, 336]), [(756, [756, 504]), (336, [504, 112])])
252  // // Diff2 x: ((504, [72]), [(72, [0])])
253  //
254  // // It needed the template instantiations definitions:
255  // template class f<Double>;
256  // template class f<SparseDiff<Double> >;
257  // template class f<SparseDiff<SparseDiff<Double> > >;
258  // </srcblock>
259  // </example>
260  //
261  // <motivation>
262  // The creation of the class was motivated by least-squares non-linear fits
263  // in cases where each individual condition equation depends only on a
264  // fraction of the fixed parameters (e.g. self-calibration where only pairs
265  // of antennas are present per equation), and hence only a few
266  // partial derivatives of a fitted function are needed. It would be tedious
267  // to create functionals for all partial derivatives of a function.
268  // </motivation>
269  //
270  // <templating arg=T>
271  // <li> any class that has the standard mathematical and comparison
272  // operators and functions defined.
273  // </templating>
274  //
275  // <todo asof="2007/11/27">
276  // <li> Nothing I know of.
277  // </todo>
278 
279  template <class T> class SparseDiff {
280  public:
281  //# Typedefs
282  typedef T value_type;
284  typedef const value_type& const_reference;
286  typedef const value_type* const_iterator;
287 
288  //# Constructors
289  // Construct a constant with a value of zero. Zero derivatives.
290  SparseDiff();
291 
292  // Construct a constant with a value of v. Zero derivatives.
293  SparseDiff(const T &v);
294 
295  // A function f(x0,x1,...,xn,...) with a value of v. The
296  // nth derivative is one, and all other derivatives are zero.
297  SparseDiff(const T &v, const uInt n);
298 
299  // A function f(x0,x1,...,xn,...) with a value of v. The
300  // nth derivative is der, and all other derivatives are zero.
301  SparseDiff(const T &v, const uInt n, const T &der);
302 
303  // Construct from an AutoDiff
304  SparseDiff(const AutoDiff<T> &other);
305 
306  // Construct one from another (deep copy)
307  SparseDiff(const SparseDiff<T> &other);
308 
309  // Destructor
310  ~SparseDiff();
311 
312  // Assignment operator. Assign a constant to variable.
313  SparseDiff<T> &operator=(const T &v);
314 
315  // Assignment operator. Add a gradient to variable.
316  SparseDiff<T> &operator=(const pair<uInt, T> &der);
317 
318  // Assignment operator. Assign gradients to variable.
319  SparseDiff<T> &operator=(const vector<pair<uInt, T> > &der);
320 
321  // Assign from an Autodiff
322  SparseDiff<T> &operator=(const AutoDiff<T> &other);
323 
324  // Assign one to another (deep copy)
325  SparseDiff<T> &operator=(const SparseDiff<T> &other);
326 
327  // Assignment operators
328  // <group>
329  void operator*=(const SparseDiff<T> &other);
330  void operator/=(const SparseDiff<T> &other);
331  void operator+=(const SparseDiff<T> &other);
332  void operator-=(const SparseDiff<T> &other);
333  void operator*=(const T other) { rep_p->operator*=(other);
334  value() *= other; }
335  void operator/=(const T other) { rep_p->operator/=(other);
336  value() /= other; }
337  void operator+=(const T other) { value() += other; }
338  void operator-=(const T other) { value() -= other; }
339  // </group>
340 
341  // Convert to an AutoDiff of length <em>n</em>
342  AutoDiff<T> toAutoDiff(uInt n) const;
343 
344  // Returns the pointer to the structure of value and derivatives.
345  // <group>
347  const SparseDiffRep<T> *theRep() const { return rep_p; }
348  // </group>
349 
350  // Returns the value of the function
351  // <group>
352  T &value() { return rep_p->val_p; }
353  const T &value() const { return rep_p->val_p; }
354  // </group>
355 
356  // Returns a vector of the derivatives of a SparseDiff
357  // <group>
358  vector<pair<uInt, T> > &derivatives() const;
359  void derivatives(vector<pair<uInt, T> > &res) const;
360  const vector<pair<uInt, T> > &grad() const { return rep_p->grad_p; }
361  vector<pair<uInt, T> > &grad() { return rep_p->grad_p; }
362  // </group>
363 
364  // Returns a specific derivative. No check for a valid which.
365  // <group>
366  pair<uInt, T> &derivative(uInt which) { return rep_p->grad_p[which]; }
367  const pair<uInt, T> &derivative(uInt which) const {
368  return rep_p->grad_p[which]; }
369  // </group>
370 
371  // Return total number of derivatives
372  uInt nDerivatives() const { return rep_p->grad_p.size(); }
373 
374  // Is it a constant, i.e., with zero derivatives?
375  Bool isConstant() const { return rep_p->grad_p.empty(); }
376 
377  // Sort criterium
378  static Bool ltSort(pair<uInt, T> &lhs, pair<uInt, T> &rhs);
379 
380  // Sort derivative list; cater for doubles and zeroes
381  void sort();
382 
383  private:
384  //# Data
385  // Value representation
387 
388  };
389 
390 
391 } //# NAMESPACE CASACORE - END
392 
393 #ifndef CASACORE_NO_AUTO_TEMPLATES
394 #include <casacore/scimath/Mathematics/SparseDiff.tcc>
395 #endif //# CASACORE_NO_AUTO_TEMPLATES
396 #endif
void operator+=(const SparseDiff< T > &other)
Bool isConstant() const
Is it a constant, i.e., with zero derivatives?
Definition: SparseDiff.h:375
const value_type * const_iterator
Definition: SparseDiff.h:286
Class that computes partial derivatives by automatic differentiation.
Definition: SparseDiff.h:45
const vector< pair< uInt, T > > & grad() const
Definition: SparseDiff.h:360
const value_type & const_reference
Definition: SparseDiff.h:284
vector< pair< uInt, T > > & grad()
Definition: SparseDiff.h:361
const T & value() const
Definition: SparseDiff.h:353
value_type & reference
Definition: SparseDiff.h:283
void operator+=(const T other)
Definition: SparseDiff.h:337
void operator/=(const T other)
Definition: SparseDiff.h:335
vector< pair< uInt, T > > & derivatives() const
Returns a vector of the derivatives of a SparseDiff.
void operator*=(const T other)
Definition: SparseDiff.h:333
value_type * iterator
Definition: SparseDiff.h:285
SparseDiff()
Construct a constant with a value of zero.
AutoDiff< T > toAutoDiff(uInt n) const
Convert to an AutoDiff of length n
T & value()
Returns the value of the function.
Definition: SparseDiff.h:352
Representation of data for the spare automatic differentiation calss.
Definition: SparseDiffRep.h:89
pair< uInt, T > & derivative(uInt which)
Returns a specific derivative.
Definition: SparseDiff.h:366
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
Class that computes partial derivatives by automatic differentiation.
Definition: AutoDiff.h:257
void operator-=(const T other)
Definition: SparseDiff.h:338
const pair< uInt, T > & derivative(uInt which) const
Definition: SparseDiff.h:367
void sort()
Sort derivative list; cater for doubles and zeroes.
const SparseDiffRep< T > * theRep() const
Definition: SparseDiff.h:347
uInt nDerivatives() const
Return total number of derivatives.
Definition: SparseDiff.h:372
SparseDiff< T > & operator=(const T &v)
Assignment operator.
~SparseDiff()
Destructor.
SparseDiffRep< T > * theRep()
Returns the pointer to the structure of value and derivatives.
Definition: SparseDiff.h:346
void operator/=(const SparseDiff< T > &other)
void operator*=(const SparseDiff< T > &other)
Assignment operators.
static Bool ltSort(pair< uInt, T > &lhs, pair< uInt, T > &rhs)
Sort criterium.
void operator-=(const SparseDiff< T > &other)
SparseDiffRep< T > * rep_p
Value representation.
Definition: SparseDiff.h:386
unsigned int uInt
Definition: aipstype.h:51