casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BiweightStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_BIWEIGHTSTATISTICS_H
27 #define SCIMATH_BIWEIGHTSTATISTICS_H
28 
29 #include <casacore/casa/aips.h>
30 
32 
33 #include <set>
34 #include <vector>
35 #include <utility>
36 
37 namespace casacore {
38 
39 // The biweight algorithm is a robust iterative algorithm that computes two
40 // quantities called the "location" and the "scale", which are analogous to
41 // the mean and the standard deviation. Important equations are
42 //
43 // A. How to compute u_i values, which are related to the weights,
44 // w_i = (1 - u_i*u_i) if abs(u_i) < 1, 0 otherwise, using the equation
45 //
46 // u_i = (x_i - c_bi)/(c*s_bi) (1)
47 //
48 // where x_i are the data values, c_bi is the biweight location, c is a
49 // configurable constant, and s_bi is the biweight scale. For the initial
50 // computation of the u_i values, c_bi is set equal to the median of the
51 // distribution and s_bi is set equal to the normalized median of the
52 // absolute deviation about the median (that is the median of the absolute
53 // deviation about the median multiplied by the value of the probit function
54 // at 0.75).
55 // B The location, c_bi, is computed from
56 //
57 // c_bi = sum(x_i * w_i^2)/sum(w_i^2) (2)
58 //
59 // where only values of u_i which satisfy abs(u_i) < 1 (w_i > 0) are used in
60 // the sums.
61 // C. The scale value is computed using
62 //
63 // n * sum((x_i - c_bi)^2 * w_i^4)
64 // s_bi^2 = _______________________________ (3)
65 // p * max(1, p - 1)
66 //
67 // where n is the number of points for the entire distribution (which includes
68 // all the data for which abs(u_i) >= 1) and p is given by
69 //
70 // p = abs(sum((w_i) * (w_i - 4*u_i^2)))
71 //
72 // Again, the sums include only data for which abs(u_i) < 1.
73 //
74 // The algorithm proceeds as follows.
75 // 1. Compute initial u_i values from equation (1), setting c_bi equal to the
76 // median of the distribution and s_bi equal to the normalized median of the
77 // absolute deviation about the median.
78 // 2. Compute the initial value of the scale using the u_i values computed in
79 // step 1. using equation 3.
80 // 3. Recompute u_i values using the most recent previous scale and location
81 // values.
82 // 4. Compute the location using the u_i values from step 3 and equation (2).
83 // 5. Recompute u_i values using the most recent previous scale and location
84 // values.
85 // 6. Compute the new scale value using the the u_i values computed in step 5
86 // and the value of the location computed in step 4.
87 // 7. Steps 3. - 6. are repeated until convergence occurs or the maximum number
88 // of iterations (a configurable parameter) is reached. The convergence
89 // criterion is given by
90 //
91 // abs(1 - s_bi/s_bi,prev) < 0.03 * sqrt(0.5/(n - 1))
92 //
93 // where s_bi,prev is the value of the scale computed in the previous
94 // iteration.
95 //
96 // SPECIAL CASE TO FACILITATE SPEED
97 //
98 // In the special case where maxNiter is specified to be negative, the algorithm
99 // proceeds as follows
100 // 1. Compute u_i values using the median for the location and the normalized
101 // median of the absolute deviation about the median as the scale
102 // 2. Compute the location and scale (which can be carried out simultaneously)
103 // using the u_i values computed in step 1. The value of the location is just
104 // the median that is used in equation (3) to compute the scale
105 //
106 // IMPORTANT NOTE REGARDING USER SPECIFIED WEIGHTS
107 //
108 // Although user-specified weights can be supplied, they are effectively ignored
109 // by this algorithm, except for data which have weights of zero, which are
110 // ignored.
111 
112 // This is a derived class of ClassicalStatistics, rather than
113 // ConstrainedRangeStatistics, because if behaves differently from
114 // ConstrainedRangeStatistics and does not need to use any methods in that
115 // class, so making it a specialization of the higher level ClassicalStatistics
116 // seems the better choice.
117 template <
118  class AccumType, class DataIterator, class MaskIterator=const Bool*,
119  class WeightsIterator=DataIterator
120 >
122  : public ClassicalStatistics<CASA_STATP> {
123 public:
124 
125  BiweightStatistics(Int maxNiter=3, Double c=6.0);
126 
127  // copy semantics
129 
130  virtual ~BiweightStatistics();
131 
132  // copy semantics
134  const BiweightStatistics<CASA_STATP>& other
135  );
136 
137  virtual StatisticsData::ALGORITHM algorithm() const;
138 
139  // Clone this instance
140  virtual StatisticsAlgorithm<CASA_STATP>* clone() const;
141 
142  // <group>
143  // these statistics are not supported. The methods, which override
144  // the virtual ancestor versions, throw exceptions.
145  virtual AccumType getMedian(
146  CountedPtr<uInt64> knownNpts=nullptr,
147  CountedPtr<AccumType> knownMin=nullptr,
148  CountedPtr<AccumType> knownMax=nullptr,
149  uInt binningThreshholdSizeBytes=4096*4096,
150  Bool persistSortedArray=False, uInt nBins=10000
151  );
152 
153  virtual AccumType getMedianAndQuantiles(
154  std::map<Double, AccumType>& quantileToValue,
155  const std::set<Double>& quantiles, CountedPtr<uInt64> knownNpts=nullptr,
156  CountedPtr<AccumType> knownMin=nullptr,
157  CountedPtr<AccumType> knownMax=nullptr,
158  uInt binningThreshholdSizeBytes=4096*4096,
159  Bool persistSortedArray=False, uInt nBins=10000
160  );
161 
162  virtual AccumType getMedianAbsDevMed(
163  CountedPtr<uInt64> knownNpts=nullptr,
164  CountedPtr<AccumType> knownMin=nullptr,
165  CountedPtr<AccumType> knownMax=nullptr,
166  uInt binningThreshholdSizeBytes=4096*4096,
167  Bool persistSortedArray=False, uInt nBins=10000
168  );
169 
170  virtual std::map<Double, AccumType> getQuantiles(
171  const std::set<Double>& quantiles, CountedPtr<uInt64> npts=nullptr,
173  uInt binningThreshholdSizeBytes=4096*4096,
174  Bool persistSortedArray=False, uInt nBins=10000
175  );
176 
177  virtual std::pair<Int64, Int64> getStatisticIndex(
179  );
180  // </group>
181 
182  // returns the number of iterations performed to
183  // compute the current location and scale values
184  Int getNiter() const;
185 
186  // reset object to initial state. Clears all private fields including data,
187  // accumulators, etc.
188  virtual void reset();
189 
190  // If c is True, an exception is thrown; this algorithm does not support
191  // computing stats as data are added.
192  virtual void setCalculateAsAdded(Bool c);
193 
194  // Provide guidance to algorithms by specifying a priori which statistics
195  // the caller would like calculated. This algorithm always needs to compute
196  // the location (MEAN) and the scale (STDDEV) so these statistics are always
197  // added to the input set, which is why this method overrides the base class
198  // version.
199  virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
200 
201 protected:
202 
203  void _computeStats();
204 
206 
207 private:
208  Double _c{0};
210  AccumType _location{0}, _scale{0};
211  std::pair<AccumType, AccumType> _range{};
212  // _npts is the number of points computed using ClassicalStatistics
214 
215  // because the compiler gets confused if these aren't explicitly typed
216  static const AccumType FOUR;
217  static const AccumType FIVE;
218 
220  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4,
221  AccumType& ww_4u2, DataIterator dataIter, MaskIterator maskIter,
222  WeightsIterator weightsIter, uInt64 dataCount,
223  const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
224  );
225 
227  AccumType& sxw2, AccumType& sw2, DataIterator dataIter,
228  MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount,
229  const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
230  );
231 
232  void _computeScaleSums(
233  AccumType& sx_M2w4, AccumType& ww_4u2, DataIterator dataIter,
234  MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount,
235  const typename StatisticsDataset<CASA_STATP>::ChunkData& chunk
236  ) const;
237 
238  void _doLocationAndScale();
239 
240  void _doLocation();
241 
242  void _doScale();
243 
244  // <group>
245  // sxw2 = sum(x_i*(1 - u_i^2)^2)
246  // sw2 = sum((1-u_i^2)^2)
247  // sx_M2w4 = sum((x_i - _location)^2 * (1 - u_i^2)^4) = sum((x_i - _location)^2 * w_i^4)
248  // ww_4u2 = sum((1 - u_i^2) * (1 - 5*u_i^2)) = sum(w_i * (w_i - 4*u_i^2))
250  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
251  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
252  ) const;
253 
255  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
256  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
257  const DataRanges& ranges, Bool isInclude
258  ) const;
259 
261  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
262  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
263  const MaskIterator& maskBegin, uInt maskStride
264  ) const;
265 
267  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
268  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
269  const MaskIterator& maskBegin, uInt maskStride,
270  const DataRanges& ranges, Bool isInclude
271  ) const;
272 
274  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
275  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
276  uInt64 nr, uInt dataStride
277  ) const;
278 
280  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
281  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
282  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
283  ) const;
284 
286  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
287  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
288  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
289  uInt maskStride, const DataRanges& ranges, Bool isInclude
290  ) const;
291 
293  AccumType& sxw2, AccumType& sw2, AccumType& sx_M2w4, AccumType& ww_4u2,
294  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
295  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
296  uInt maskStride
297  ) const;
298  // </group>
299 
300  // <group>
301  // sxw2 = sum(x_i*(1 - u_i^2)^2)
302  // sw2 = sum((1-u_i^2)^2)
303  void _locationSums(
304  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
305  uInt64 nr, uInt dataStride
306  ) const;
307 
308  void _locationSums(
309  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
310  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
311  ) const;
312 
313  void _locationSums(
314  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
315  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
316  uInt maskStride
317  ) const;
318 
319  void _locationSums(
320  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
321  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
322  uInt maskStride, const DataRanges& ranges, Bool isInclude
323  ) const;
324 
325  void _locationSums(
326  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
327  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
328  ) const;
329 
330  void _locationSums(
331  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
332  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
333  const DataRanges& ranges, Bool isInclude
334  ) const;
335 
336  void _locationSums(
337  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
338  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
339  const MaskIterator& maskBegin, uInt maskStride,
340  const DataRanges& ranges, Bool isInclude
341  ) const;
342 
343  void _locationSums(
344  AccumType& sxw2, AccumType& sw2, const DataIterator& dataBegin,
345  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
346  const MaskIterator& maskBegin, uInt maskStride
347  ) const;
348  // </group>
349 
350  // <group>
351  // sx_M2w4 = sum((x_i - _location)^2 * (1 - u_i^2)^4) = sum((x_i - _location)^2 * w_i^4)
352  // ww_4u2 = sum((1 - u_i^2) * (1 - 5*u_i^2)) = sum(w_i * (w_i - 4*u_i^2))
353  void _scaleSums(
354  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
355  uInt64 nr, uInt dataStride
356  ) const;
357 
358  void _scaleSums(
359  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
360  uInt64 nr, uInt dataStride, const DataRanges& ranges,
361  Bool isInclude
362  ) const;
363 
364  void _scaleSums(
365  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
366  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
367  uInt maskStride
368  ) const;
369 
370  void _scaleSums(
371  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
372  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
373  uInt maskStride, const DataRanges& ranges, Bool isInclude
374  ) const;
375 
376  void _scaleSums(
377  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
378  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
379  ) const;
380 
381  void _scaleSums(
382  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
383  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
384  const DataRanges& ranges, Bool isInclude
385  ) const;
386 
387  void _scaleSums(
388  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
389  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
390  const MaskIterator& maskBegin, uInt maskStride,
391  const DataRanges& ranges, Bool isInclude
392  ) const;
393 
394  void _scaleSums(
395  AccumType& sx_M2w4, AccumType& ww_4u2, const DataIterator& dataBegin,
396  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
397  const MaskIterator& maskBegin, uInt maskStride
398  ) const;
399  // </group>
400 
401 };
402 
403 }
404 
405 #ifndef CASACORE_NO_AUTO_TEMPLATES
406 #include <casacore/scimath/StatsFramework/BiweightStatistics.tcc>
407 #endif
408 
409 #endif
void _computeLocationAndScaleSums(AccumType &sxw2, AccumType &sw2, AccumType &sx_M2w4, AccumType &ww_4u2, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const typename StatisticsDataset< CASA_STATP >::ChunkData &chunk)
int Int
Definition: aipstype.h:50
void _locationAndScaleSums(AccumType &sxw2, AccumType &sw2, AccumType &sx_M2w4, AccumType &ww_4u2, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
sxw2 = sum(x_i*(1 - u_i^2)^2) sw2 = sum((1-u_i^2)^2) sx_M2w4 = sum((x_i - _location)^2 * (1 - u_i^2)^...
Representation of a statistics dataset used in statistics framework calculatations.
void _computeScaleSums(AccumType &sx_M2w4, AccumType &ww_4u2, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const typename StatisticsDataset< CASA_STATP >::ChunkData &chunk) const
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantileToValue, const std::set< Double > &quantiles, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
these statistics are not supported.
LatticeExprNode max(const LatticeExprNode &left, const LatticeExprNode &right)
BiweightStatistics(Int maxNiter=3, Double c=6.0)
unsigned long long uInt64
Definition: aipsxtype.h:39
void _computeLocationSums(AccumType &sxw2, AccumType &sw2, DataIterator dataIter, MaskIterator maskIter, WeightsIterator weightsIter, uInt64 dataCount, const typename StatisticsDataset< CASA_STATP >::ChunkData &chunk)
virtual StatsData< AccumType > _getStatistics()
Int getNiter() const
returns the number of iterations performed to compute the current location and scale values ...
BiweightStatistics< CASA_STATP > & operator=(const BiweightStatistics< CASA_STATP > &other)
copy semantics
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Provide guidance to algorithms by specifying a priori which statistics the caller would like calculat...
uInt64 _npts
_npts is the number of points computed using ClassicalStatistics
Class to calculate statistics in a &quot;classical&quot; sense, ie using accumulators with no special filtering...
std::pair< AccumType, AccumType > _range
ALGORITHM
implemented algorithms
void _locationSums(AccumType &sxw2, AccumType &sw2, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
sxw2 = sum(x_i*(1 - u_i^2)^2) sw2 = sum((1-u_i^2)^2)
virtual void setCalculateAsAdded(Bool c)
If c is True, an exception is thrown; this algorithm does not support computing stats as data are add...
virtual std::pair< Int64, Int64 > getStatisticIndex(StatisticsData::STATS stat)
see base class description
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &quantiles, CountedPtr< uInt64 > npts=nullptr, CountedPtr< AccumType > min=nullptr, CountedPtr< AccumType > max=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Referenced counted pointer for constant data.
Definition: CountedPtr.h:80
LatticeExprNode min(const LatticeExprNode &left, const LatticeExprNode &right)
static const AccumType FIVE
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
double Double
Definition: aipstype.h:55
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
void _scaleSums(AccumType &sx_M2w4, AccumType &ww_4u2, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
sx_M2w4 = sum((x_i - _location)^2 * (1 - u_i^2)^4) = sum((x_i - _location)^2 * w_i^4) ww_4u2 = sum((1...
#define DataRanges
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
const Bool False
Definition: aipstype.h:44
The biweight algorithm is a robust iterative algorithm that computes two quantities called the &quot;locat...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
const Double c
Fundamental physical constants (SI units):
static const AccumType FOUR
because the compiler gets confused if these aren&#39;t explicitly typed
virtual void reset()
reset object to initial state.
unsigned int uInt
Definition: aipstype.h:51