casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ConstrainedRangeStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_CONSTRAINEDRANGESTATISTICS_H
27 #define SCIMATH_CONSTRAINEDRANGESTATISTICS_H
28 
29 #include <casacore/casa/aips.h>
30 
33 
34 #include <set>
35 #include <vector>
36 #include <utility>
37 
38 namespace casacore {
39 
40 // Abstract base class for statistics algorithms which are characterized by
41 // a range of good values. The range is usually calculated dynamically based
42 // on the entire distribution. The specifics of such calculations are
43 // delegated to derived classes.
44 
45 template <
46  class AccumType, class DataIterator, class MaskIterator=const Bool*,
47  class WeightsIterator=DataIterator
48 >
49 class ConstrainedRangeStatistics : public ClassicalStatistics<CASA_STATP> {
50 public:
51 
52  ConstrainedRangeStatistics() = delete;
53 
55 
56  // <group>
57  // In the following group of methods, if the size of the composite dataset
58  // is smaller than
59  // <src>binningThreshholdSizeBytes</src>, the composite dataset
60  // will be (perhaps partially) sorted and persisted in memory during the
61  // call. In that case, and if <src>persistSortedArray</src> is True, this
62  // sorted array will remain in memory after the call and will be used on
63  // subsequent calls of this method when
64  // <src>binningThreshholdSizeBytes</src> is greater than the size of the
65  // composite dataset. If <src>persistSortedArray</src> is False, the sorted
66  // array will not be stored after this call completes, so for any subsequent
67  // calls for which the dataset size is less than
68  // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
69  // scratch. Values which are not included due to non-unity strides, are not
70  // included in any specified ranges, are masked, or have associated weights
71  // of zero are not considered as dataset members for quantile computations.
72  // If one has a priori information regarding the number of points (npts)
73  // and/or the minimum and maximum values of the data set, these can be
74  // supplied to improve performance. Note however, that if these values are
75  // not correct, the resulting median and/or quantile values will also not be
76  // correct (although see the following notes regarding max/min). Note that
77  // if this object has already had getStatistics() called, and the min and
78  // max were calculated, there is no need to pass these values in as they
79  // have been stored internally and used (although passing them in shouldn't
80  // hurt anything). If provided, npts, the number of points falling in the
81  // specified ranges which are not masked and have weights > 0, should be
82  // exactly correct. <src>min</src> can be less than the true minimum, and
83  // <src>max</src> can be greater than the true maximum, but for best
84  // performance, these should be as close to the actual min and max as
85  // possible. In order for quantile computations to occur over multiple
86  // datasets, all datasets must be available. This means that if
87  // setCalculateAsAdded() was previously called by passing in a value of
88  // True, these methods will throw an exception as the previous call
89  // indicates that there is no guarantee that all datasets will be available.
90  // If one uses a data provider (by having called setDataProvider()), then
91  // this should not be an issue.
92 
93  // get the median of the distribution.
94  // For a dataset with an odd number of good points, the median is just the
95  // value at index int(N/2) in the equivalent sorted dataset, where N is the
96  // number of points. For a dataset with an even number of points, the median
97  // is the mean of the values at indices int(N/2)-1 and int(N/2) in the
98  // sorted dataset.
99  virtual AccumType getMedian(
100  CountedPtr<uInt64> knownNpts=nullptr,
101  CountedPtr<AccumType> knownMin=nullptr,
102  CountedPtr<AccumType> knownMax=nullptr,
103  uInt binningThreshholdSizeBytes=4096*4096,
104  Bool persistSortedArray=False, uInt nBins=10000
105  );
106 
107  // get the median of the absolute deviation about the median of the data.
108  virtual AccumType getMedianAbsDevMed(
109  CountedPtr<uInt64> knownNpts=nullptr,
110  CountedPtr<AccumType> knownMin=nullptr,
111  CountedPtr<AccumType> knownMax=nullptr,
112  uInt binningThreshholdSizeBytes=4096*4096,
113  Bool persistSortedArray=False, uInt nBins=10000
114  );
115 
116  // If one needs to compute both the median and quantile values, it is better
117  // to call getMedianAndQuantiles() rather than getMedian() and
118  // getQuantiles() separately, as the first will scan large data sets fewer
119  // times than calling the separate methods. The return value is the median;
120  // the quantiles are returned in the <src>quantileToValue</src> map.
121  virtual AccumType getMedianAndQuantiles(
122  std::map<Double, AccumType>& quantileToValue,
123  const std::set<Double>& quantiles,
124  CountedPtr<uInt64> knownNpts=nullptr,
125  CountedPtr<AccumType> knownMin=nullptr,
126  CountedPtr<AccumType> knownMax=nullptr,
127  uInt binningThreshholdSizeBytes=4096*4096,
128  Bool persistSortedArray=False, uInt nBins=10000
129  );
130 
131  // Get the specified quantiles. Each value in <src>quantiles</src> must be
132  // between 0 and 1, noninclusive.
133  virtual std::map<Double, AccumType> getQuantiles(
134  const std::set<Double>& quantiles,
135  CountedPtr<uInt64> knownNpts=nullptr,
136  CountedPtr<AccumType> knownMin=nullptr,
137  CountedPtr<AccumType> knownMax=nullptr,
138  uInt binningThreshholdSizeBytes=4096*4096,
139  Bool persistSortedArray=False, uInt nBins=10000
140  );
141  // </group>
142 
143  // get the min and max of the data set
144  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
145 
146  // scan the dataset(s) that have been added, and find the number of good
147  // points. This method may be called even if setStatsToCalculate has been
148  // called and NPTS has been excluded. If setCalculateAsAdded(True) has
149  // previously been called after this object has been (re)initialized, an
150  // exception will be thrown.
151  virtual uInt64 getNPts();
152 
153  // see base class description
155 
156  // reset object to initial state. Clears all private fields including data,
157  // accumulators, global range.
158  virtual void reset();
159 
160 protected:
161 
162  // Concrete derived classes are responsible for providing an appropriate
163  // QuantileComputer object to the constructor, which is ultimately passed
164  // up the instantiation hierarchy and stored at the StatisticsAlgorithm
165  // level.
168  );
169 
170  // copy semantics
173  );
174 
175  // copy semantics
178  );
179 
180  // <group>
181  // scan through the data set to determine the number of good (unmasked,
182  // weight > 0, within range) points. The first with no mask, no ranges, and
183  // no weights is trivial with npts = nr in this class, but is implemented
184  // here so that derived classes may override it.
185  virtual void _accumNpts(
186  uInt64& npts, const DataIterator& dataStart, uInt64 nr, uInt dataStride
187  ) const;
188 
189  virtual void _accumNpts(
190  uInt64& npts,
191  const DataIterator& dataStart, uInt64 nr, uInt dataStride,
192  const DataRanges& ranges, Bool isInclude
193  ) const;
194 
195  virtual void _accumNpts(
196  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
197  const MaskIterator& maskBegin, uInt maskStride
198  ) const;
199 
200  virtual void _accumNpts(
201  uInt64& npts,
202  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
203  const MaskIterator& maskBegin, uInt maskStride,
204  const DataRanges& ranges, Bool isInclude
205  ) const;
206 
207  virtual void _accumNpts(
208  uInt64& npts, const DataIterator& dataBegin,
209  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
210  ) const;
211 
212  virtual void _accumNpts(
213  uInt64& npts, const DataIterator& dataBegin,
214  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
215  const DataRanges& ranges, Bool isInclude
216  ) const;
217 
218  virtual void _accumNpts(
219  uInt64& npts, const DataIterator& dataBegin,
220  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
221  const MaskIterator& maskBegin, uInt maskStride,
222  const DataRanges& ranges, Bool isInclude
223  ) const;
224 
225  virtual void _accumNpts(
226  uInt64& npts, const DataIterator& dataBegin,
227  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
228  const MaskIterator& maskBegin, uInt maskStride
229  ) const;
230  // </group>
231 
232  virtual AccumType _getStatistic(StatisticsData::STATS stat);
233 
235 
236  // <group>
237  virtual void _minMax(
239  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
240  ) const;
241 
242  virtual void _minMax(
244  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
245  const DataRanges& ranges, Bool isInclude
246  ) const;
247 
248  virtual void _minMax(
250  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
251  const MaskIterator& maskBegin, uInt maskStride
252  ) const;
253 
254  virtual void _minMax(
256  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
257  const MaskIterator& maskBegin, uInt maskStride,
258  const DataRanges& ranges, Bool isInclude
259  ) const;
260 
261  virtual void _minMax(
263  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
264  uInt64 nr, uInt dataStride
265  ) const;
266 
267  virtual void _minMax(
269  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
270  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
271  ) const;
272 
273  virtual void _minMax(
275  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
276  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
277  uInt maskStride, const DataRanges& ranges, Bool isInclude
278  ) const;
279 
280  virtual void _minMax(
282  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
283  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
284  uInt maskStride
285  ) const;
286  // </group>
287 
288  // <group>
289  // Sometimes we want the min, max, and npts all in one scan.
290  virtual void _minMaxNpts(
291  uInt64& npts, CountedPtr<AccumType>& mymin,
292  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
293  uInt dataStride
294  ) const;
295 
296  virtual void _minMaxNpts(
297  uInt64& npts, CountedPtr<AccumType>& mymin,
298  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
299  uInt dataStride, const DataRanges& ranges, Bool isInclude
300  ) const;
301 
302  virtual void _minMaxNpts(
303  uInt64& npts, CountedPtr<AccumType>& mymin,
304  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
305  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
306  ) const;
307 
308  virtual void _minMaxNpts(
309  uInt64& npts, CountedPtr<AccumType>& mymin,
310  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
311  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
312  const DataRanges& ranges, Bool isInclude
313  ) const;
314 
315  virtual void _minMaxNpts(
316  uInt64& npts, CountedPtr<AccumType>& mymin,
317  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
318  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
319  ) const;
320 
321  virtual void _minMaxNpts(
322  uInt64& npts, CountedPtr<AccumType>& mymin,
323  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
324  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
325  const DataRanges& ranges, Bool isInclude
326  ) const;
327 
328  virtual void _minMaxNpts(
329  uInt64& npts, CountedPtr<AccumType>& mymin,
330  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
331  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
332  const MaskIterator& maskBegin, uInt maskStride,
333  const DataRanges& ranges, Bool isInclude
334  ) const;
335 
336  virtual void _minMaxNpts(
337  uInt64& npts, CountedPtr<AccumType>& mymin,
338  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
339  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
340  const MaskIterator& maskBegin, uInt maskStride
341  ) const;
342  // </group>
343 
344  // This method is purposefully non-virtual. Derived classes
345  // should implement the version with no parameters.
346  void _setRange(CountedPtr<std::pair<AccumType, AccumType> > r);
347 
348  // derived classes need to implement how to set their respective range
349  virtual void _setRange() = 0;
350 
351  // <group>
352  // no weights, no mask, no ranges
353  virtual void _unweightedStats(
354  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
355  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
356  );
357 
358  // no weights, no mask
359  virtual void _unweightedStats(
360  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
361  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
362  const DataRanges& ranges, Bool isInclude
363  );
364 
365  virtual void _unweightedStats(
366  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
367  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
368  const MaskIterator& maskBegin, uInt maskStride
369  );
370 
371  virtual void _unweightedStats(
372  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
373  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
374  const MaskIterator& maskBegin, uInt maskStride,
375  const DataRanges& ranges, Bool isInclude
376  );
377  // </group>
378 
379  // <group>
380  // has weights, but no mask, no ranges
381  virtual void _weightedStats(
382  StatsData<AccumType>& stats, LocationType& location,
383  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
384  uInt64 nr, uInt dataStride
385  );
386 
387  virtual void _weightedStats(
388  StatsData<AccumType>& stats, LocationType& location,
389  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
390  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
391  );
392 
393  virtual void _weightedStats(
394  StatsData<AccumType>& stats, LocationType& location,
395  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
396  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
397  uInt maskStride
398  );
399 
400  virtual void _weightedStats(
401  StatsData<AccumType>& stats, LocationType& location,
402  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
403  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
404  uInt maskStride, const DataRanges& ranges, Bool isInclude
405  );
406  // </group>
407 
408 private:
409 
411 
412 };
413 
414 }
415 
416 #ifndef CASACORE_NO_AUTO_TEMPLATES
417 #include <casacore/scimath/StatsFramework/ConstrainedRangeStatistics.tcc>
418 #endif
419 
420 #endif
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
virtual void _setRange()=0
derived classes need to implement how to set their respective range
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &quantiles, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
unsigned long long uInt64
Definition: aipsxtype.h:39
CountedPtr< std::pair< AccumType, AccumType > > _range
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
Class to calculate statistics in a "classical" sense, i.e., using accumulators with no special filtering...
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantileToValue, const std::set< Double > &quantiles, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
Sometimes we want the min, max, and npts all in one scan.
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
Reference-counted pointer for constant data.
Definition: CountedPtr.h:80
virtual AccumType _getStatistic(StatisticsData::STATS stat)
ConstrainedRangeStatistics< CASA_STATP > & operator=(const ConstrainedRangeStatistics< CASA_STATP > &other)
copy semantics
std::pair< Int64, Int64 > LocationType
virtual LocationType getStatisticIndex(StatisticsData::STATS stat)
see base class description
#define DataRanges
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
virtual void reset()
reset object to initial state.
Abstract base class for statistics algorithms which are characterized by a range of good values...
const Bool False
Definition: aipstype.h:44
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataStart, uInt64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
virtual StatsData< AccumType > _getStatistics()
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
get the min and max of the data set
unsigned int uInt
Definition: aipstype.h:51