casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FitToHalfStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_FITTOHALFSTATISTICS_H
27 #define SCIMATH_FITTOHALFSTATISTICS_H
28 
29 #include <casacore/casa/aips.h>
30 
33 
34 namespace casacore {
35 
36 // Class to calculate statistics using the so-called fit to half algorithm. In
37 // this algorithm, a center value is specified, and only points greater or equal
38 // or less or equal this value are included. Furthermore, each of the included
39 // points is reflected about the center value, and these virtual points are
40 // added to the included points and the union of sets of included real points
41 // and virtual points are used for computing statistics. The specified center
42 // point is therefore the mean and median of the resulting distribution, and the
43 // total number of points is exactly twice the number of real data points that
44 // are included.
45 //
46 // This class uses a ConstrainedRangeQuantileComputer object for computing
47 // quantile-like statistics. See class documentation for StatisticsAlgorithm for
48 // details regarding QuantileComputer classes.
49 
50 template <
51  class AccumType, class DataIterator, class MaskIterator=const Bool *,
52  class WeightsIterator=DataIterator
53 >
55  : public ConstrainedRangeStatistics<CASA_STATP> {
56 public:
57 
58  const static AccumType TWO;
59 
60  // <src>value</src> is only used if <src>center</src>=CVALUE
65  );
66 
67  // copy semantics
69 
70  virtual ~FitToHalfStatistics();
71 
72  // copy semantics
75  );
76 
77  // Clone this instance. Caller is responsible for deleting the returned
78  // pointer.
79  virtual StatisticsAlgorithm<CASA_STATP>* clone() const;
80 
81  // get the algorithm that this object uses for computing stats
84  };
85 
86  // The median is just the center value, so none of the parameters to this
87  // method are used.
88  AccumType getMedian(
89  CountedPtr<uInt64> knownNpts=nullptr,
90  CountedPtr<AccumType> knownMin=nullptr,
91  CountedPtr<AccumType> knownMax=nullptr,
92  uInt binningThreshholdSizeBytes=4096*4096,
93  Bool persistSortedArray=False, uInt nBins=10000
94  );
95 
96  // <group>
97  // In the following group of methods, if the size of the composite dataset
98  // is smaller than <src>binningThreshholdSizeBytes</src>, the composite
99  // dataset will be (perhaps partially) sorted and persisted in memory during
100  // the call. In that case, and if <src>persistSortedArray</src> is True,
101  // this sorted array will remain in memory after the call and will be used
102  // on subsequent calls of this method when
103  // <src>binningThreshholdSizeBytes</src> is greater than the size of the
104  // composite dataset. If <src>persistSortedArray</src> is False, the sorted
105  // array will not be stored after this call completes and so any subsequent
106  // calls for which the dataset size is less than
107  // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
108  // scratch. Values which are not included due to non-unity strides, are not
109  // included in any specified ranges, are masked, or have associated weights
110  // of zero are not considered as dataset members for quantile computations.
111  // If one has a priori information regarding the number of points (npts)
112  // and/or the minimum and maximum values of the data set, these can be
113  // supplied to improve performance. Note however, that if these values are
114  // not correct, the resulting median and/or quantile values will also not be
115  // correct (although see the following notes regarding max/min). Note that
116  // if this object has already had getStatistics() called, and the min and
117  // max were calculated, there is no need to pass these values in as they
118  // have been stored internally and will be used (although passing them
119  // explicitly shouldn't hurt anything). If provided, npts, the number of
120  // points falling in the specified ranges which are not masked and have
121  // weights > 0, should be correct. <src>min</src> can be less than the true
122  // minimum, and <src>max</src> can be greater than the True maximum, but for
123  // best performance, these should be as close to the actual min and max as
124  // possible (and ideally the actual min/max values of the data set).
125  AccumType getMedianAndQuantiles(
126  std::map<Double, AccumType>& quantiles,
127  const std::set<Double>& fractions,
128  CountedPtr<uInt64> knownNpts=nullptr,
129  CountedPtr<AccumType> knownMin=nullptr,
130  CountedPtr<AccumType> knownMax=nullptr,
131  uInt binningThreshholdSizeBytes=4096*4096,
132  Bool persistSortedArray=False, uInt nBins=10000
133  );
134 
135  // get the median of the absolute deviation about the median of the data.
136  AccumType getMedianAbsDevMed(
137  CountedPtr<uInt64> knownNpts=nullptr,
138  CountedPtr<AccumType> knownMin=nullptr,
139  CountedPtr<AccumType> knownMax=nullptr,
140  uInt binningThreshholdSizeBytes=4096*4096,
141  Bool persistSortedArray=False, uInt nBins=10000
142  );
143 
144  // Get the specified quantiles. <src>fractions</src> must be between 0 and
145  // 1, noninclusive.
146  std::map<Double, AccumType> getQuantiles(
147  const std::set<Double>& fractions,
148  CountedPtr<uInt64> knownNpts=nullptr,
149  CountedPtr<AccumType> knownMin=nullptr,
150  CountedPtr<AccumType> knownMax=nullptr,
151  uInt binningThreshholdSizeBytes=4096*4096,
152  Bool persistSortedArray=False, uInt nBins=10000
153  );
154  // </group>
155 
156  // scan the dataset(s) that have been added, and find the min and max.
157  // This method may be called even if setStatsToCaclulate has been called and
158  // MAX and MIN has been excluded.
159  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
160 
161  // scan the dataset(s) that have been added, and find the number of good
162  // points. This method may be called even if setStatsToCaclulate has been
163  // called and NPTS has been excluded. If setCalculateAsAdded(True) has
164  // previously been called after this object has been (re)initialized, an
165  // exception will be thrown.
166  uInt64 getNPts();
167 
168  // reset object to initial state. Clears all private fields including data,
169  // accumulators, global range. It does not affect the center type, center
170  // value, or which "side" to use which were set at construction.
171  virtual void reset();
172 
173  // This class does not allow statistics to be calculated as datasets are
174  // added, so an exception will be thrown if <src>c</src> is True.
175  void setCalculateAsAdded(Bool c);
176 
177  // Override base class method by requiring mean to be computed in addition
178  // to what is added in stats if the requested center value is CMEAN.
179  virtual void setStatsToCalculate(std::set<StatisticsData::STATS>& stats);
180 
181 protected:
182 
183  virtual StatsData<AccumType> _getInitialStats() const;
184 
186 
188 
189  inline const StatsData<AccumType>& _getStatsData() const {
190  return _statsData;
191  }
192 
193  // <group>
194  // no weights, no mask, no ranges
195  void _unweightedStats(
196  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
197  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
198  );
199 
200  // no weights, no mask
201  void _unweightedStats(
202  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
203  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
204  const DataRanges& ranges, Bool isInclude
205  );
206 
207  void _unweightedStats(
208  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
209  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
210  const MaskIterator& maskBegin, uInt maskStride
211  );
212 
213  void _unweightedStats(
214  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
215  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
216  const MaskIterator& maskBegin, uInt maskStride,
217  const DataRanges& ranges, Bool isInclude
218  );
219  // </group>
220 
221  void _updateDataProviderMaxMin(const StatsData<AccumType>& threadStats);
222 
223  // <group>
224  // has weights, but no mask, no ranges
225  void _weightedStats(
226  StatsData<AccumType>& stats, LocationType& location,
227  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
228  uInt64 nr, uInt dataStride
229  );
230 
231  void _weightedStats(
232  StatsData<AccumType>& stats, LocationType& location,
233  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
234  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
235  );
236 
237  void _weightedStats(
238  StatsData<AccumType>& stats, LocationType& location,
239  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
240  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
241  uInt maskStride
242  );
243 
244  void _weightedStats(
245  StatsData<AccumType>& stats, LocationType& location,
246  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
247  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
248  uInt maskStride, const DataRanges& ranges, Bool isInclude
249  );
250  // </group>
251 
252 private:
255  AccumType _centerValue;
258  // these are the max and min for the real portion of the dataset
261  // This is used for convenience and performance. It should always
262  // be the same as the _range value used in the base
263  // ConstrainedRangeStatistics object
265 
266  // get the min max of the entire (real + virtual) data set. Only
267  // used for quantile computation
268  void _getMinMax(
269  CountedPtr<AccumType>& realMin, CountedPtr<AccumType>& realMax,
271  );
272 
273  // get the min/max of the real portion only of the dataset
274  void _getRealMinMax(AccumType& realMin, AccumType& realMax);
275 
276  void _setRange();
277 
278 };
279 
280 }
281 
282 #ifndef CASACORE_NO_AUTO_TEMPLATES
283 #include <casacore/scimath/StatsFramework/FitToHalfStatistics.tcc>
284 #endif
285 
286 #endif
virtual void reset()
reset object to initial state.
AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantiles, const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
StatsData< AccumType > & _getStatsData()
Retrieve stats structure.
CountedPtr< std::pair< AccumType, AccumType > > _range
This is used for convenience and performance.
void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
unsigned long long uInt64
Definition: aipsxtype.h:39
AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
The median is just the center value, so none of the parameters to this method are used...
void _setRange()
derived classes need to implement how to set their respective range
StatsData< AccumType > _statsData
USE_DATA
which section of data to use, greater than or less than the center value
virtual StatsData< AccumType > _getStatistics()
FitToHalfStatistics(FitToHalfStatisticsData::CENTER center=FitToHalfStatisticsData::CMEAN, FitToHalfStatisticsData::USE_DATA useData=FitToHalfStatisticsData::LE_CENTER, AccumType value=0)
value is only used if center=CVALUE
FitToHalfStatisticsData::CENTER _centerType
ALGORITHM
implemented algorithms
virtual StatisticsData::ALGORITHM algorithm() const
get the algorithm that this object uses for computing stats
CENTER
choice of center point based on the corresponding statistics from the entire distribution of data...
CountedPtr< AccumType > _realMin
Referenced counted pointer for constant data.
Definition: CountedPtr.h:80
std::pair< Int64, Int64 > LocationType
void setCalculateAsAdded(Bool c)
This class does not allow statistics to be calculated as datasets are added, so an exception will be ...
Class to calculate statistics using the so-called fit to half algorithm.
#define DataRanges
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
std::map< Double, AccumType > getQuantiles(const std::set< Double > &fractions, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
void _getRealMinMax(AccumType &realMin, AccumType &realMax)
get the min/max of the real portion only of the dataset
virtual StatisticsAlgorithm< CASA_STATP > * clone() const
Clone this instance.
Abstract base class for statistics algorithms which are characterized by a range of good values...
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
scan the dataset(s) that have been added, and find the min and max.
const Bool False
Definition: aipstype.h:44
void _updateDataProviderMaxMin(const StatsData< AccumType > &threadStats)
CountedPtr< AccumType > _realMax
these are the max and min for the real portion of the dataset
void _getMinMax(CountedPtr< AccumType > &realMin, CountedPtr< AccumType > &realMax, CountedPtr< AccumType > knownMin, CountedPtr< AccumType > knownMax)
get the min max of the entire (real + virtual) data set.
virtual StatsData< AccumType > _getInitialStats() const
virtual void setStatsToCalculate(std::set< StatisticsData::STATS > &stats)
Override base class method by requiring mean to be computed in addition to what is added in stats if ...
const Double c
Fundamental physical constants (SI units):
const StatsData< AccumType > & _getStatsData() const
FitToHalfStatistics< CASA_STATP > & operator=(const FitToHalfStatistics< CASA_STATP > &other)
copy semantics
uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
unsigned int uInt
Definition: aipstype.h:51