casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnsIndexArray.h
Go to the documentation of this file.
1 //# ColumnsIndexArray.h: Index to an array column in a table
2 //# Copyright (C) 2001,2002
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_COLUMNSINDEXARRAY_H
29 #define TABLES_COLUMNSINDEXARRAY_H
30 
31 
32 //# Includes
33 #include <casacore/casa/aips.h>
38 
39 namespace casacore { //# NAMESPACE CASACORE - BEGIN
40 
41 //# Forward Declarations
42 class String;
43 class TableColumn;
44 
45 
46 // <summary>
47 // Index to an array column in a table.
48 // </summary>
49 
50 // <use visibility=export>
51 
52 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndexArray.cc" demos="">
53 // </reviewed>
54 
55 // <prerequisite>
56 // <li> <linkto class=Table>Table</linkto>
57 // <li> <linkto class=Record>Record</linkto>
58 // <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto>
59 // </prerequisite>
60 
61 // <synopsis>
62 // This class makes it possible to use transient indices on top
63 // of an array column in a table in order to speed up the process of
64 // finding rows based on a given key or key range.
65 // It is similar to class <linkto class=ColumnsIndex>ColumnsIndex</linkto>
66 // which is meant for one or more scalar columns.
67 // <p>
68 // When constructing a <src>ColumnsIndexArray</src> object, one has to define
69 // which column forms the key for this index on the given
70 // <src>table</src> object.
71 // Not every data type is supported; only uChar, Short, Int, uInt, Int64 and
72 // String array columns are supported.
73 // The column can contain arrays of any shape and it can also contain
74 // empty cells. The class will probably mostly be used for vectors, as
75 // they seem to be the most logical way to hold multiple keys.
76 // <br>The data in the given column will be read, sorted,
77 // and stored in memory. When looking up a key or key range, the class
78 // will use a fast binary search on the data held in memory.
79 // <p>
80 // The <src>ColumnsIndexArray</src> object contains a
81 // <linkto class=Record>Record</linkto> object which can be used
82 // to define the key to be looked up. The record contains a field for
83 // the column in the index (with the same name and data type).
84 // The fastest way to fill the key is by creating a
85 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for
86 // the field in the record (see the example) and fill it as needed.
87 // However, one can also use the <src>Record::define</src> function,
88 // but that is slower.
89 // <br>
90 // A second record is available to define the upper key
91 // in case a key range has to be looked up. The keys can be accessed
92 // using the various <src>accessKey</src> functions.
93 // <p>
94 // When a key is defined, the <src>getRowNumbers</src> function can be
95 // used to find the table rows containing the given key (range).
96 // Function <src>getRowNumber</src> can be used to look up a single key
97 // if all keys in the index are unique (which can be tested with the
98 // <src>isUnique</src> function).
99 // <p>
100 // Instead of using the internal records holding the keys, one can also
101 // pass one's own Record object to <src>getRowNumbers</src>.
102 // However, it will be slower.
103 // <p>
104 // After an index is created, it is possible to change the data
105 // in the underlying columns. However, the <src>ColumnsIndexArray</src> can
106 // not detect if the column data have changed. It can only detect if
107 // the number of rows has changed. If the column data have changed,
108 // the user has to use the <src>setChanged</src> function to indicate
109 // that the column has changed.
110 // <br>If data have changed, the entire index will be recreated by
111 // rereading and resorting the data. This will be deferred
112 // until the next key lookup.
113 // </synopsis>
114 
115 // <example>
116 // Suppose one has a table with a column NAME containing vectors.
117 // <srcblock>
118 // // Open the table and make an index for the column.
119 // Table tab("my.tab");
120 // ColumnsIndexArray colInx(tab, "NAME");
121 // // Make a RecordFieldPtr for the NAME field in the index key record.
122 // // Its data type has to match the data type of the column.
123 // RecordFieldPtr<String> nameFld(colInx.accessKey(), "NAME");
124 // // Find the row for a given name.
125 // Bool found;
126 // // Fill the key field and get the row number.
127 // // NAME is a unique key, so only one row number matches.
128 // // Otherwise function getRowNumbers would have to be used.
129 // *nameFld = "MYNAME";
130 // rownr_t rownr = colInx.getRowNumber (found);
131 // if (!found) {
132 // cout << "Name MYNAME is unknown" << endl;
133 // }
134 // // Now get a range of names and return the row numbers in ascending order.
135 // // This uses the fact that the 'unique' argument also sorts the data.
136 // RecordFieldPtr<String> nameUpp(colInx.accessUpperKey(), "NAME");
137 // *nameFld = "LOWER";
138 // *nameUpp = "UPPER";
139 // RowNumbers rownrs = colInx.getRowNumbers (True, True, True);
140 // </srcblock>
141 // </example>
142 // <motivation>
143 // Bob Garwood needed such a class.
144 // </motivation>
145 
146 
148 {
149 public:
150  // Create an index on the given table for the given column.
151  // The column is expected to be an array column; its arrays can have
152  // any shape and cells can be empty. The column data will be read and
153  // sorted to form the index (there is no <src>noSort</src> option).
154  // It only supports String and integer columns.
155  ColumnsIndexArray (const Table&, const String& columnName);
156 
157  // Copy constructor (copy semantics).
158  ColumnsIndexArray (const ColumnsIndexArray& that);
159 
161 
162  // Assignment (copy semantics).
164 
165  // Are all keys in the index unique?
166  Bool isUnique() const;
167 
168  // Return the name of the column forming the index.
169  const String& columnName() const;
170 
171  // Get the table for which this index is created.
172  const Table& table() const;
173 
174  // Something has changed in the table, so the index has to be recreated.
175  // The 2nd version indicates that a specific column has changed,
176  // so only that column might need to be reread. If that column is not
177  // part of the index, nothing will be done.
178  // <br>Note that the class itself keeps track of whether the number of
179  // rows in the table has changed.
180  // <group>
181  void setChanged();
182  void setChanged (const String& columnName);
183  // </group>
184 
185  // Access the key values.
186  // These functions allow you to create RecordFieldPtr<T> objects
187  // for each field in the key. In this way you can quickly fill in
188  // the key.
189  // <br>The records have a fixed type, so you cannot add or delete fields.
190  // <br>Note that <src>accessKey</src> and <src>accessLowerKey</src>
191  // are synonyms; they return the same underlying record.
192  // <group>
193  Record& accessKey();
196  // </group>
197 
198  // Find the row number matching the key. All keys have to be unique,
199  // otherwise an exception is thrown.
200  // If no match is found, <src>found</src> is set to False.
201  // The 2nd version makes it possible to pass in your own Record
202  // instead of using the internal record via the <src>accessKey</src>
203  // functions. Note that the given Record will be copied to the internal
204  // record, thus overwriting it.
205  // <group>
206  rownr_t getRowNumber (Bool& found);
207  rownr_t getRowNumber (Bool& found, const Record& key);
208  // </group>
209 
210  // Find the row numbers matching the key. It should be used instead
211  // of <src>getRowNumber</src> if the same key can exist multiple times.
212  // The 2nd version makes it possible to pass in your own Record
213  // instead of using the internal record via the <src>accessKey</src>
214  // functions. Note that the given Record will be copied to the internal
215  // record, thus overwriting it.
216  // <br>A row can contain multiple equal values. In such a case the
217  // same row number can occur multiple times in the output vector,
218  // unless <src>unique</src> is set to True. Note that making the row
219  // numbers unique implies a sort, so it can also be used to get the
220  // row numbers in ascending order.
221  // <group>
223  RowNumbers getRowNumbers (const Record& key, Bool unique=False);
224  // </group>
225 
226  // Find the row numbers matching the key range. The boolean arguments
227  // tell if the lower and upper key are part of the range.
228  // The 2nd version makes it possible to pass in your own Records
229  // instead of using the internal records via the
230  // <src>accessLower/UpperKey</src> functions.
231  // Note that the given Records will be copied to the internal
232  // records, thus overwriting them.
233  // <br>A row can contain multiple matching values. In such a case the
234  // same row number can occur multiple times in the output vector,
235  // unless <src>unique</src> is set to True. Note that making the row
236  // numbers unique implies a sort, so it can also be used to get the
237  // row numbers in ascending order.
238  // <group>
239  RowNumbers getRowNumbers (Bool lowerInclusive, Bool upperInclusive,
240  Bool unique=False);
241  RowNumbers getRowNumbers (const Record& lower, const Record& upper,
242  Bool lowerInclusive, Bool upperInclusive,
243  Bool unique=False);
244  // </group>
245 
246 protected:
247  // Copy that object to this.
248  void copy (const ColumnsIndexArray& that);
249 
250  // Delete all data in the object.
251  void deleteObjects();
252 
253  // Add a column to the record description for the keys.
254  // Note that this function has no <src>arrayPossible</src> switch;
255  // the column is expected to be an array column.
256  void addColumnToDesc (RecordDesc& description,
257  const TableColumn& column);
258 
259  // Make the various internal <src>RecordFieldPtr</src> objects.
260  void makeObjects (const RecordDesc& description);
261 
262  // Read the data of the column forming the index, sort them and
263  // form the index.
264  void readData();
265 
266  // Do a binary search on <src>itsUniqueIndex</src> for the key in
267  // <src>fieldPtr</src>.
268  // If the key is found, <src>found</src> is set to True and the index
269  // in <src>itsUniqueIndex</src> is returned.
270  // If not found, <src>found</src> is set to False and the index
271  // of the next higher key is returned.
272  rownr_t bsearch (Bool& found, void* fieldPtr) const;
273 
274  // Compare the key in <src>fieldPtr</src> with the given index entry.
275  // -1 is returned when less, 0 when equal, 1 when greater.
276  static Int compare (void* fieldPtr,
277  void* dataPtr,
278  Int dataType,
279  rownr_t index);
280 
281  // Fill the row numbers vector for the given start till end in the
282  // <src>itsUniqueIndex</src> vector (end is not inclusive).
283  // If <src>unique</src> is True, the row numbers will be made unique.
284  void fillRowNumbers (Vector<rownr_t>& rows, rownr_t start, rownr_t end,
285  Bool unique) const;
286 
287  // Get the data if the column is an array.
288  // <group>
289  void getArray (Vector<uChar>& result, const String& name);
290  void getArray (Vector<Short>& result, const String& name);
291  void getArray (Vector<Int>& result, const String& name);
292  void getArray (Vector<uInt>& result, const String& name);
293  void getArray (Vector<Int64>& result, const String& name);
294  void getArray (Vector<String>& result, const String& name);
295  // </group>
296 
297  // Fill the rownrs belonging to each array value.
298  void fillRownrs (rownr_t npts, const Block<rownr_t>& nrel);
299 
300 private:
307  void* itsData; //# pointer to data in itsDataVector
308  //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*.
309  //# They are used for fast access to the records.
313  Vector<rownr_t> itsDataIndex; //# Row numbers of all keys
314  //# Indices in itsDataIndex for each unique key
316  Block<rownr_t> itsRownrs; //# rownr for each value
317  rownr_t* itsDataInx; //# pointer to data in itsDataIndex
318  rownr_t* itsUniqueInx; //# pointer to data in itsUniqueIndex
319 };
320 
321 
323 {
325 }
326 inline const Table& ColumnsIndexArray::table() const
327 {
328  return itsTable; //# the table for which this index was created
329 }
331 {
332  return *itsLowerKeyPtr;
333 }
335 {
336  return *itsLowerKeyPtr;
337 }
339 {
340  return *itsUpperKeyPtr;
341 }
342 
343 
344 
345 } //# NAMESPACE CASACORE - END
346 
347 #endif
ColumnsIndexArray & operator=(const ColumnsIndexArray &that)
Assignment (copy semantics).
int Int
Definition: aipstype.h:50
size_t nelements() const
How many elements does this array have? Product of all axis lengths.
Definition: ArrayBase.h:103
void makeObjects(const RecordDesc &description)
Make the various internal RecordFieldPtr objects.
Main interface class to a read/write table.
Definition: Table.h:157
Vector< rownr_t > itsUniqueIndex
rownr_t getRowNumber(Bool &found)
Find the row number matching the key.
ColumnsIndexArray(const Table &, const String &columnName)
Create an index on the given table for the given column.
Index to an array column in a table.
Record & accessKey()
Access the key values.
RowNumbers getRowNumbers(Bool unique=False)
Find the row numbers matching the key.
Description of the fields in a record object.
Definition: RecordDesc.h:105
void fillRownrs(rownr_t npts, const Block< rownr_t > &nrel)
Fill the rownrs belonging to each array value.
A hierarchical collection of named fields of various types.
Definition: Record.h:180
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
Bool isUnique() const
Are all keys in the index unique?
static Int compare(void *fieldPtr, void *dataPtr, Int dataType, rownr_t index)
Compare the key in fieldPtr with the given index entry.
void copy(const ColumnsIndexArray &that)
Copy that object to this.
void fillRowNumbers(Vector< rownr_t > &rows, rownr_t start, rownr_t end, Bool unique) const
Fill the row numbers vector for the given start till end in the itsUniqueIndexArray vector (end is no...
Read/write access to a table column.
Definition: TableColumn.h:98
const Bool False
Definition: aipstype.h:44
void setChanged()
Something has changed in the table, so the index has to be recreated.
const Table & table() const
Get the table for which this index is created.
uInt64 rownr_t
Define the type of a row number in a table.
Definition: aipsxtype.h:46
void readData()
Read the data of the columns forming the index, sort them and form the index.
String: the storage and methods of handling collections of characters.
Definition: String.h:225
void deleteObjects()
Delete all data in the object.
void getArray(Vector< uChar > &result, const String &name)
Get the data if the column is an array.
const String & columnName() const
Return the names of the columns forming the index.
rownr_t bsearch(Bool &found, void *fieldPtr) const
Do a binary search on itsUniqueIndexArray for the key in fieldPtrs.
void addColumnToDesc(RecordDesc &description, const TableColumn &column)
Add a column to the record description for the keys.