casacore
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
tables
Tables.h
Go to the documentation of this file.
1
//# Tables.h: The Tables module - Casacore data storage
2
//# Copyright (C) 1994-2010
3
//# Associated Universities, Inc. Washington DC, USA.
4
//#
5
//# This library is free software; you can redistribute it and/or modify it
6
//# under the terms of the GNU Library General Public License as published by
7
//# the Free Software Foundation; either version 2 of the License, or (at your
8
//# option) any later version.
9
//#
10
//# This library is distributed in the hope that it will be useful, but WITHOUT
11
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13
//# License for more details.
14
//#
15
//# You should have received a copy of the GNU Library General Public License
16
//# along with this library; if not, write to the Free Software Foundation,
17
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18
//#
19
//# Correspondence concerning AIPS++ should be addressed as follows:
20
//# Internet email: aips2-request@nrao.edu.
21
//# Postal address: AIPS++ Project Office
22
//# National Radio Astronomy Observatory
23
//# 520 Edgemont Road
24
//# Charlottesville, VA 22903-2475 USA
25
//#
26
//# $Id$
27
28
#ifndef TABLES_TABLES_H
29
#define TABLES_TABLES_H
30
31
//# Includes
32
//# table description
33
#include <
casacore/casa/aips.h
>
34
#include <
casacore/tables/Tables/TableDesc.h
>
35
#include <
casacore/tables/Tables/ColumnDesc.h
>
36
#include <
casacore/tables/Tables/ScaColDesc.h
>
37
#include <
casacore/tables/Tables/ArrColDesc.h
>
38
#include <
casacore/tables/Tables/ScaRecordColDesc.h
>
39
40
//# table access
41
#include <
casacore/tables/Tables/Table.h
>
42
#include <
casacore/tables/Tables/TableLock.h
>
43
#include <
casacore/tables/Tables/SetupNewTab.h
>
44
#include <
casacore/tables/Tables/ScalarColumn.h
>
45
#include <
casacore/tables/Tables/ArrayColumn.h
>
46
#include <
casacore/tables/Tables/TableRow.h
>
47
#include <
casacore/tables/Tables/TableCopy.h
>
48
#include <
casacore/tables/Tables/TableUtil.h
>
49
#include <
casacore/casa/Arrays/Array.h
>
50
#include <
casacore/casa/Arrays/Slicer.h
>
51
#include <
casacore/casa/Arrays/Slice.h
>
52
53
//# keywords
54
#include <
casacore/tables/Tables/TableRecord.h
>
55
#include <
casacore/casa/Containers/RecordField.h
>
56
57
//# table lookup
58
#include <
casacore/tables/Tables/ColumnsIndex.h
>
59
#include <
casacore/tables/Tables/ColumnsIndexArray.h
>
60
61
//# table vectors
62
#include <
casacore/tables/Tables/TableVector.h
>
63
#include <
casacore/tables/Tables/TabVecMath.h
>
64
#include <
casacore/tables/Tables/TabVecLogic.h
>
65
66
//# data managers
67
#include <
casacore/tables/DataMan.h
>
68
69
//# table expressions (for selection of rows)
70
#include <
casacore/tables/TaQL.h
>
71
72
73
namespace
casacore {
//# NAMESPACE CASACORE - BEGIN
74
75
// <module>
76
77
// <summary>
78
// CTDS (Casacore Table Data System) is the data storage mechanism for Casacore
79
// </summary>
80
81
// <use visibility=export>
82
83
// <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
84
// </reviewed>
85
86
// <prerequisite>
87
// <li> <linkto class="Record:description">Record</linkto> class
88
// </prerequisite>
89
90
// <etymology>
91
// "Table" is a formal term from relational database theory:
92
// <em> "The organizing principle in a relational database is the TABLE,
93
// a rectangular, row/column arrangement of data values."</em>
94
// Casacore tables are extensions to traditional tables, but are similar
95
// enough that we use the same name. There is also a strong resemblance
96
// between the uses of Casacore tables, and FITS binary tables, which
97
// provides another reason to use "Tables" to describe the Casacore data
98
// storage mechanism.
99
// </etymology>
100
101
// <synopsis>
102
// Tables are the fundamental storage mechanism for Casacore. This document
103
// explains <A HREF="#Tables:motivation">why</A> they had to be made,
104
// <A HREF="#Tables:properties">what</A> their properties are, and
105
// <A HREF="#Tables:open">how</A> to use them. The last subject is
106
// discussed and illustrated in a sequence of sections:
107
// <UL>
108
// <LI> <A HREF="#Tables:open">opening</A> an existing table,
109
// <LI> <A HREF="#Tables:read">reading</A> from a table,
110
// <LI> <A HREF="#Tables:creation">creating</A> a new table,
111
// <LI> <A HREF="#Tables:write">writing</A> into a table,
112
// <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
113
// <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
114
// (see also <A HREF="../notes/199.html">Table Query Language</A>),
115
// <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
116
// <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
117
// <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
118
// for concurrent access,
119
// <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
120
// <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
121
// <LI> <A HREF="#Tables:performance">performance and robustness</A>
122
// considerations with some information on
123
// <A HREF="#Tables:iotracing">IO tracing</A>.
124
// </UL>
125
// A few <A HREF="#Tables:applications">applications</A> exist to inspect
126
// and manipulate a table.
127
//
128
// Several UML diagrams describe the class structure of the Tables module.
129
// <ul>
130
// <li> <a href="TableOverview.drawio.svg.html">Global overview of Table access</a>.
131
// <li> <a href="TableDesc.drawio.svg.html">Table and column descriptions</a>.
132
// <li> <a href="TableRecord.drawio.svg.html">Table keywords</a>.
133
// <li> <a href="Table.drawio.svg.html">Table class structure</a>.
134
// <li> <a href="PlainTable.drawio.svg.html">Detailed PlainTable class structure</a>.
135
// <li> <a href="DataManager.drawio.svg.html">DataManagers for storage</a>.
136
// </ul>
137
138
// <ANCHOR NAME="Tables:motivation">
139
// <motivation></ANCHOR>
140
//
141
// The Casacore tables are mainly based upon the ideas of Allen Farris,
142
// as laid out in the
143
// <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
144
// AIPS++ Database document</A>, from where the following paragraph is taken:
145
//
146
// <p>
147
// Traditional relational database tables have two features that
148
// decisively limit their applicability to scientific data. First, an item of
149
// data in a column of a table must be atomic -- it must have no internal
150
// structure. A consequence of this restriction is that relational
151
// databases are unable to deal with arrays of data items. Second, an
152
// item of data in a column of a table must not have any direct or
153
// implied linkages to other items of data or data aggregates. This
154
// restriction makes it difficult to model complex relationships between
155
// collections of data. While these restrictions may make it easy to
156
// define a mathematically complete set of data manipulation operations,
157
// they are simply intolerable in a scientific data-handling context.
158
// Multi-dimensional arrays are frequently the most natural modes in
159
// which to discuss and think about scientific data. In addition,
160
// scientific data often requires complex calibration operations that
161
// must draw on large bodies of data about equipment and its performance
162
// in various states. The restrictions imposed by the relational model
163
// make it very difficult to deal with complex problems of this nature.
164
// <p>
165
//
166
// In response to these limitations, and other needs, the Casacore tables were
167
// designed.
168
// </motivation>
169
170
// <ANCHOR NAME="Tables:properties">
171
// <h3>Table Properties</h3></ANCHOR>
172
//
173
// Casacore tables have the following properties:
174
// <ul>
175
// <li> A table consists of a number of rows and columns.
176
// <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
177
// for the table as a whole and for individual columns. A keyword/value
178
// pair for a column could, for instance, define its unit.
179
// <li> Each table has a <A HREF="#Tables:Table Description">description</A>
180
// which specifies the number and type of columns, and maybe initial
181
// keyword sets and default values for the columns.
182
// <li> A cell in a column may contain
183
// <UL>
184
// <LI> a scalar;
185
// <LI> a "direct" array -- which must have the same shape in all
186
// cells of a column, is usually small, and is stored in the
187
// table itself;
188
// <LI> an "indirect" array -- which may have different shapes in
189
// different cells of the same column, is arbitrarily large,
190
// and is stored in a separate file;
191
// </UL>
192
// <li> A column may be
193
// <UL>
194
// <LI> "filled" -- containing actual data, or
195
// <LI> "virtual" -- containing a recipe telling how the data will
196
// be generated dynamically
197
// </UL>
198
// <li> Only the standard Casacore data types can be used in filled
199
// columns, be they scalars or arrays: Bool, uChar, Short, uShort,
200
// Int, uInt, Int64, float, double, Complex, DComplex and String.
201
// Furthermore scalars containing
202
// <linkto class=TableRecord>record</linkto> values are possible.
203
// <li> A column can have a default value, which will automatically be stored
204
// in a cell of the column, when a row is added to the table.
205
// <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
206
// reading, writing and generation of data. Each column in a table can
207
// be assigned its own data manager, which allows for optimization of
208
// the data storage per column. The choice of data manager determines
209
// whether a column is filled or virtual.
210
// <li> Table data are stored in a canonical format, so they can be read
211
// on any machine. To avoid needless swapping of bytes, the data can
212
// be stored in big endian (as used on e.g. SUN) or little endian
213
// (as used on Intel PC-s) canonical format.
214
// By default it uses the format specified in the aipsrc variable
215
// <code>table.endianformat</code> which defaults to
216
// <code>Table::LocalEndian</code> (the endian format of the
217
// machine being used when creating the table).
218
// <li> The SQL-like
219
// <a href="../notes/199.html">Table Query Language</a> (TaQL)
220
// can be used to do operations on tables like
221
// select, sort, update, insert, delete, and create.
222
// </ul>
223
//
224
// Tables can be in one of four forms:
225
// <ul>
226
// <li> A plain table is a table stored on disk.
227
// It can be shared by multiple processes.
228
// <li> A memory table is a table held in memory.
229
// It is a process specific table, thus not sharable.
230
// The <linkto class=Table>Table::copy</linkto> function can be used
231
// to turn a memory table into a plain table.
232
// <li> A reference table is a table referencing a plain or memory table.
233
// It is the result of a selection or sort on another table.
234
// A reference table references the data in the other table, thus
235
// changing data in a reference table means that the data in the
236
// original table are changed.
237
// The <linkto class=Table>Table::deepCopy</linkto> function can be
238
// used to turn a reference table into a plain table.
239
// <li> <A HREF="#Tables:concatenation">a concatenated table</A>
240
// is a union of tables (of any form) with the same description.
241
// They are concatenated in a virtual way, thus no copy is made.
242
// </ul>
243
// Concurrent access from different processes to the same plain table is
244
// fully supported by means of a <A HREF="#Tables:LockSync">
245
// locking/synchronization</A> mechanism. Concurrent access over NFS is also
246
// supported.
247
// <p>
248
// A (somewhat primitive) mechanism is available to do a
249
// <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
250
// of a key.
251
252
// <ANCHOR NAME="Tables:open">
253
// <h3>Opening an Existing Table</h3></ANCHOR>
254
//
255
// To open an existing table you just create a
256
// <linkto class="Table:description">Table</linkto> object giving
257
// the name of the table, like:
258
//
259
// <srcblock>
260
// Table readonly_table ("tableName");
261
// // or
262
// Table read_and_write_table ("tableName", Table::Update);
263
// </srcblock>
264
//
265
// The constructor option determines whether the table will be opened as
266
// readonly or as read/write. A readonly table file must be opened
267
// as readonly, otherwise an exception is thrown. The functions
268
// <linkto class="Table">Table::isWritable(...)</linkto>
269
// can be used to determine if a table is writable.
270
//
271
// When the table is opened, the data managers are reinstantiated
272
// according to their definition at table creation.
273
// <p>
274
// <ANCHOR NAME="Tables:openTable">
275
// The static function <src>TableUtil::openTable</src> can be used to open a table,
276
// in particular a subtable, in a simple way by means of the :: notation like
277
// <src>maintable::subtable</src>. The :: notation is much better than specifying
278
// an explicit path (such as <src>maintable/subtable</src>), because it also works
279
// fine if the main table is a reference table (e.g. the result of a selection).
280
281
// <ANCHOR NAME="Tables:read">
282
// <h3>Reading from a Table</h3></ANCHOR>
283
//
284
// You can read data from a table column with the "get" functions
285
// in the classes
286
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
287
// and
288
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
289
// For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
290
// uShort, uInt, float, double, Complex, DComplex and String) you could
291
// instead use
292
// <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
293
// <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
294
// These functions offer an extra: they do automatic data type promotion;
295
// so that you can, for example, get a double value from a float column.
296
//
297
// These "get" functions are used in the same way as the simple "put"
298
// functions described in the section on <A HREF="#Tables:write">writing into a table</A>.
299
// <p>
300
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
301
// can be constructed for a non-writable column. However, an exception
302
// is thrown if the put function is used for it.
303
// The same is true for
304
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and
305
// <linkto class="TableColumn:description">TableColumn</linkto>.
306
// <p>
307
// A typical program could look like:
308
// <srcblock>
309
// #include <casacore/tables/Tables/Table.h>
310
// #include <casacore/tables/Tables/ScalarColumn.h>
311
// #include <casacore/tables/Tables/ArrayColumn.h>
312
// #include <casacore/casa/Arrays/Vector.h>
313
// #include <casacore/casa/Arrays/Slicer.h>
314
// #include <casacore/casa/Arrays/ArrayMath.h>
315
// #include <iostream>
316
//
317
// int main()
318
// {
319
// // Open the table (readonly).
320
// Table tab ("some.name");
321
//
322
// // Construct the various column objects.
323
// // Their data type has to match the data type in the table description.
324
// ScalarColumn<Int> acCol (tab, "ac");
325
// ArrayColumn<Float> arr2Col (tab, "arr2");
326
//
327
// // Loop through all rows in the table.
328
// uInt nrow = tab.nrow();
329
// for (uInt i=0; i<nrow; i++) {
330
// // Read the row for both columns.
331
// cout << "Column ac in row i = " << acCol(i) << endl;
332
// Array<Float> array = arr2Col.get (i);
333
// }
334
//
335
// // Show the entire column ac,
336
// // and show the 10th element of arr2 in each row.
337
// cout << acCol.getColumn();
338
// cout << arr2Col.getColumn (Slicer(Slice(10)));
339
// }
340
// </srcblock>
341
342
// <ANCHOR NAME="Tables:creation">
343
// <h3>Creating a Table</h3></ANCHOR>
344
//
345
// The creation of a table is a multi-step process:
346
// <ol>
347
// <li>
348
// Create a <A HREF="#Tables:Table Description">table description</A>.
349
// <li>
350
// Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
351
// object with the name of the new table.
352
// <li>
353
// Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
354
// <li>
355
// Bind each column to the appropriate data manager.
356
// The system will bind unbound columns to data managers which
357
// are created internally using the default data manager name
358
// defined in the column description.
359
// <li>
360
// Define the shape of direct columns (if that was not already done in the
361
// column description).
362
// <li>
363
// Create the <linkto class="Table:description">Table</linkto>
364
// object from the SetupNewTable object. Here, a final check is performed
365
// and the necessary files are created.
366
// </ol>
367
// The recipe above is meant for the creation of a plain table, but the
368
// creation of a memory table is exactly the same. The only difference
369
// is that in the call to construct the Table object the Table::Memory
370
// type has to be given. Note that in the SetupNewTable object the columns
371
// can be bound to any data manager. <src>MemoryTable</src> will rebind
372
// stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
373
// storage manager, but virtual column bindings are not changed.
374
//
375
// The following example shows how you can create a table. An example
376
// specifically illustrating the creation of the
377
// <A HREF="#Tables:Table Description">table description</A> is given
378
// in that section. Other sections discuss the access to the table.
379
//
380
// <srcblock>
381
// #include <casacore/tables/Tables/TableDesc.h>
382
// #include <casacore/tables/Tables/SetupNewTab.h>
383
// #include <casacore/tables/Tables/Table.h>
384
// #include <casacore/tables/Tables/ScaColDesc.h>
385
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
386
// #include <casacore/tables/Tables/ArrColDesc.h>
387
// #include <casacore/tables/Tables/StandardStMan.h>
388
// #include <casacore/tables/Tables/IncrementalStMan.h>
389
//
390
// int main()
391
// {
392
// // Step 1 -- Build the table description.
393
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
394
// td.comment() = "A test of class SetupNewTable";
395
// td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
396
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
397
// td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
398
// td.addColumn (ScalarColumnDesc<Float> ("ae"));
399
// td.addColumn (ScalarRecordColumnDesc ("arec"));
400
// td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
401
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
402
// td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
403
//
404
// // Step 2 -- Setup a new table from the description.
405
// SetupNewTable newtab("newtab.data", td, Table::New);
406
//
407
// // Step 3 -- Create storage managers for it.
408
// StandardStMan stmanStand_1;
409
// StandardStMan stmanStand_2;
410
// IncrementalStMan stmanIncr;
411
//
412
// // Step 4 -- First, bind all columns to the first storage
413
// // manager. Then, bind a few columns to another storage manager
414
// // (which will overwrite the previous bindings).
415
// newtab.bindAll (stmanStand_1);
416
// newtab.bindColumn ("ab", stmanStand_2);
417
// newtab.bindColumn ("ae", stmanIncr);
418
// newtab.bindColumn ("arr3", stmanIncr);
419
//
420
// // Step 5 -- Define the shape of the direct columns.
421
// // (this could have been done in the column description).
422
// newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
423
// newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
424
//
425
// // Step 6 -- Finally, create the table consisting of 10 rows.
426
// Table tab(newtab, 10);
427
//
428
// // Now we can fill the table, which is shown in a next section.
429
// // The Table destructor will flush the table to the files.
430
// }
431
// </srcblock>
432
// To create a table in memory, only step 6 has to be modified slightly to:
433
// <srcblock>
434
// Table tab(newtab, Table::Memory, 10);
435
// </srcblock>
436
//
437
// Note that the function <src>TableUtil::createTable</src> can be used to create a table
438
// in a simpler way. It can also be used to create a subtable using the :: notation
439
// similar to the <A HREF="#Tables:openTable"><src>TableUtil::openTable</src></A>
440
// function described above.
441
442
// <ANCHOR NAME="Tables:write">
443
// <h3>Writing into a Table</h3></ANCHOR>
444
//
445
// Once a table has been created or has been opened for read/write,
446
// you want to write data into it. Before doing that you may have
447
// to add one or more rows to the table.
448
// <note role=tip> If a table was created with a given number of rows, you
449
// do not need to add rows; you may not even be able to do so.
450
// </note>
451
//
452
// When adding new rows to the table, either via the
453
// <linkto class="Table">Table(...) constructor</linkto>
454
// or via the
455
// <linkto class="Table">Table::addRow(...)</linkto>
456
// function, you can choose to have those rows initialized with the
457
// default values given in the description.
458
//
459
// To actually write the data into the table you need the classes
460
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and
461
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
462
// For each column you can construct one or
463
// more of these objects. Their put(...) functions
464
// let you write a value at a time or the entire column in one go.
465
// For arrays you can "put" subsections of the arrays.
466
//
467
// As an alternative for scalars of a standard data type (i.e. Bool,
468
// uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
469
// and String) you could use the functions
470
// <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
471
// These functions offer an extra: automatic data type promotion; so that
472
// you can, for example, put a float value in a double column.
473
//
474
// A typical program could look like:
475
// <srcblock>
476
// #include <casacore/tables/Tables/TableDesc.h>
477
// #include <casacore/tables/Tables/SetupNewTab.h>
478
// #include <casacore/tables/Tables/Table.h>
479
// #include <casacore/tables/Tables/ScaColDesc.h>
480
// #include <casacore/tables/Tables/ArrColDesc.h>
481
// #include <casacore/tables/Tables/ScalarColumn.h>
482
// #include <casacore/tables/Tables/ArrayColumn.h>
483
// #include <casacore/casa/Arrays/Vector.h>
484
// #include <casacore/casa/Arrays/Slicer.h>
485
// #include <casacore/casa/Arrays/ArrayMath.h>
486
// #include <iostream>
487
//
488
// int main()
489
// {
490
// // First build the table description.
491
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
492
// td.comment() = "A test of class SetupNewTable";
493
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
494
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
495
//
496
// // Setup a new table from the description,
497
// // and create the (still empty) table.
498
// // Note that since we do not explicitly bind columns to
499
// // data managers, all columns will be bound to the default
500
// // standard storage manager StandardStMan.
501
// SetupNewTable newtab("newtab.data", td, Table::New);
502
// Table tab(newtab);
503
//
504
// // Construct the various column objects.
505
// // Their data type has to match the data type in the description.
506
// ScalarColumn<Int> ac (tab, "ac");
507
// ArrayColumn<Float> arr2 (tab, "arr2");
508
// Vector<Float> vec2(100);
509
//
510
// // Write the data into the columns.
511
// // In each cell arr2 will be a vector of length 100.
512
// // Since its shape is not set explicitly, it is done implicitly.
513
// for (uInt i=0; i<10; i++) {
514
// tab.addRow(); // First add a row.
515
// ac.put (i, i+10); // value is i+10 in row i
516
// indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
517
// arr2.put (i, vec2);
518
// }
519
//
520
// // Finally, show the entire column ac,
521
// // and show the 10th element of arr2.
522
// cout << ac.getColumn();
523
// cout << arr2.getColumn (Slicer(Slice(10)));
524
//
525
// // The Table destructor writes the table.
526
// }
527
// </srcblock>
528
//
529
// In this example we added rows in the for loop, but we could also have
530
// created 10 rows straightaway by constructing the Table object as:
531
// <srcblock>
532
// Table tab(newtab, 10);
533
// </srcblock>
534
// in which case we would not include
535
// <srcblock>
536
// tab.addRow()
537
// </srcblock>
538
//
539
// The classes
540
// <linkto class="TableColumn:description">TableColumn</linkto>,
541
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and
542
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>
543
// contain several functions to put values into a single cell or into the
544
// whole column. This may look confusing, but is actually quite simple.
545
// The functions can be divided in two groups:
546
// <ol>
547
// <li>
548
// Put the given value into the column cell(s).
549
// <ul>
550
// <li>
551
// The simplest put functions,
552
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
553
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
554
// put a value into the given column cell. For convenience, there is an
555
// <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
556
// to put only a part of the array.
557
// <li>
558
// <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
559
// <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
560
// fill an entire column by putting the given value into all the cells
561
// of the column.
562
// <li>
563
// The simplest putColumn functions,
564
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
565
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
566
// put an array of values into the column. There is a special
567
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
568
// version which puts only a part of the arrays.
569
// </ul>
570
//
571
// <li>
572
// Copy values from another column to this column.<BR>
573
// These functions have the advantage that the
574
// data type of the input and/or output column can be unknown.
575
// The generic TableColumn objects can be used for this purpose.
576
// The put(Column) function checks the data types and, if possible,
577
// converts them. If the conversion is not possible, it throws an
578
// exception.
579
// <ul>
580
// <li>
581
// The put functions copy the value in a cell of the input column
582
// to a cell in the output column. The row numbers of the cells
583
// in the columns can be different.
584
// <li>
585
// The putColumn functions copy the entire contents of the input column
586
// to the output column. The lengths of the columns must be equal.
587
// </ul>
588
// Each class has its own set of these functions.
589
// <ul>
590
// <li>
591
// <linkto class="TableColumn">TableColumn::put(...)</linkto> and
592
// <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
593
// are the most generic. They can be
594
// used if the data types of both input and output column are unknown.
595
// Note that these functions are virtual.
596
// <li>
597
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
598
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
599
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
600
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
601
// are less generic and therefore potentially more efficient.
602
// The most efficient variants are the ones taking a
603
// Scalar/ArrayColumn<T>, because they require no data type
604
// conversion.
605
// </ul>
606
// </ol>
607
608
// <ANCHOR NAME="Tables:row-access">
609
// <h3>Accessing rows in a Table</h3></ANCHOR>
610
//
611
// Apart from accessing a table column-wise as described in the
612
// previous two sections, it is also possible to access a table row-wise.
613
// The <linkto class=TableRow>TableRow</linkto> class makes it possible
614
// to access multiple fields in a table row as a whole. Note that like the
615
// XXColumn classes described above, there is also an ROTableRow class
616
// for access to readonly tables.
617
// <p>
618
// On construction of a TableRow object it has to be specified which
619
// fields (i.e. columns) are part of the row. For these fields a
620
// fixed structured <linkto class=TableRecord>TableRecord</linkto>
621
// object is constructed as part of the TableRow object. The TableRow::get
622
// function will fill this record with the table data for the given row.
623
// The user has access to the record and can use
624
// <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
625
// speedier access to the record.
626
// <p>
627
// The class could be used as shown in the following example.
628
// <srcblock>
629
// // Open the table as readonly and define a row object to contain
630
// // the given columns.
631
// // Note that the function stringToVector is a very convenient
632
// // way to construct a Vector<String>.
633
// // Show the description of the fields in the row.
634
// Table table("Some.table");
635
// ROTableRow row (table, stringToVector("col1,col2,col3"));
636
// cout << row.record().description();
637
// // Since the structure of the record is known, the RecordFieldPtr
638
// // objects could be used to allow for easy and fast access to
639
// // the record which is refilled for each get.
640
// RORecordFieldPtr<String> col1(row.record(), "col1");
641
// RORecordFieldPtr<Double> col2(row.record(), "col2");
642
// RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
643
// for (uInt i=0; i<table.nrow(); i++) {
644
// row.get (i);
645
// someString = *col1;
646
// somedouble = *col2;
647
// someArrayInt = *col3;
648
// }
649
// </srcblock>
650
// The description of TableRow contains some more extensive examples.
651
652
// <ANCHOR NAME="Tables:select and sort">
653
// <h3>Table Selection and Sorting</h3></ANCHOR>
654
//
655
// The result of a select and sort of a table is another table,
656
// which references the original table. This means that an update
657
// of a sorted or selected table results in the update of the original
658
// table. The result is, however, a table in itself, so all table
659
// functions (including select and sort) can be used with it.
660
// Note that a true copy of such a reference table can be made with
661
// the <linkto class=Table>Table::deepCopy</linkto> function.
662
// <p>
663
// Rows or columns can be selected from a table. Columns can be selected
664
// by the
665
// <linkto class="Table">Table::project(...)</linkto>
666
// function, while rows can be selected by the various
667
// <linkto class="Table">Table operator()</linkto> functions.
668
// Usually a row is selected by giving a select expression with
669
// <linkto class="TableExprNode:description">TableExprNode</linkto>
670
// objects. These objects represent the various nodes
671
// in an expression, e.g. a constant, a column, or a subexpression.
672
// The Table function
673
// <linkto class="Table">Table::col(...)</linkto>
674
// creates a TableExprNode object for a column. The function
675
// <linkto class="Table">Table::key(...)</linkto>
676
// does the same for a keyword by reading
677
// the keyword value and storing it as a constant in an expression node.
678
// All column nodes in an expression must belong to the same table,
679
// otherwise an exception is thrown.
680
// In the following example we select all rows with RA>10:
681
// <srcblock>
682
// #include <casacore/tables/Tables/ExprNode.h>
683
// Table table ("Table.name");
684
// Table result = table (table.col("RA") > 10);
685
// </srcblock>
686
// while in the next one we select rows with RA and DEC in the given
687
// intervals:
688
// <srcblock>
689
// Table result = table (table.col("RA") > 10
690
// && table.col("RA") < 14
691
// && table.col("DEC") >= -10
692
// && table.col("DEC") <= 10);
693
// </srcblock>
694
// The following operators can be used to form arbitrarily
695
// complex expressions:
696
// <ul>
697
// <li> Relational operators ==, !=, >, >=, < and <=.
698
// <li> Logical operators &&, || and !.
699
// <li> Arithmetic operators +, -, *, /, %, and unary + and -.
700
// <li> Bit operators ^, &, |, and unary ~.
701
// <li> Operator() to take a subsection of an array.
702
// </ul>
703
// Many functions (like sin, max, conj) can be used in an expression.
704
// Class <linkto class=TableExprNode>TableExprNode</linkto> shows
705
// the available functions.
706
// E.g.
707
// <srcblock>
708
// Table result = table (sin (table.col("RA")) > 0.5);
709
// </srcblock>
710
// Function <src>in</src> can be used to select from a set of values.
711
// A value set can be constructed using class
712
// <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
713
// <srcblock>
714
// TableExprNodeSet set;
715
// set.add (TableExprNodeSetElem ("abc"));
716
// set.add (TableExprNodeSetElem ("defg"));
717
// set.add (TableExprNodeSetElem ("h"));
718
// Table result = table (table.col("NAME").in (set));
719
// </srcblock>
720
// This selects the rows with a NAME equal to <src>abc</src>,
721
// <src>defg</src>, or <src>h</src>.
722
//
723
// <p>
724
// You can sort a table on one or more columns containing scalars.
725
// In this example we simply sort on column RA (default is ascending):
726
// <srcblock>
727
// Table table ("Table.name");
728
// Table result = table.sort ("RA");
729
// </srcblock>
730
// Multiple
731
// <linkto class="Table">Table::sort(...)</linkto>
732
// functions exist which allow for more flexible control over the sort order.
733
// In the next example we sort first on RA in descending order
734
// and then on DEC in ascending order:
735
// <srcblock>
736
// Table table ("Table.name");
737
// Block<String> sortKeys(2);
738
// Block<int> sortOrders(2);
739
// sortKeys(0) = "RA";
740
// sortOrders(0) = Sort::Descending;
741
// sortKeys(1) = "DEC";
742
// sortOrders(1) = Sort::Ascending;
743
// Table result = table.sort (sortKeys, sortOrders);
744
// </srcblock>
745
//
746
// Tables stemming from the same root, can be combined in several
747
// ways with the help of the various logical
748
// <linkto class="Table">Table operators</linkto> (operator|, etc.).
749
750
// <h4>Table Query Language</h4>
751
// The selection and sorting mechanism described above can only be used
752
// in a hard-coded way in a C++ program.
753
// There is, however, another way. Strings containing selection and
754
// sorting commands can be used.
755
// The syntax of these commands is based on SQL and is described in the
756
// <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
757
// The language supports UDFs (User Defined Functions) in dynamically
758
// loadable libraries as explained in the note.
759
// <br>A TaQL command can be executed with the static function
760
// <src>tableCommand</src> defined in class
761
// <linkto class=TableParse>TableParse</linkto>.
762
763
// <ANCHOR NAME="Tables:concatenation">
764
// <h3>Table Concatenation</h3></ANCHOR>
765
// Tables with identical descriptions can be concatenated in a virtual way
766
// using the Table concatenation constructor. Such a Table object behaves
767
// as any other Table object, thus any operation can be performed on it.
768
// An identical description means that the number of columns, the column names,
769
// and the data types of the columns must be the same. The columns do not
770
// need to be ordered in the same way nor to be stored in the same way.
771
// <br>Note that if tables have different column names, it is possible
772
// to form a projection (as described in the previous section) first
773
// to make them appear identical.
774
//
775
// Sometimes a MeasurementSet is partitioned, for instance in chunks of
776
// one hour. All those chunks can be virtually concatenated this way.
777
// Note that all tables in the concatenation will be opened, thus one might
778
// run out of file descriptors if there are many chunks.
779
//
780
// Similar to reference tables, it is possible to make a concatenated Table
781
// persistent by using the <src>rename</src> function. It will not copy the
782
// data; only the names of the tables used are written.
783
//
784
// The keywords of a concatenated table are taken from the first table.
785
// It is possible to change or add keywords, but that is not persistent,
786
// not even if the concatenated table is made persistent.
787
// <br>The keywords holding subtables can be handled in a special way.
788
// Normally the subtables of the concatenation are the subtables of the first
789
// table, but it is possible to concatenate subtables as well by
790
// giving their names in the constructor.
791
// In this way the, say, SYSCAL subtable of a MeasurementSet can be
792
// concatenated as well.
793
// <srcblock>
794
// // Create virtual concatenation of ms0 and ms1.
795
// Block<String> names(2);
796
// names[0] = "ms0";
797
// names[1] = "ms1";
798
// // Also concatenate their SYSCAL subtables.
799
// Block<String> subNames(1, "SYSCAL");
800
// Table concTab (names, subNames);
801
// </srcblock>
802
803
// <ANCHOR NAME="Tables:iterate">
804
// <h3>Table Iterators</h3></ANCHOR>
805
//
806
// You can iterate through a table in an arbitrary order by getting
807
// a subset of the table consisting of the rows in which the iteration
808
// columns have the same value.
809
// An iterator object is created by constructing a
810
// <linkto class="TableIterator:description">TableIterator</linkto>
811
// object with the appropriate column names.
812
//
813
// In the next example we define an iteration on the columns Time and
814
// Baseline. Each iteration step returns a table subset in which Time and
815
// Baseline have the same value.
816
//
817
// <srcblock>
818
// // Iterate over Time and Baseline (by default in ascending order).
819
// // Time is the main iteration order, thus the first column specified.
820
// Table t;
821
// Table tab ("UV_Table.data");
822
// Block<String> iv0(2);
823
// iv0[0] = "Time";
824
// iv0[1] = "Baseline";
825
// //
826
// // Create the iterator. This will prepare the first subtable.
827
// TableIterator iter(tab, iv0);
828
// Int nr = 0;
829
// while (!iter.pastEnd()) {
830
// // Get the current subtable.
831
// // This will contain rows with equal Time and Baseline.
832
// t = iter.table();
833
// cout << t.nrow() << " ";
834
// nr++;
835
// // Prepare the next subtable with the next Time,Baseline value.
836
// iter.next();
837
// }
838
// cout << endl << nr << " iteration steps" << endl;
839
// </srcblock>
840
//
841
// You can define more than one iterator on the same table; they operate
842
// independently.
843
//
844
// Note that the result of each iteration step is a table in itself which
845
// references the original table, just as in the case of a sort or select.
846
// This means that the resulting table can be used again in a sort, select,
847
// iteration, etc.
848
849
// <ANCHOR NAME="Tables:vectors">
850
// <h3>Table Vectors</h3></ANCHOR>
851
//
852
// A table vector makes it possible to treat a column in a table
853
// as a vector. Almost all operators and functions defined for normal
854
// vectors, are also defined for table vectors. So it is, for instance,
855
// possible to add a constant to a table vector. This has the effect
856
// that the underlying column gets changed.
857
//
858
// You can use the templated class
859
// <linkto class="TableVector:description">TableVector</linkto>
860
// to make a scalar column appear as a (table) vector.
861
// Columns containing arrays or tables are not supported.
862
// The data type of the TableVector object must match the
863
// data type of the column.
864
// A table vector can also hold a normal vector so that (temporary)
865
// results of table vector operations can be handled.
866
//
867
// In the following example we double the data in column COL1 and
868
// store the result in a temporary table vector.
869
// <srcblock>
870
// // Create a table vector for column COL1.
871
// // Note that if the table is readonly, putting data in the table vector
872
// // results in an exception.
873
// Table tab ("Table.data");
874
// TableVector<Int> tabvec(tab, "COL1");
875
// // Multiply it by a constant. Result is kept in a Vector in memory.
876
// TableVector<Int> temp = 2 * tabvec;
877
// </srcblock>
878
//
879
// In the next example we double the data in COL1 and put the result back
880
// in the column.
881
// <srcblock>
882
// // Create a table vector for column COL1.
883
// // It has to be a TableVector to be able to change the column.
884
// Table tab ("Table.data", Table::Update);
885
// TableVector<Int> tabvec(tab, "COL1");
886
// // Multiply it by a constant.
887
// tabvec *= 2;
888
// </srcblock>
889
890
// <ANCHOR NAME="Tables:keywords">
891
// <h3>Table Keywords</h3></ANCHOR>
892
//
893
// Any number of keyword/value pairs may be attached to the table as a whole,
894
// or to any individual column. They may be freely added, retrieved,
895
// re-assigned, or deleted. They are, in essence, a self-resizing list of
896
// values (any of the primitive types) indexed by Strings (the keyword).
897
//
898
// A table keyword/value pair might be
899
// <srcblock>
900
// Observer = Grote Reber
901
// Date = 10 october 1942
902
// </srcblock>
903
// Column keyword/value pairs might be
904
// <srcblock>
905
// Units = mJy
906
// Reference Pixel = 320
907
// </srcblock>
908
// The class
909
// <linkto class="TableRecord:description">TableRecord</linkto>
910
// represents the keywords in a table.
911
// It is (indirectly) derived from the standard record classes in the class
912
// <linkto class="Record:description">Record</linkto>.
913
914
// <ANCHOR NAME="Tables:Table Description">
915
// <h3>Table Description</h3></ANCHOR>
916
//
917
// A table contains a description of itself, which defines the layout of the
918
// columns and the keyword sets for the table and for the individual columns.
919
// It may also define initial keyword sets and default values for the columns.
920
// Such a default value is automatically stored in a cell in the table column,
921
// whenever a row is added to the table.
922
//
923
// The creation of the table descriptor is the first step in the creation of
924
// a new table. The description is part of the table itself, but may also
925
// exist in a separate file. This is useful if you need to create a number
926
// of tables with the same structure; in other circumstances it probably
927
// should be avoided.
928
//
929
// The public classes to set up a table description are:
930
// <ul>
931
// <li> <linkto class="TableDesc:description">TableDesc</linkto>
932
// -- holds the table description.
933
// <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
934
// -- holds a generic column description.
935
// <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc<T>
936
// </linkto>
937
// -- defines a column containing a scalar value.
938
// <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
939
// </linkto>
940
// -- defines a column containing a scalar record value.
941
// <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc<T>
942
// </linkto>
943
// -- defines a column containing an (in)direct array.
944
// </ul>
945
//
946
// Here follows a typical example of the construction of a table
947
// description. For more specialized things -- like the definition of a
948
// default data manager -- we refer to the descriptions of the above
949
// mentioned classes.
950
//
951
// <srcblock>
952
// #include <casacore/tables/Tables/TableDesc.h>
953
// #include <casacore/tables/Tables/ScaColDesc.h>
954
// #include <casacore/tables/Tables/ArrColDesc.h>
955
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
956
// #include <casacore/tables/Tables/TableRecord.h>
957
// #include <casacore/casa/Arrays/IPosition.h>
958
// #include <casacore/casa/Arrays/Vector.h>
959
//
960
// main()
961
// {
962
// // Create a new table description
963
// // Define a comment for the table description.
964
// // Define some keywords.
965
// ColumnDesc colDesc1, colDesc2;
966
// TableDesc td("tTableDesc", "1", TableDesc::New);
967
// td.comment() = "A test of class TableDesc";
968
// td.rwKeywordSet().define ("ra", float(3.14));
969
// td.rwKeywordSet().define ("equinox", double(1950));
970
// td.rwKeywordSet().define ("aa", Int(1));
971
//
972
// // Define an integer column ab.
973
// td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
974
//
975
// // Add a scalar integer column ac, define keywords for it
976
// // and define a default value 0.
977
// // Overwrite the value of keyword unit.
978
// ScalarColumnDesc<Int> acColumn("ac");
979
// acColumn.rwKeywordSet().define ("scale", Complex(0,0));
980
// acColumn.rwKeywordSet().define ("unit", "");
981
// acColumn.setDefault (0);
982
// td.addColumn (acColumn);
983
// td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
984
//
985
// // Add a scalar string column ad and define its comment string.
986
// td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
987
//
988
// // Now define array columns.
989
// // This one is indirect and has no dimensionality mentioned yet.
990
// td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
991
// // This one is indirect and has 3-dim arrays.
992
// td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3));
993
// // This one is direct and has 2-dim arrays with axes length 4 and 7.
994
// td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3",
995
// IPosition(2,4,7),
996
// ColumnDesc::Direct));
997
//
998
// // Add columns containing records.
999
// td.addColumn (ScalarRecordColumnDesc ("Rec1"));
1000
// }
1001
// </srcblock>
1002
1003
// <ANCHOR NAME="Tables:Data Managers">
1004
// <h3>Data Managers</h3></ANCHOR>
1005
//
1006
// Data managers take care of the actual access to the data in a column.
1007
// There are two kinds of data managers:
1008
// <ol>
1009
// <li> <A HREF="#Tables:storage managers">Storage managers</A> --
1010
// which store the data as such. They can only handle the standard
1011
// data types (Bool,...,String) as discussed in the section about the
1012
// <A HREF="#Tables:properties">table properties</A>.
1013
// <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
1014
// -- which manipulate the data.
1015
// An engine could be a simple thing like scaling the data (as done
1016
// in classic AIPS to reduce data storage), but it could also be an
1017
// elaborate thing like applying corrections on-the-fly.
1018
// <br>A special engine is VirtualTaQLColumn which can be used to define
1019
// the contents of a column by means of a TaQL expression. In particular,
1020
// it can be used to define a constant value for the entire column.
1021
// But it can also be used to calculate the UVW-coordinates on-the-fly.
1022
// <br>An engine must be used when storing data objects with a non-standard type.
1023
// It has to break down the object into items with standard data types
1024
// which can be stored with a storage manager.
1025
// </ol>
1026
// In general the user of a table does not need to be aware which
1027
// data managers are being used underneath. Only when the table is created
1028
// data managers have to be bound to the columns. Thereafter it is
1029
// completely transparent.
1030
//
1031
// Data managers need to be registered, so they can be found when a table is
1032
// opened. All data managers mentioned below are part of the system and
1033
// pre-registered.
1034
// It is, however, also possible to load data managers on demand. If a data
1035
// manager is not registered, an attempt is made to load a shared library with the
1036
// part of the data manager name (in lowercase) before a dot or left arrow.
1037
// The dot makes it possible to have multiple data managers in a shared library,
1038
// while the left arrow is meant for templated data manager classes.
1039
// <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
1040
// library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1041
// successful, its function <src>register_bitflagsengine()</src> will be
1042
// executed which should register the data manager(s). Thereafter it is known
1043
// and will be used. For example in a file Register.h and Register.cc:
1044
// <srcblock>
1045
// // Declare in .h file as C function, so no name mangling is done.
1046
// extern "C" {
1047
// void register_bitflagsengine();
1048
// }
1049
// // Implement in .cc file.
1050
// void register_bitflagsengine()
1051
// {
1052
// BitFlagsEngine<uChar>::registerClass();
1053
// BitFlagsEngine<Short>::registerClass();
1054
// BitFlagsEngine<Int>::registerClass();
1055
// }
1056
// </srcblock>
1057
// There are several functions that can give information which data managers
1058
// are used for which columns and to obtain the characteristics and properties
1059
// of them. Class RODataManAccessor and derived classes can be used for it
1060
// as well as the functions <src>dataManagerInfo</src> and
1061
// <src>showStructure</src> in class Table.
1062
1063
// <ANCHOR NAME="Tables:storage managers">
1064
// <h3>Storage Managers</h3></ANCHOR>
1065
//
1066
// Storage managers are used to store the data contained in the column cells.
1067
// At table construction time the binding of columns to storage managers is done.
1068
// <br>Each storage manager uses one or more files (usually called table.fi_xxx
1069
// where i is a sequence number and _xxx is some kind of extension).
1070
// Typically several files are used to store the data of the columns of a table.
1071
// <br>In order to reduce the number of files (and to support large block sizes),
1072
// it is possible to have a single container file (a MultiFile) containing all
1073
// data files used by the storage managers. Such a file is called table.mf.
1074
// Note that the program <em>lsmf</em> can be used to see which
1075
// files are contained in a MultiFile. The program <em>tomf</em> can
1076
// convert the files in a MultiFile to regular files.
1077
// <br>At table creation time it is decided if a MultiFile will be used. It
1078
// can be done by means of the StorageOption object given to the SetupNewTable
1079
// constructor and/or by the aipsrc variables:
1080
// <ul>
1081
// <li> <src>table.storage.option</src> which can have the value
1082
// 'multifile', 'sepfile' (meaning separate files), or 'default'.
1083
// Currently the default is to use separate files.
1084
// <li> <src>table.storage.blocksize</src> defines the block size to be
1085
// used by a MultiFile. If 0 is given, the file system's block size
1086
// will be used.
1087
// </ul>
1088
// Almost all standard storage managers support the MultiFile.
1089
// The exception is StManAipsIO, because it is hardly ever used.
1090
//
1091
// Several storage managers exist, each with its own storage characteristics.
1092
// The default and preferred storage manager is <src>StandardStMan</src>.
1093
// Other storage managers should only be used if they pay off in
1094
// file space (like <src>IncrementalStMan</src> for slowly varying data)
1095
// or access speed (like the tiled storage managers for large data arrays).
1096
// <br>The storage managers store the data in a big or little endian
1097
// canonical format. The format can be specified when the table is created.
1098
// By default it uses the endian format as specified in the aipsrc variable
1099
// <code>table.endianformat</code> which can have the value local, big,
1100
// or little. The default is local.
1101
// <ol>
1102
// <li>
1103
// <linkto class="StandardStMan:description">StandardStMan</linkto>
1104
// stores all the values in so-called buckets (equally sized chunks
1105
// in the file). It requires little memory.
1106
// <br>It replaces the old <src>StManAipsIO</src>.
1107
//
1108
// <li>
1109
// <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1110
// uses a storage mechanism resembling "incremental backups". A value
1111
// is only stored if it is different from the previous row. It is
1112
// very well suited for slowly varying data.
1113
// <br>The class <linkto class="ROIncrementalStManAccessor:description">
1114
// ROIncrementalStManAccessor</linkto> can be used to tune the
1115
// behaviour of the <src>IncrementalStMan</src>. It contains functions
1116
// to deal with the cache size and to show the behaviour of the cache.
1117
//
1118
// <li>
1119
// The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1120
// store the data as a tiled hypercube allowing for more or less equally
1121
// efficient data access along all main axes. It can be used for
1122
// UV-data as well as for image data.
1123
//
1124
// <li>
1125
// <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1126
// uses <src>AipsIO</src> to store the data in the columns.
1127
// It supports all table functionality, but its I/O is probably not
1128
// as efficient as other storage managers. It also requires that
1129
// a large part of the table fits in memory.
1130
// <br>It should not be used anymore, because it uses a lot of memory
1131
// for larger tables and because it is not very robust in case an
1132
// application or system crashes.
1133
//
1134
// <li>
1135
// <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1136
// holds the data in memory. It means that data 'stored' with this
1137
// storage manager are NOT persistent.
1138
// <br>This storage manager is primarily meant for tables held in
1139
// memory, but it can also be useful for temporary columns in
1140
// normal tables. Note, however, that if a table is accessed
1141
// concurrently from multiple processes, MemoryStMan data cannot be
1142
// synchronized.
1143
//
1144
// <li>
1145
// @ref dyscostman.DyscoStMan is a class that stores data with lossy
1146
// compression. It combines non-linear least-squares quantization and
1147
// different kinds of normalization. With the typical factor of 4
1148
// compression, the loss in accuracy from lossy compression is
1149
// negligible. It should only be used for real (non-simulated) data
1150
// that is in a Measurement Set.
1151
// The method is described in this article:
1152
// https://arxiv.org/abs/1609.02019.
1153
//
1154
// <li>
1155
// <linkto class="Adios2StMan:description">Adios2StMan</linkto> uses the
1156
// <A HREF="https://github.com/ornladios/ADIOS2">ADIOS2 framework</A> to
1157
// store and load column data.
1158
// <br>ADIOS2 itself has several configurable storage backends, and this
1159
// flexibility is also available via Adios2StMan. This includes, among other
1160
// things, storing compressed data, or choosing a different on-disk format.
1161
// <br>This storage manager is also special in that it provides parallel
1162
// writing capabilities for MPI processes, so that multiple processes can
1163
// write into different sections of the same column concurrently.
1164
// </ol>
1165
//
1166
// The storage manager framework makes it possible to support arbitrary files
1167
// as tables. This has been used in a case where a file is filled
1168
// by the data acquisition system of a telescope. The file is simultaneously
1169
// used as a table using a dedicated storage manager. The table
1170
// system and storage manager provide a sync function to synchronize
1171
// the processes, i.e. to make CTDS aware of changes
1172
// in the file size (thus in the table size) by the filling process.
1173
//
1174
// <note role=tip>
1175
// Not all data managers support all the table functionality. So, the choice
1176
// of a data manager can greatly influence the type of operations you can do
1177
// on the table as a whole.
1178
// For example, if a column uses the tiled storage manager,
1179
// it is not possible to delete rows from the table, because that storage
1180
// manager will not support deletion of rows.
1181
// However, it is always possible to delete all columns of a data
1182
// manager in one single call.
1183
// </note>
1184
1185
// <ANCHOR NAME="Tables:TiledStMan">
1186
// <h3>Tiled Storage Manager</h3></ANCHOR>
1187
// The Tiled Storage Managers allow one to store the data of
1188
// one or more columns in a tiled way. Tiling means
1189
// that the data are stored without a preferred order to make access
1190
// along the different main axes equally efficient. This is done by
1191
// storing the data in so-called tiles (i.e. equally shaped subsets of an
1192
// array) to increase data locality. The user can define the tile shape
1193
// to optimize for the most frequently used access.
1194
// <p>
1195
// The Tiled Storage Manager has the following properties:
1196
// <ul>
1197
// <li> There can be more than one Tiled Storage Manager in
1198
// a table; each with its own (unique) name.
1199
// <li> Each Tiled Storage Manager can store an
1200
// N-dimensional so-called hypercolumn.
1201
// Elaborate hypercolumns can be defined using
1202
// <linkto file="TableDesc.h#defineHypercolumn">
1203
// TableDesc::defineHypercolumn</linkto>.
1204
// <br>Note that defining a hypercolumn is only necessary if it
1205
// contains multiple columns or if the TiledDataStMan is used.
1206
// It means that in practice it is hardly ever needed to define a
1207
// hypercolumn.
1208
// <br>A hypercolumn consists of up to three types of columns:
1209
// <dl>
1210
// <dt> Data columns
1211
// <dd> contain the data to be stored in a tiled way. This will
1212
// be done in tiled hypercubes.
1213
// There must be at least one data column.
1214
// <br> For example: a table contains UV-data with
1215
// data columns "Visibility" and "Weight".
1216
// <dt> Coordinate columns
1217
// <dd> define the world coordinates of the pixels in the data columns.
1218
// Coordinate columns are optional, but if given there must
1219
// be N coordinate columns for an N-dimensional hypercolumn.
1220
// <br>
1221
// For example: the data in the example above is 4-dimensional
1222
// and has coordinate columns "Time", "Baseline", "Frequency",
1223
// and "Polarization".
1224
// <dt> Id columns
1225
// <dd> are needed if TiledDataStMan is used.
1226
// Different rows in the data columns can be stored in different
1227
// hypercubes. The values in the id column(s) uniquely identify
1228
// the hypercube a row is stored in.
1229
// <br>
1230
// For example: the line and continuum data in a MeasurementSet
1231
// table need to be stored in 2 different hypercubes (because
1232
// their shapes are different (see below)). A column containing
1233
// the type (line or continuum) has to be used as an id column.
1234
// </dl>
1235
// <li> If multiple data columns are used, the shape of their data
1236
// must be conforming in each individual row.
1237
// If data in different rows have different shapes, they must be
1238
// stored in different hypercubes, because a hypercube can only hold
1239
// data with conforming shapes.
1240
// <br>
1241
// Thus in the example above, rows with line data will have conforming
1242
// shapes and can be stored in one hypercube. The continuum data
1243
// will have another shape and can be stored in another hypercube.
1244
// <br>
1245
// The storage manager keeps track of the mapping of rows to/from
1246
// hypercubes.
1247
// <li> Each hypercube can be tiled in its own way. It is not required
1248
// that an integer number of tiles fits in the hypercube. The last
1249
// tiles will be padded as needed.
1250
// <li> The last axis of a hypercube can be extensible. This means that
1251
// the size of that axis does not need to be defined when the
1252
// hypercube is defined in the storage manager. Instead, the hypercube
1253
// can be extended when another chunk of data has to be stored.
1254
// This can be very useful in, for example, a (quasi-)realtime
1255
// environment where the size of the time axis is not known.
1256
// <li> If coordinate columns are defined, they describe the coordinates
1257
// of the axes of the hypercubes. Each hypercube has its own set of
1258
// coordinates.
1259
// <li> Data and id columns have to be stored with the Tiled
1260
// Storage Manager. However, coordinate columns do not need to be
1261
// stored with the Tiled Storage Manager.
1262
// Especially in the case where the coordinates for a hypercube axis
1263
// are varying (i.e. dependent on other axes), another storage manager
1264
// has to be used (because the Tiled Storage Manager can only
1265
// hold constant coordinates).
1266
// </ul>
1267
// <p>
1268
// The following Tiled Storage Managers are available:
1269
// <dl>
1270
// <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1271
// <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1272
// by using the array shape as the id value.
1273
// Similarly to <src>TiledDataStMan</src> it can maintain multiple
1274
// hypercubes and store multiple rows in a hypercube, but it is
1275
// easier to use, because the special <src>addHypercube</src> and
1276
// <src>extendHypercube</src> functions are not needed.
1277
// A hypercube is automatically added when a new array shape is
1278
// encountered.
1279
// <br>
1280
// This storage manager could be used for a table with a column
1281
// containing line and continuum data, which will result
1282
// in 2 hypercubes.
1283
// <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1284
// <dd> creates (automatically) a new hypercube for each row.
1285
// Thus each row of the hypercolumn is stored in a separate hypercube.
1286
// Note that the row number serves as the id value. So an id column
1287
// is not needed, although there are multiple hypercubes.
1288
// <br>
1289
// This storage manager is meant for tables where the data arrays
1290
// in the different rows are not accessed together. One can think
1291
// of a column containing images. Each row contains an image and
1292
// only one image is shown at a time.
1293
// <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1294
// <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1295
// in the hypercube have to have the same shape and therefore this
1296
// storage manager is only possible if all columns in the hypercolumn
1297
// have the attribute FixedShape.
1298
// <br>
1299
// This storage manager could be used for a table with a column
1300
// containing images for the Stokes parameters I, Q, U, and V.
1301
// By storing them in one hypercube, it is possible to retrieve
1302
// the 4 Stokes values for a subset of the image or for an individual
1303
// pixel in a very efficient way.
1304
// <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1305
// <dd> allows one to control the creation and extension of hypercubes.
1306
// This is done by means of the class
1307
// <linkto class=TiledDataStManAccessor:description>
1308
// TiledDataStManAccessor</linkto>.
1309
// It makes it possible to store, say, row 0-9 in hypercube A,
1310
// row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1311
// <br>
1312
// The drawback of this storage manager is that its hypercubes are not
1313
// automatically extended when adding new rows. The special functions
1314
// <src>addHypercube</src> and <src>extendHypercube</src> have to be
1315
// used making it somewhat tedious to use.
1316
// Therefore this storage manager may become obsolete in the near future.
1317
// </dl>
1318
// The Tiled Storage Managers have 3 ways to access and cache the data.
1319
// Class <linkto class=TSMOption>TSMOption</linkto> can be used to set up an
1320
// access choice and use it in a Table constructor.
1321
// <ul>
1322
// <li> The old way (the only way until January 2010) uses a cache
1323
// of its own to keep tiles that might need to be reused. It will always
1324
// access entire tiles, even if only a small part is needed.
1325
// It is possible to define a maximum cache size. The description of class
1326
// <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1327
// contains a discussion about the effect of defining a maximum cache
1328
// size.
1329
// <li> Memory-mapping the data files. In this way the operating system
1330
// takes care of the IO and caching. However, the limited address space
1331
// may preclude using it for large tables on 32-bit systems.
1332
// <li> Use buffered IO and let the kernel's file cache take care of caching.
1333
// It will access the data in chunks of the given buffer size, so the
1334
// entire tile does not need to be accessed if only a small part is
1335
// needed.
1336
// </ul>
1337
// Apart from reading, all access ways described above can also handle writing
1338
// and extending tables. They create fully equal files. Both little and big
1339
// endian data can be read or written.
1340
1341
// <ANCHOR NAME="Tables:virtual column engines">
1342
// <h3>Virtual Column Engines</h3></ANCHOR>
1343
//
1344
// Virtual column engines are used to implement the virtual (i.e.
1345
// calculated-on-the-fly) columns. CTDS provides
1346
// an abstract base class (or "interface class")
1347
// <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1348
// that specifies the protocol for these engines.
1349
// The programmer must derive a concrete class to implement
1350
// the application-specific virtual column.
1351
// <p>
1352
// For example: the programmer
1353
// needs a column in a table which is the difference between two other
1354
// columns. (Perhaps these two other columns are updated periodically
1355
// during the execution of a program.) A good way to handle this would
1356
// be to have a virtual column in the table, and write a virtual column
1357
// engine which knows how to calculate the difference between corresponding
1358
// cells of the two other columns. So the result is that accessing a
1359
// particular cell of the virtual column invokes the virtual column engine,
1360
// which then gets the values from the other two columns, and returns their
1361
// difference. This particular example could be done using
1362
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1363
// <p>
1364
// Several virtual column engines exist:
1365
// <ol>
1366
// <li> The class
1367
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1368
// makes it possible to define a column as an arbitrary expression of
1369
// other columns. It uses the <a href="../notes/199.html">TaQL</a>
1370
// CALC command. The virtual column can be a scalar or an array and
1371
// can have one of the standard data types supported by CTDS.
1372
// <li> The class
1373
// <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1374
// maps an integer bit flags column to a Bool column. A read and write mask
1375
// can be defined telling which bits to take into account when mapping
1376
// to and from Bool (thus when reading or writing the Bool).
1377
// <li> The class
1378
// <linkto class="CompressFloat:description">CompressFloat</linkto>
1379
// compresses a single precision floating point array by scaling the
1380
// values to shorts (16-bit integer).
1381
// <li> The class
1382
// <linkto class="CompressComplex:description">CompressComplex</linkto>
1383
// compresses a single precision complex array by scaling the
1384
// values to shorts (16-bit integer). In fact, the 2 parts of the complex
1385
// number are combined into a 32-bit integer.
1386
// <li> The class
1387
// <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1388
// does the same as CompressComplex, but optimizes for the case where the
1389
// imaginary part is zero (which is often the case for Single Dish data).
1390
// <li> The double templated class
1391
// <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1392
// scales the data in an array from, for example,
1393
// float to short before putting it.
1394
// <li> The double templated class
1395
// <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1396
// converts the data from one data type to another. Sometimes it might be
1397
// needed to store the residual data in an MS in double precision.
1398
// Because the imaging task can only handle single precision, this engine
1399
// can be used to map the data from double to single precision.
1400
// <li> The double templated class
1401
// <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1402
// converts the data from one data type to another with the possibility
1403
// to reduce the number of dimensions. For example, it can be used to
1404
// store a 2-d array of StokesVector objects as a 3-d array of floats
1405
// by treating the 4 data elements as an extra array axis. If the
1406
// StokesVector class is simple, it can be done very efficiently.
1407
// <li> The class
1408
// <linkto class="ForwardColumnEngine:description">
1409
// ForwardColumnEngine</linkto>
1410
// forwards the gets and puts on a row in a column to the same row
1411
// in a column with the same name in another table. This provides
1412
// a virtual copy of the referenced column.
1413
// <li> The class
1414
// <linkto class="ForwardColumnIndexedRowEngine:description">
1415
// ForwardColumnIndexedRowEngine</linkto>
1416
// is similar to <src>ForwardColumnEngine</src>.
1417
// However, instead of forwarding it to the same row it uses a
1418
// column to map its row number to a row number in the referenced
1419
// table. In this way multiple rows can share the same data.
1420
// This data manager only allows for get operations.
1421
// <li> The calibration module has implemented a virtual column engine
1422
// to do on-the-fly calibration in a transparent way.
1423
// </ol>
1424
// To handle arbitrary data types the templated abstract base class
1425
// <linkto class="VSCEngine:description">VSCEngine</linkto>
1426
// has been written. An example of how to use this class can be
1427
// found in the demo program <src>dVSCEngine.cc</src>.
1428
1429
// <ANCHOR NAME="Tables:LockSync">
1430
// <h3>Table locking and synchronization</h3></ANCHOR>
1431
//
1432
// Multiple concurrent readers and writers (also via NFS) of a
1433
// table are supported by means of a locking/synchronization mechanism.
1434
// This mechanism is not very sophisticated in the sense that it is
1435
// very coarsely grained. When locking, the entire table gets locked.
1436
// A special lock file is used to lock the table. This lock file also
1437
// contains some synchronization data.
1438
// <p>
1439
// Six ways of locking are supported (see class
1440
// <linkto class=TableLock>TableLock</linkto>):
1441
// <dl>
1442
// <dt> TableLock::PermanentLocking(Wait)
1443
// <dd> locks the table permanently (from open till close). This means
1444
// that one writer OR multiple readers are possible.
1445
// <dt> TableLock::AutoLocking
1446
// <dd> does the locking automatically. This is the default mode.
1447
// This mode makes it possible that a table is shared amongst
1448
// processes without the user needing to write any special code.
1449
// It also means that a lock is only released when needed.
1450
// <dt> TableLock::AutoNoReadLocking
1451
// <dd> is similar to AutoLocking. However, no lock is acquired when
1452
// reading the table making it possible to read the table while
1453
// another process holds a write-lock. It also means that for read
1454
// purposes no automatic synchronization is done when the table is
1455
// updated in another process.
1456
// Explicit synchronization can be done by means of the function
1457
// <src>Table::resync</src>.
1458
// <dt> TableLock::UserLocking
1459
// <dd> requires that the programmer explicitly acquires and releases
1460
// a lock on the table. This makes some kind of transaction
1461
// processing possible. E.g. set a write lock, add a row,
1462
// write all data into the row and release the lock.
1463
// The Table functions <src>lock</src> and <src>unlock</src>
1464
// have to be used to acquire and release a (read or write) lock.
1465
// <dt> TableLock::UserNoReadLocking
1466
// <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1467
// no lock is needed to read the table.
1468
// <dt> TableLock::NoLocking
1469
// <dd> does not use table locking. It is the responsibility of the
1470
// user to ensure that no concurrent access is done on the same
1471
// bucket or tile in a storage manager, otherwise a table might
1472
// get corrupted.
1473
// <br>This mode is always used if Casacore is built with
1474
// -DAIPS_TABLE_NOLOCKING.
1475
// </dl>
1476
// Synchronization of the processes accessing the same table is done
1477
// by means of the lock file. When a lock is released, the storage
1478
// managers flush their data into the table files. Some synchronization data
1479
// is written into the lock file telling the new number of table rows
1480
// and telling which storage managers have written data.
1481
// This information is read when another process acquires the lock
1482
// and is used to determine which storage managers have to refresh
1483
// their internal caches.
1484
// <br>Note that for the NoReadLocking modes (see above) explicit
1485
// synchronization might be needed using <src>Table::resync</src>.
1486
// <p>
1487
// The function <src>Table::hasDataChanged</src> can be used to check
1488
// if a table is (being) changed by another process. In this way
1489
// a program can react to it. E.g. the table browser can refresh its
1490
// screen when the underlying table is changed.
1491
// <p>
1492
// In general the default locking option will do.
1493
// From the above it should be clear that heavy concurrent access
1494
// results in a lot of flushing, thus will have a negative impact on
1495
// performance. If uninterrupted access to a table is needed,
1496
// the <src>PermanentLocking</src> option should be used.
1497
// If transaction-like processing is done (e.g. updating a table
1498
// containing an observation catalogue), the <src>UserLocking</src>
1499
// option is probably best.
1500
// <p>
1501
// Creation or deletion of a table is not possible if that table
1502
// is still open in another process. The function
1503
// <src>Table::isMultiUsed()</src> can be used to check if a table
1504
// is open in other processes.
1505
// <br>
1506
// The function <src>TableUtil::deleteTable</src> should be used to delete
1507
// a table. Before deleting the table it ensures that it is writable
1508
// and that it is not open in the current or another process.
1509
// <p>
1510
// The following example wants to read the table uninterrupted, thus it uses
1511
// the <src>PermanentLocking</src> option. It also wants to wait
1512
// until the lock is actually acquired.
1513
// Note that the destructor closes the table and releases the lock.
1514
// <srcblock>
1515
// // Open the table (readonly).
1516
// // Acquire a permanent (read) lock.
1517
// // It waits until the lock is acquired.
1518
// Table tab ("some.name",
1519
// TableLock(TableLock::PermanentLockingWait));
1520
// </srcblock>
1521
//
1522
// The following example uses the automatic locking.
1523
// It tells the system to check about every 20 seconds if another
1524
// process wants access to the table.
1525
// <srcblock>
1526
// // Open the table (readonly).
1527
// Table tab ("some.name",
1528
// TableLock(TableLock::AutoLocking, 20));
1529
// </srcblock>
1530
//
1531
// The following example gets data (say from a GUI) and writes it
1532
// as a row into the table. To lock the table as little as possible,
1533
// the lock is acquired just before writing and released immediately
1534
// thereafter.
1535
// <srcblock>
1536
// // Open the table (writable).
1537
// Table tab ("some.name",
1538
// TableLock(TableLock::UserLocking),
1539
// Table::Update);
1540
// while (True) {
1541
// get input data
1542
// tab.lock(); // Acquire a write lock and wait for it.
1543
// tab.addRow();
1544
// write data into the row
1545
// tab.unlock(); // Release the lock.
1546
// }
1547
// </srcblock>
1548
//
1549
// The following example deletes a table if it is not used in
1550
// another process.
1551
// <srcblock>
1552
// Table tab ("some.name");
1553
// if (! tab.isMultiUsed()) {
1554
// tab.markForDelete();
1555
// }
1556
// </srcblock>
1557
1558
// <ANCHOR NAME="Tables:KeyLookup">
1559
// <h3>Table lookup based on a key</h3></ANCHOR>
1560
//
1561
// Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1562
// user a means to find the rows matching a given key or key range.
1563
// It is a somewhat primitive replacement of a B-tree index and in the
1564
// future it may be replaced by a proper B+-tree implementation.
1565
// <p>
1566
// The <src>ColumnsIndex</src> class makes it possible to build an
1567
// in-core index on one or more columns. Looking up a key or key range
1568
// is done using a binary search on that index. It returns a vector
1569
// containing the row numbers of the rows matching the key (range).
1570
// <p>
1571
// The class is not capable of tracing changes in the underlying column(s).
1572
// It detects a change in the number of rows and updates the index
1573
// accordingly. However, it has to be told explicitly when a value
1574
// in the underlying column(s) changes.
1575
// <p>
1576
// The following example shows how the class can be used.
1577
// <example>
1578
// Suppose one has an antenna table with key ANTENNA.
1579
// <srcblock>
1580
// // Open the table and make an index for column ANTENNA.
1581
// Table tab("antenna.tab");
1582
// ColumnsIndex colInx(tab, "ANTENNA");
1583
// // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1584
// // Its data type has to match the data type of the column.
1585
// RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1586
// // Now loop in some way and find the row for the antenna
1587
// // involved in that loop.
1588
// Bool found;
1589
// while (...) {
1590
// // Fill the key field and get the row number.
1591
// // ANTENNA is a unique key, so only one row number matches.
1592
// // Otherwise function getRowNumbers had to be used.
1593
// *antFld = antenna;
1594
// uInt antRownr = colInx.getRowNumber (found);
1595
// if (!found) {
1596
// cout << "Antenna " << antenna << " is unknown" << endl;
1597
// } else {
1598
// // antRownr can now be used to get data from that row in
1599
// // the antenna table.
1600
// }
1601
// }
1602
// </srcblock>
1603
// </example>
1604
// <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1605
// advanced example. It shows how to use a private compare function
1606
// to adjust the lookup if the index does not contain single
1607
// key values, but intervals instead. This is useful if a row in
1608
// a (sub)table is valid for, say, a time range instead of a single
1609
// timestamp.
1610
1611
// <ANCHOR NAME="Tables:performance">
1612
// <h3>Performance and robustness considerations</h3></ANCHOR>
1613
//
1614
// CTDS resembles a database system, but it is not as robust.
1615
// It lacks the transaction and logging facilities common to data base systems.
1616
// It means that in case of a crash data might be lost.
1617
// To reduce the risk of data loss to
1618
// a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1619
// with an <tt>fsync</tt> to ensure that all data are really written.
1620
// However, that can degrade the performance because it involves extra writes.
1621
// So one should find the right balance between robustness and performance.
1622
//
1623
// To get a good feeling for the performance issues, it is important to
1624
// understand some of the internals of CTDS.
1625
// <br>The storage managers drive the performance. All storage managers use
1626
// buckets (called tiles for the TiledStMan) which contain the data.
1627
// All IO is done by bucket. The bucket/tile size is defined when creating
1628
// the storage manager objects. Sometimes the default will do, but usually
1629
// it is better to set it explicitly.
1630
//
1631
// It is best to do a flush when a tile is full.
1632
// For example: <br>
1633
// When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1634
// or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1635
// store, say, N/2 rows in a tile and do a flush each time all baselines
1636
// are written. In that way tiles are fully filled when doing the flush, so
1637
// no extra IO is involved.
1638
// <br>Here is some code showing this when creating a MeasurementSet.
1639
// The code should speak for itself.
1640
// <srcblock>
1641
// MS* createMS (const String& msName, int nrchan, int nrant)
1642
// {
1643
// // Get the MS main default table description.
1644
// TableDesc td = MS::requiredTableDesc();
1645
// // Add the data column and its unit.
1646
// MS::addColumnToDesc(td, MS::DATA, 2);
1647
// td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1648
// define("UNIT","Jy");
1649
// // Store the DATA and FLAG column in two separate files.
1650
// // In this way accessing FLAG only is much cheaper than
1651
// // when combining DATA and FLAG.
1652
// // All data have the same shape, thus use TiledColumnStMan.
1653
// // Also store UVW with TiledColumnStMan.
1654
// Vector<String> tsmNames(1);
1655
// tsmNames[0] = MS::columnName(MS::DATA);
1656
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1657
// td.defineHypercolumn("TiledData", 3, tsmNames);
1658
// tsmNames[0] = MS::columnName(MS::FLAG);
1659
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1660
// td.defineHypercolumn("TiledFlag", 3, tsmNames);
1661
// tsmNames[0] = MS::columnName(MS::UVW);
1662
// td.defineHypercolumn("TiledUVW", 2, tsmNames);
1663
// // Setup the new table.
1664
// SetupNewTable newTab(msName, td, Table::New);
1665
// // Most columns vary slowly and use the IncrStMan.
1666
// IncrementalStMan incrStMan("ISMData");
1667
// // A few columns use the StandardStMan (set an appropriate bucket size).
1668
// StandardStMan stanStMan("SSMData", 32768);
1669
// // Store all pol and freq and some rows in a single tile.
1670
// // autocorrelations are written, thus in total there are
1671
// // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1672
// // integer number of tiles.
1673
// TiledColumnStMan tiledData("TiledData",
1674
// IPosition(3,4,nrchan,(nrant+1)/2));
1675
// TiledColumnStMan tiledFlag("TiledFlag",
1676
// IPosition(3,4,nrchan,8*(nrant+1)/2));
1677
// TiledColumnStMan tiledUVW("TiledUVW",
1678
// IPosition(2,3,nrant*(nrant+1)/2));
1679
// newTab.bindAll (incrStMan);
1680
// newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1681
// newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1682
// newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1683
// newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1684
// newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1685
// // Create the MS and its subtables.
1686
// // Get access to its columns.
1687
// MS* msp = new MeasurementSet(newTab);
1688
// // Create all subtables.
1689
// // Do this after the creation of optional subtables,
1690
// // so the MS will know about those optional subtables.
1691
// msp->createDefaultSubtables (Table::New);
1692
// return msp;
1693
// }
1694
// </srcblock>
1695
1696
// <h4>Some more performance considerations</h4>
1697
// Which storage managers to use and how to use them depends heavily on
1698
// the type of data and the access patterns to the data. Here follow some
1699
// guidelines:
1700
// <ol>
1701
// <li> Scalar data can be stored with the StandardStMan (SSM) or
1702
// IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1703
// in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1704
// Note that very long strings (longer than the bucketsize) can only
1705
// be stored with the SSM.
1706
// <li> Any number of storage managers can be used. In fact, each column
1707
// can have a storage manager of its own resulting in column-wise
1708
// stored data which is more and more used in data base systems.
1709
// In that way a query or sort on that column is very fast, because
1710
// the buckets to read only contain data of that column.
1711
// In practice one can decide to combine a few frequently used columns
1712
// in a storage manager.
1713
// <li> Array data can be stored with any column manager. Small fixed size
1714
// arrays can be stored directly with the SSM
1715
// (or ISM if not changing much).
1716
// However, they can also be stored with a TiledStMan (TSM) as shown
1717
// for the UVW column in the example above.
1718
// <br> Large arrays should usually be stored with a TSM. However,
1719
// if it must be possible to change the shape of an array after it
1720
// was stored, the SSM (or ISM) must be used. Note that in that
1721
// case a lot of disk space can be wasted, because the SSM and ISM
1722
// store the array data at the end of the file if the array got
1723
// bigger and do not reuse the old space. The only way to
1724
// reclaim it is by making a deep copy of the entire table.
1725
// <li> If an array is stored with a TSM, it is important to decide
1726
// which TSM to use.
1727
// <ol>
1728
// <li> The TiledColumnStMan is the most efficient, but only suitable
1729
// for arrays having the same shape in the entire column.
1730
// <li> The TiledShapeStMan is suitable for columns where the arrays
1731
// can have a few shapes.
1732
// <li> The TiledCellStMan is suitable for columns where the arrays
1733
// can have many different shapes.
1734
// </ol>
1735
// This is discussed in more detail
1736
// <a href="#Tables:TiledStMan">above</a>.
1737
// <li> If storing an array with a TSM, it can be very important to
1738
// choose the right tile shape. Not only does this define the size
1739
// of a tile, but it also defines if access in other directions
1740
// than the natural direction can be fast. It is also discussed in
1741
// more detail <a href="#Tables:TiledStMan">above</a>.
1742
// <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1743
// and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1744
// is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1745
// to separate them, otherwise tiles containing FLAG also contain DATA making the
1746
// tiles much bigger, thus more expensive to access.
1747
// </ol>
1748
//
1749
// <ANCHOR NAME="Tables:iotracing">
1750
// <h4>IO Tracing</h4></ANCHOR>
1751
//
1752
// Several forms of tracing can be done to see how the Table I/O performs.
1753
// <ul>
1754
// <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1755
// collect trace information about the physical IO.
1756
// <li> The function <src>showCacheStatistics</src> in class
1757
// TiledStManAccessor can be used to show the number of actual reads
1758
// and writes and the percentage of cache hits.
1759
// <li> The software has some options to trace the operations done on
1760
// tables. It is possible to specify the columns and/or the operations
1761
// to be traced. The following <src>aipsrc</src> variables can be used.
1762
// <ul>
1763
// <li> <src>table.trace.filename</src> specifies the file to write the
1764
// trace output to. If not given or empty, no tracing will be done.
1765
// The file name can contain environment variables or a tilde.
1766
// <li> <src>table.trace.operation</src> specifies the operations to be
1767
// traced. It is a string containing s, r, and/or w where
1768
// s means tracing RefTable construction (selection/sort),
1769
// r means column reads, and w means column writes.
1770
// If empty, only the high level table operations (open, create, close)
1771
// will be traced.
1772
// <li> <src>table.trace.columntype</src> specifies the types of columns to
1773
// be traced. It is a string containing the characters s, a, and/or r.
1774
// s means all scalar columns, a all array columns, and r all record
1775
// columns. If empty and if <src>table.trace.column</src> is empty,
1776
// its default value is a.
1777
// <li> <src>table.trace.column</src> specifies names of columns to be
1778
// traced. Its value can be one or more glob-like patterns separated
1779
// by commas without any whitespace. The default is empty.
1780
// For example:
1781
// <srcblock>
1782
// table.trace.column: *DATA,FLAG,WEIGHT*
1783
// </srcblock>
1784
// to trace all DATA, the FLAG, and all WEIGHT columns.
1785
// </ul>
1786
// The trace output is a text file with the following columns
1787
// separated by a space.
1788
// <ul>
1789
// <li> The UTC time the trace line was written (with msec accuracy).
1790
// <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1791
// s(election/sort/iter), p(rojection).
1792
// t means an arbitrary table operation as given in the name column.
1793
// <li> The table-id (as t=i) given at table creation (new) or open.
1794
// <li> The table name, column name, or table operation
1795
// (as <src>*oper*</src>).
1796
// <src>*reftable*</src> means that the operation is on a RefTable
1797
// (thus result of selection, sort, projection, or iteration).
1798
// <li> The row or rows to access (* means all rows).
1799
// Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1800
// where e and i are only given if applicable (default i is 1).
1801
// Note that e is inclusive and defaults to s.
1802
// <li> The optional array shape to access (none means scalar).
1803
// In case multiple rows are accessed, the last shape value is the
1804
// number of rows.
1805
// <li> The optional slice of the array in each row as [start][end][stride].
1806
// </ul>
1807
// Shape, start, end, and stride are given in Fortran-order as
1808
// [n1,n2,...].
1809
// </ul>
1810
1811
// <ANCHOR NAME="Tables:applications">
1812
// <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1813
// <ul>
1814
// <li><em>showtableinfo</em> shows the structure of a table. It can show:
1815
// <ul>
1816
// <li> the columns and their format (optionally sorted on name)
1817
// <li> the data managers used to store the column data
1818
// <li> the table and/or column keywords and their values
1819
// <li> recursively the same info of the subtables
1820
// </ul>
1821
// <li><em>showtablelock</em> shows if a table is locked or opened and by
1822
// which process.
1823
// <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1824
// <li><em>tomf</em> copies the given files to a MultiFile.
1825
// <li><em>taql</em> can be used to query a table using the
1826
// <a href="../notes/199.html">Table Query Language</a> (TaQL).
1827
// </ul>
1828
//
1829
// </synopsis>
1830
// </module>
1831
1832
1833
1834
}
//# NAMESPACE CASACORE - END
1835
1836
#endif
Slicer.h
ColumnDesc.h
ColumnsIndexArray.h
SetupNewTab.h
TableVector.h
aips.h
ArrayColumn.h
ScaColDesc.h
ArrColDesc.h
TableRow.h
Array.h
TableLock.h
TableUtil.h
DataMan.h
ScaRecordColDesc.h
TableDesc.h
TableRecord.h
ColumnsIndex.h
TabVecMath.h
Slice.h
TableCopy.h
Table.h
TaQL.h
ScalarColumn.h
RecordField.h
TabVecLogic.h
Generated by
1.8.5