casacore
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
tables
Tables.h
Go to the documentation of this file.
1
//# Tables.h: The Tables module - Casacore data storage
2
//# Copyright (C) 1994-2010
3
//# Associated Universities, Inc. Washington DC, USA.
4
//#
5
//# This library is free software; you can redistribute it and/or modify it
6
//# under the terms of the GNU Library General Public License as published by
7
//# the Free Software Foundation; either version 2 of the License, or (at your
8
//# option) any later version.
9
//#
10
//# This library is distributed in the hope that it will be useful, but WITHOUT
11
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13
//# License for more details.
14
//#
15
//# You should have received a copy of the GNU Library General Public License
16
//# along with this library; if not, write to the Free Software Foundation,
17
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18
//#
19
//# Correspondence concerning AIPS++ should be addressed as follows:
20
//# Internet email: aips2-request@nrao.edu.
21
//# Postal address: AIPS++ Project Office
22
//# National Radio Astronomy Observatory
23
//# 520 Edgemont Road
24
//# Charlottesville, VA 22903-2475 USA
25
//#
26
//# $Id$
27
28
#ifndef TABLES_TABLES_H
29
#define TABLES_TABLES_H
30
31
//# Includes
32
//# table description
33
#include <
casacore/casa/aips.h
>
34
#include <
casacore/tables/Tables/TableDesc.h
>
35
#include <
casacore/tables/Tables/ColumnDesc.h
>
36
#include <
casacore/tables/Tables/ScaColDesc.h
>
37
#include <
casacore/tables/Tables/ArrColDesc.h
>
38
#include <
casacore/tables/Tables/ScaRecordColDesc.h
>
39
40
//# table access
41
#include <
casacore/tables/Tables/Table.h
>
42
#include <
casacore/tables/Tables/TableLock.h
>
43
#include <
casacore/tables/Tables/SetupNewTab.h
>
44
#include <
casacore/tables/Tables/ScalarColumn.h
>
45
#include <
casacore/tables/Tables/ArrayColumn.h
>
46
#include <
casacore/tables/Tables/TableRow.h
>
47
#include <
casacore/tables/Tables/TableCopy.h
>
48
#include <
casacore/tables/Tables/TableUtil.h
>
49
#include <
casacore/casa/Arrays/Array.h
>
50
#include <
casacore/casa/Arrays/Slicer.h
>
51
#include <
casacore/casa/Arrays/Slice.h
>
52
53
//# keywords
54
#include <
casacore/tables/Tables/TableRecord.h
>
55
#include <
casacore/casa/Containers/RecordField.h
>
56
57
//# table lookup
58
#include <
casacore/tables/Tables/ColumnsIndex.h
>
59
#include <
casacore/tables/Tables/ColumnsIndexArray.h
>
60
61
//# table vectors
62
#include <
casacore/tables/Tables/TableVector.h
>
63
#include <
casacore/tables/Tables/TabVecMath.h
>
64
#include <
casacore/tables/Tables/TabVecLogic.h
>
65
66
//# data managers
67
#include <
casacore/tables/DataMan.h
>
68
69
//# table expressions (for selection of rows)
70
#include <
casacore/tables/TaQL.h
>
71
72
73
namespace
casacore {
//# NAMESPACE CASACORE - BEGIN
74
75
// <module>
76
77
// <summary>
78
// CTDS (Casacore Table Data System) is the data storage mechanism for Casacore
79
// </summary>
80
81
// <use visibility=export>
82
83
// <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
84
// </reviewed>
85
86
// <prerequisite>
87
// <li> <linkto class="Record:description">Record</linkto> class
88
// </prerequisite>
89
90
// <etymology>
91
// "Table" is a formal term from relational database theory:
92
// <em> "The organizing principle in a relational database is the TABLE,
93
// a rectangular, row/column arrangement of data values."</em>
94
// Casacore tables are extensions to traditional tables, but are similar
95
// enough that we use the same name. There is also a strong resemblance
96
// between the uses of Casacore tables, and FITS binary tables, which
97
// provides another reason to use "Tables" to describe the Casacore data
98
// storage mechanism.
99
// </etymology>
100
101
// <synopsis>
102
// Tables are the fundamental storage mechanism for Casacore. This document
103
// explains <A HREF="#Tables:motivation">why</A> they had to be made,
104
// <A HREF="#Tables:properties">what</A> their properties are, and
105
// <A HREF="#Tables:open">how</A> to use them. The last subject is
106
// discussed and illustrated in a sequence of sections:
107
// <UL>
108
// <LI> <A HREF="#Tables:open">opening</A> an existing table,
109
// <LI> <A HREF="#Tables:read">reading</A> from a table,
110
// <LI> <A HREF="#Tables:creation">creating</A> a new table,
111
// <LI> <A HREF="#Tables:write">writing</A> into a table,
112
// <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
113
// <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
114
// (see also <A HREF="../notes/199.html">Table Query Language</A>),
115
// <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>
116
// <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
117
// <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
118
// for concurrent access,
119
// <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
120
// <LI> <A HREF="#Tables:vectors">vector operations</A> on a column.
121
// <LI> <A HREF="#Tables:performance">performance and robustness</A>
122
// considerations with some information on
123
// <A HREF="#Tables:iotracing">IO tracing</A>.
124
// </UL>
125
// A few <A HREF="#Tables:applications">applications</A> exist to inspect
126
// and manipulate a table.
127
//
128
// Several UML diagrams describe the class structure of the Tables module.
129
// <ul>
130
// <li> <a href="TableOverview.drawio.svg.html">Global overview of Table access</a>.
131
// <li> <a href="TableDesc.drawio.svg.html">Table and column descriptions</a>.
132
// <li> <a href="TableRecord.drawio.svg.html">Table keywords</a>.
133
// <li> <a href="Table.drawio.svg.html">Table class structure</a>.
134
// <li> <a href="PlainTable.drawio.svg.html">Detailed PlainTable class structure</a>.
135
// <li> <a href="DataManager.drawio.svg.html">DataManagers for storage</a>.
136
// </ul>
137
138
// <ANCHOR NAME="Tables:motivation">
139
// <motivation></ANCHOR>
140
//
141
// The Casacore tables are mainly based upon the ideas of Allen Farris,
142
// as laid out in the
143
// <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
144
// AIPS++ Database document</A>, from where the following paragraph is taken:
145
//
146
// <p>
147
// Traditional relational database tables have two features that
148
// decisively limit their applicability to scientific data. First, an item of
149
// data in a column of a table must be atomic -- it must have no internal
150
// structure. A consequence of this restriction is that relational
151
// databases are unable to deal with arrays of data items. Second, an
152
// item of data in a column of a table must not have any direct or
153
// implied linkages to other items of data or data aggregates. This
154
// restriction makes it difficult to model complex relationships between
155
// collections of data. While these restrictions may make it easy to
156
// define a mathematically complete set of data manipulation operations,
157
// they are simply intolerable in a scientific data-handling context.
158
// Multi-dimensional arrays are frequently the most natural modes in
159
// which to discuss and think about scientific data. In addition,
160
// scientific data often requires complex calibration operations that
161
// must draw on large bodies of data about equipment and its performance
162
// in various states. The restrictions imposed by the relational model
163
// make it very difficult to deal with complex problems of this nature.
164
// <p>
165
//
166
// In response to these limitations, and other needs, the Casacore tables were
167
// designed.
168
// </motivation>
169
170
// <ANCHOR NAME="Tables:properties">
171
// <h3>Table Properties</h3></ANCHOR>
172
//
173
// Casacore tables have the following properties:
174
// <ul>
175
// <li> A table consists of a number of rows and columns.
176
// <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
177
// for the table as a whole and for individual columns. A keyword/value
178
// pair for a column could, for instance, define its unit.
179
// <li> Each table has a <A HREF="#Tables:Table Description">description</A>
180
// which specifies the number and type of columns, and maybe initial
181
// keyword sets and default values for the columns.
182
// <li> A cell in a column may contain
183
// <UL>
184
// <LI> a scalar;
185
// <LI> a "direct" array -- which must have the same shape in all
186
// cells of a column, is usually small, and is stored in the
187
// table itself;
188
// <LI> an "indirect" array -- which may have different shapes in
189
// different cells of the same column, is arbitrarily large,
190
// and is stored in a separate file;
191
// </UL>
192
// <li> A column may be
193
// <UL>
194
// <LI> "filled" -- containing actual data, or
195
// <LI> "virtual" -- containing a recipe telling how the data will
196
// be generated dynamically
197
// </UL>
198
// <li> Only the standard Casacore data types can be used in filled
199
// columns, be they scalars or arrays: Bool, uChar, Short, uShort,
200
// Int, uInt, Int64, float, double, Complex, DComplex and String.
201
// Furthermore scalars containing
202
// <linkto class=TableRecord>record</linkto> values are possible
203
// <li> A column can have a default value, which will automatically be stored
204
// in a cell of the column, when a row is added to the table.
205
// <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
206
// reading, writing and generation of data. Each column in a table can
207
// be assigned its own data manager, which allows for optimization of
208
// the data storage per column. The choice of data manager determines
209
// whether a column is filled or virtual.
210
// <li> Table data are stored in a canonical format, so they can be read
211
// on any machine. To avoid needless swapping of bytes, the data can
212
// be stored in big endian (as used on e.g. SUN) or little endian
213
// (as used on Intel PC-s) canonical format.
214
// By default it uses the format specified in the aipsrc variable
215
// <code>table.endianformat</code> which defaults to
216
// <code>Table::LocalEndian</code> (the endian format of the
217
// machine being used when creating the table).
218
// <li> The SQL-like
219
// <a href="../notes/199.html">Table Query Language</a> (TaQL)
220
// can be used to do operations on tables like
221
// select, sort, update, insert, delete, and create.
222
// </ul>
223
//
224
// Tables can be in one of four forms:
225
// <ul>
226
// <li> A plain table is a table stored on disk.
227
// It can be shared by multiple processes.
228
// <li> A memory table is a table held in memory.
229
// It is a process specific table, thus not sharable.
230
// The <linkto class=Table>Table::copy</linkto> function can be used
231
// to turn a memory table into a plain table.
232
// <li> A reference table is a table referencing a plain or memory table.
233
// It is the result of a selection or sort on another table.
234
// A reference table references the data in the other table, thus
235
// changing data in a reference table means that the data in the
236
// original table are changed.
237
// The <linkto class=Table>Table::deepCopy</linkto> function can be
238
// used to turn a reference table into a plain table.
239
// <li> <A HREF="#Tables:concatenation">a concatenated table</A>
240
// is a union of tables (of any form) with the same description.
241
// They are concatenated in a virtual way, thus no copy is made.
242
// </ul>
243
// Concurrent access from different processes to the same plain table is
244
// fully supported by means of a <A HREF="#Tables:LockSync">
245
// locking/synchronization</A> mechanism. Concurrent access over NFS is also
246
// supported.
247
// <p>
248
// A (somewhat primitive) mechanism is available to do a
249
// <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
250
// of a key.
251
252
// <ANCHOR NAME="Tables:open">
253
// <h3>Opening an Existing Table</h3></ANCHOR>
254
//
255
// To open an existing table you just create a
256
// <linkto class="Table:description">Table</linkto> object giving
257
// the name of the table, like:
258
//
259
// <srcblock>
260
// Table readonly_table ("tableName");
261
// // or
262
// Table read_and_write_table ("tableName", Table::Update);
263
// </srcblock>
264
//
265
// The constructor option determines whether the table will be opened as
266
// readonly or as read/write. A readonly table file must be opened
267
// as readonly, otherwise an exception is thrown. The functions
268
// <linkto class="Table">Table::isWritable(...)</linkto>
269
// can be used to determine if a table is writable.
270
//
271
// When the table is opened, the data managers are reinstantiated
272
// according to their definition at table creation.
273
// <p>
274
// <ANCHOR NAME="Tables:openTable"></ANCHOR>
275
// The static function <src>TableUtil::openTable</src> can be used to open a table,
276
// in particular a subtable, in a simple way by means of the :: notation like
277
// <src>maintable::subtable</src>. The :: notation is much better than specifying
278
// an explicit path (such as <src>maintable/subtable</src>), because it also works
279
// fine if the main table is a reference table (e.g. the result of a selection).
280
281
// <ANCHOR NAME="Tables:read">
282
// <h3>Reading from a Table</h3></ANCHOR>
283
//
284
// You can read data from a table column with the "get" functions
285
// in the classes
286
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
287
// and
288
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
289
// For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
290
// uShort, uInt, float, double, Complex, DComplex and String) you could
291
// instead use
292
// <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
293
// <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
294
// These functions offer an extra: they do automatic data type promotion;
295
// so that you can, for example, get a double value from a float column.
296
//
297
// These "get" functions are used in the same way as the simple "put"
298
// functions described in the <A HREF="#Tables:write">writing</A> section below.
299
// <p>
300
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
301
// can be constructed for a non-writable column. However, an exception
302
// is thrown if the put function is used for it.
303
// The same is true for
304
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and
305
// <linkto class="TableColumn:description">TableColumn</linkto>.
306
// <p>
307
// A typical program could look like:
308
// <srcblock>
309
// #include <casacore/tables/Tables/Table.h>
310
// #include <casacore/tables/Tables/ScalarColumn.h>
311
// #include <casacore/tables/Tables/ArrayColumn.h>
312
// #include <casacore/casa/Arrays/Vector.h>
313
// #include <casacore/casa/Arrays/Slicer.h>
314
// #include <casacore/casa/Arrays/ArrayMath.h>
315
// #include <iostream>
316
//
317
// main()
318
// {
319
// // Open the table (readonly).
320
// Table tab ("some.name");
321
//
322
// // Construct the various column objects.
323
// // Their data type has to match the data type in the table description.
324
// ScalarColumn<Int> acCol (tab, "ac");
325
// ArrayColumn<Float> arr2Col (tab, "arr2");
326
//
327
// // Loop through all rows in the table.
328
// uInt nrow = tab.nrow();
329
// for (uInt i=0; i<nrow; i++) {
330
// // Read the row for both columns.
331
// cout << "Column ac in row i = " << acCol(i) << endl;
332
// Array<Float> array = arr2Col.get (i);
333
// }
334
//
335
// // Show the entire column ac,
336
// // and show the 10th element of arr2 in each row.
336
// cout << acCol.getColumn();
337
// cout << arr2Col.getColumn (Slicer(Slice(10)));
339
// }
340
// </srcblock>
341
342
// <ANCHOR NAME="Tables:creation">
343
// <h3>Creating a Table</h3></ANCHOR>
344
//
345
// The creation of a table is a multi-step process:
346
// <ol>
347
// <li>
348
// Create a <A HREF="#Tables:Table Description">table description</A>.
349
// <li>
350
// Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
351
// object with the name of the new table.
352
// <li>
353
// Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
354
// <li>
355
// Bind each column to the appropriate data manager.
356
// The system will bind unbound columns to data managers which
357
// are created internally using the default data manager name
358
// defined in the column description.
359
// <li>
360
// Define the shape of direct columns (if that was not already done in the
361
// column description).
362
// <li>
363
// Create the <linkto class="Table:description">Table</linkto>
364
// object from the SetupNewTable object. Here, a final check is performed
365
// and the necessary files are created.
366
// </ol>
367
// The recipe above is meant for the creation of a plain table, but the
368
// creation of a memory table is exactly the same. The only difference
369
// is that in the call to construct the Table object the Table::Memory
370
// type has to be given. Note that in the SetupNewTable object the columns
371
// can be bound to any data manager. <src>MemoryTable</src> will rebind
372
// stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
373
// storage manager, but virtual column bindings are not changed.
374
//
375
// The following example shows how you can create a table. An example
376
// specifically illustrating the creation of the
377
// <A HREF="#Tables:Table Description">table description</A> is given
378
// in that section. Other sections discuss the access to the table.
379
//
380
// <srcblock>
381
// #include <casacore/tables/Tables/TableDesc.h>
382
// #include <casacore/tables/Tables/SetupNewTab.h>
383
// #include <casacore/tables/Tables/Table.h>
384
// #include <casacore/tables/Tables/ScaColDesc.h>
385
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
386
// #include <casacore/tables/Tables/ArrColDesc.h>
387
// #include <casacore/tables/Tables/StandardStMan.h>
388
// #include <casacore/tables/Tables/IncrementalStMan.h>
389
//
390
// main()
391
// {
392
// // Step1 -- Build the table description.
393
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
394
// td.comment() = "A test of class SetupNewTable";
395
// td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
396
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
397
// td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
398
// td.addColumn (ScalarColumnDesc<Float> ("ae"));
399
// td.addColumn (ScalarRecordColumnDesc ("arec"));
400
// td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
401
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
402
// td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
403
//
404
// // Step 2 -- Setup a new table from the description.
405
// SetupNewTable newtab("newtab.data", td, Table::New);
406
//
407
// // Step 3 -- Create storage managers for it.
408
// StandardStMan stmanStand_1;
409
// StandardStMan stmanStand_2;
410
// IncrementalStMan stmanIncr;
411
//
412
// // Step 4 -- First, bind all columns to the first storage
413
// // manager. Then, bind a few columns to another storage manager
414
// // (which will overwrite the previous bindings).
415
// newtab.bindAll (stmanStand_1);
416
// newtab.bindColumn ("ab", stmanStand_2);
417
// newtab.bindColumn ("ae", stmanIncr);
418
// newtab.bindColumn ("arr3", stmanIncr);
419
//
420
// // Step 5 -- Define the shape of the direct columns.
421
// // (this could have been done in the column description).
422
// newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
423
// newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
424
//
425
// // Step 6 -- Finally, create the table consisting of 10 rows.
426
// Table tab(newtab, 10);
427
//
428
// // Now we can fill the table, which is shown in a next section.
429
// // The Table destructor will flush the table to the files.
430
// }
431
// </srcblock>
432
// To create a table in memory, only step 6 has to be modified slightly to:
433
// <srcblock>
434
// Table tab(newtab, Table::Memory, 10);
435
// </srcblock>
436
//
437
// Note that the function <src>TableUtil::createTable</src> can be used to create a table
438
// in a simpler way. It can also be used to create a subtable using the :: notation
439
// similar to the <A HREF="#Tables:openTable"><src>TableUtil::openTable</src></A>
440
// function described above.
441
442
// <ANCHOR NAME="Tables:write">
443
// <h3>Writing into a Table</h3></ANCHOR>
444
//
445
// Once a table has been created or has been opened for read/write,
446
// you want to write data into it. Before doing that you may have
447
// to add one or more rows to the table.
448
// <note role=tip> If a table was created with a given number of rows, you
449
// do not need to add rows; you may not even be able to do so.
450
// </note>
451
//
452
// When adding new rows to the table, either via the
453
// <linkto class="Table">Table(...) constructor</linkto>
454
// or via the
455
// <linkto class="Table">Table::addRow(...)</linkto>
456
// function, you can choose to have those rows initialized with the
457
// default values given in the description.
458
//
459
// To actually write the data into the table you need the classes
460
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and
461
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
462
// For each column you can construct one or
463
// more of these objects. Their put(...) functions
464
// let you write a value at a time or the entire column in one go.
465
// For arrays you can "put" subsections of the arrays.
466
//
467
// As an alternative for scalars of a standard data type (i.e. Bool,
468
// uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
469
// and String) you could use the functions
470
// <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
471
// These functions offer an extra: automatic data type promotion; so that
472
// you can, for example, put a float value in a double column.
473
//
474
// A typical program could look like:
475
// <srcblock>
476
// #include <casacore/tables/Tables/TableDesc.h>
477
// #include <casacore/tables/Tables/SetupNewTab.h>
478
// #include <casacore/tables/Tables/Table.h>
479
// #include <casacore/tables/Tables/ScaColDesc.h>
480
// #include <casacore/tables/Tables/ArrColDesc.h>
481
// #include <casacore/tables/Tables/ScalarColumn.h>
482
// #include <casacore/tables/Tables/ArrayColumn.h>
483
// #include <casacore/casa/Arrays/Vector.h>
484
// #include <casacore/casa/Arrays/Slicer.h>
485
// #include <casacore/casa/Arrays/ArrayMath.h>
486
// #include <iostream>
487
//
488
// main()
489
// {
490
// // First build the table description.
491
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
492
// td.comment() = "A test of class SetupNewTable";
493
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
494
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
495
//
496
// // Setup a new table from the description,
497
// // and create the (still empty) table.
498
// // Note that since we do not explicitly bind columns to
499
// // data managers, all columns will be bound to the default
500
// // standard storage manager StandardStMan.
501
// SetupNewTable newtab("newtab.data", td, Table::New);
502
// Table tab(newtab);
503
//
504
// // Construct the various column objects.
505
// // Their data type has to match the data type in the description.
506
// ScalarColumn<Int> ac (tab, "ac");
507
// ArrayColumn<Float> arr2 (tab, "arr2");
508
// Vector<Float> vec2(100);
509
//
510
// // Write the data into the columns.
511
// // In each cell arr2 will be a vector of length 100.
512
// // Since its shape is not set explicitly, it is done implicitly.
513
// for (uInt i=0; i<10; i++) {
514
// tab.addRow(); // First add a row.
515
// ac.put (i, i+10); // value is i+10 in row i
516
// indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
517
// arr2.put (i, vec2);
518
// }
519
//
520
// // Finally, show the entire column ac,
521
// // and show the 10th element of arr2.
522
// cout << ac.getColumn();
523
// cout << arr2.getColumn (Slicer(Slice(10)));
524
//
525
// // The Table destructor writes the table.
526
// }
527
// </srcblock>
528
//
529
// In this example we added rows in the for loop, but we could also have
530
// created 10 rows straightaway by constructing the Table object as:
531
// <srcblock>
532
// Table tab(newtab, 10);
533
// </srcblock>
534
// in which case we would not include
535
// <srcblock>
536
// tab.addRow()
537
// </srcblock>
538
//
539
// The classes
540
// <linkto class="TableColumn:description">TableColumn</linkto>,
541
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and
542
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>
543
// contain several functions to put values into a single cell or into the
544
// whole column. This may look confusing, but is actually quite simple.
545
// The functions can be divided in two groups:
546
// <ol>
547
// <li>
548
// Put the given value into the column cell(s).
549
// <ul>
550
// <li>
551
// The simplest put functions,
552
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
553
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
554
// put a value into the given column cell. For convenience, there is an
555
// <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
556
// to put only a part of the array.
557
// <li>
558
// <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
559
// <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
560
// fill an entire column by putting the given value into all the cells
561
// of the column.
562
// <li>
563
// The simplest putColumn functions,
564
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
565
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
566
// put an array of values into the column. There is a special
567
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
568
// version which puts only a part of the arrays.
569
// </ul>
570
//
571
// <li>
572
// Copy values from another column to this column.<BR>
573
// These functions have the advantage that the
574
// data type of the input and/or output column can be unknown.
575
// The generic TableColumn objects can be used for this purpose.
576
// The put(Column) function checks the data types and, if possible,
577
// converts them. If the conversion is not possible, it throws an
578
// exception.
579
// <ul>
580
// <li>
581
// The put functions copy the value in a cell of the input column
582
// to a cell in the output column. The row numbers of the cells
583
// in the columns can be different.
584
// <li>
585
// The putColumn functions copy the entire contents of the input column
586
// to the output column. The lengths of the columns must be equal.
587
// </ul>
588
// Each class has its own set of these functions.
589
// <ul>
590
// <li>
591
// <linkto class="TableColumn">TableColumn::put(...)</linkto> and
592
// <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
593
// are the most generic. They can be
594
// used if the data types of both input and output column are unknown.
595
// Note that these functions are virtual.
596
// <li>
597
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
598
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
599
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
600
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
601
// are less generic and therefore potentially more efficient.
602
// The most efficient variants are the ones taking a
603
// Scalar/ArrayColumn<T>, because they require no data type
604
// conversion.
605
// </ul>
606
// </ol>
607
608
// <ANCHOR NAME="Tables:row-access">
609
// <h3>Accessing rows in a Table</h3></ANCHOR>
610
//
611
// Apart from accessing a table column-wise as described in the
612
// previous two sections, it is also possible to access a table row-wise.
613
// The <linkto class=TableRow>TableRow</linkto> class makes it possible
614
// to access multiple fields in a table row as a whole. Note that like the
615
// XXColumn classes described above, there is also an ROTableRow class
616
// for access to readonly tables.
617
// <p>
618
// On construction of a TableRow object it has to be specified which
619
// fields (i.e. columns) are part of the row. For these fields a
620
// fixed structured <linkto class=TableRecord>TableRecord</linkto>
621
// object is constructed as part of the TableRow object. The TableRow::get
622
// function will fill this record with the table data for the given row.
623
// The user has access to the record and can use
624
// <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
625
// speedier access to the record.
626
// <p>
627
// The class could be used as shown in the following example.
628
// <srcblock>
629
// // Open the table as readonly and define a row object to contain
630
// // the given columns.
631
// // Note that the function stringToVector is a very convenient
632
// // way to construct a Vector<String>.
633
// // Show the description of the fields in the row.
634
// Table table("Some.table");
635
// ROTableRow row (table, stringToVector("col1,col2,col3"));
636
// cout << row.record().description();
637
// // Since the structure of the record is known, the RecordFieldPtr
638
// // objects could be used to allow for easy and fast access to
639
// // the record which is refilled for each get.
640
// RORecordFieldPtr<String> col1(row.record(), "col1");
641
// RORecordFieldPtr<Double> col2(row.record(), "col2");
642
// RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
643
// for (uInt i=0; i<table.nrow(); i++) {
644
// row.get (i);
645
// someString = *col1;
646
// somedouble = *col2;
647
// someArrayInt = *col3;
648
// }
649
// </srcblock>
650
// The description of TableRow contains some more extensive examples.
651
652
// <ANCHOR NAME="Tables:select and sort">
653
// <h3>Table Selection and Sorting</h3></ANCHOR>
654
//
655
// The result of a select and sort of a table is another table,
656
// which references the original table. This means that an update
657
// of a sorted or selected table results in the update of the original
658
// table. The result is, however, a table in itself, so all table
659
// functions (including select and sort) can be used with it.
660
// Note that a true copy of such a reference table can be made with
661
// the <linkto class=Table>Table::deepCopy</linkto> function.
662
// <p>
663
// Rows or columns can be selected from a table. Columns can be selected
664
// by the
665
// <linkto class="Table">Table::project(...)</linkto>
666
// function, while rows can be selected by the various
667
// <linkto class="Table">Table operator()</linkto> functions.
668
// Usually a row is selected by giving a select expression with
669
// <linkto class="TableExprNode:description">TableExprNode</linkto>
670
// objects. These objects represent the various nodes
671
// in an expression, e.g. a constant, a column, or a subexpression.
672
// The Table function
673
// <linkto class="Table">Table::col(...)</linkto>
674
// creates a TableExprNode object for a column. The function
675
// <linkto class="Table">Table::key(...)</linkto>
676
// does the same for a keyword by reading
677
// the keyword value and storing it as a constant in an expression node.
678
// All column nodes in an expression must belong to the same table,
679
// otherwise an exception is thrown.
680
// In the following example we select all rows with RA>10:
681
// <srcblock>
682
// #include <casacore/tables/Tables/ExprNode.h>
683
// Table table ("Table.name");
684
// Table result = table (table.col("RA") > 10);
685
// </srcblock>
686
// while in the next one we select rows with RA and DEC in the given
687
// intervals:
688
// <srcblock>
689
// Table result = table (table.col("RA") > 10
690
// && table.col("RA") < 14
691
// && table.col("DEC") >= -10
692
// && table.col("DEC") <= 10);
693
// </srcblock>
694
// The following operators can be used to form arbitrarily
695
// complex expressions:
696
// <ul>
697
// <li> Relational operators ==, !=, >, >=, < and <=.
698
// <li> Logical operators &&, || and !.
699
// <li> Arithmetic operators +, -, *, /, %, and unary + and -.
700
// <li> Bit operators ^, &, |, and unary ~.
701
// <li> Operator() to take a subsection of an array.
702
// </ul>
703
// Many functions (like sin, max, conj) can be used in an expression.
704
// Class <linkto class=TableExprNode>TableExprNode</linkto> shows
705
// the available functions.
706
// E.g.
707
// <srcblock>
708
// Table result = table (sin (table.col("RA")) > 0.5);
709
// </srcblock>
710
// Function <src>in</src> can be used to select from a set of values.
711
// A value set can be constructed using class
712
// <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
713
// <srcblock>
714
// TableExprNodeSet set;
715
// set.add (TableExprNodeSetElem ("abc"));
716
// set.add (TableExprNodeSetElem ("defg"));
717
// set.add (TableExprNodeSetElem ("h"));
718
// Table result = table (table.col("NAME").in (set));
719
// </srcblock>
720
// select rows with a NAME equal to <src>abc</src>,
721
// <src>defg</src>, or <src>h</src>.
722
//
723
// <p>
724
// You can sort a table on one or more columns containing scalars.
725
// In this example we simply sort on column RA (default is ascending):
726
// <srcblock>
727
// Table table ("Table.name");
728
// Table result = table.sort ("RA");
729
// </srcblock>
730
// Multiple
731
// <linkto class="Table">Table::sort(...)</linkto>
732
// functions exist which allow for more flexible control over the sort order.
733
// In the next example we sort first on RA in descending order
734
// and then on DEC in ascending order:
735
// <srcblock>
736
// Table table ("Table.name");
737
// Block<String> sortKeys(2);
738
// Block<int> sortOrders(2);
739
// sortKeys(0) = "RA";
740
// sortOrders(0) = Sort::Descending;
741
// sortKeys(1) = "DEC";
742
// sortOrders(1) = Sort::Ascending;
743
// Table result = table.sort (sortKeys, sortOrders);
744
// </srcblock>
745
//
746
// Tables stemming from the same root, can be combined in several
747
// ways with the help of the various logical
748
// <linkto class="Table">Table operators</linkto> (operator|, etc.).
749
750
// <h4>Table Query Language</h4>
751
// The selection and sorting mechanism described above can only be used
752
// in a hard-coded way in a C++ program.
753
// There is, however, another way. Strings containing selection and
754
// sorting commands can be used.
755
// The syntax of these commands is based on SQL and is described in the
756
// <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
757
// The language supports UDFs (User Defined Functions) in dynamically
758
// loadable libraries as explained in the note.
759
// <br>A TaQL command can be executed with the static function
760
// <src>tableCommand</src> defined in class
761
// <linkto class=TableParse>TableParse</linkto>.
762
763
// <ANCHOR NAME="Tables:concatenation">
764
// <h3>Table Concatenation</h3></ANCHOR>
765
// Tables with identical descriptions can be concatenated in a virtual way
766
// using the Table concatenation constructor. Such a Table object behaves
767
// as any other Table object, thus any operation can be performed on it.
768
// An identical description means that the number of columns, the column names,
769
// and the data types of the columns must be the same. The columns do not
770
// need to be ordered in the same way nor to be stored in the same way.
771
// <br>Note that if tables have different column names, it is possible
772
// to form a projection (as described in the previous section) first
773
// to make them appear identical.
774
//
775
// Sometimes a MeasurementSet is partitioned, for instance in chunks of
776
// one hour. All those chunks can be virtually concatenated this way.
777
// Note that all tables in the concatenation will be opened, thus one might
778
// run out of file descriptors if there are many chunks.
779
//
780
// Similar to reference tables, it is possible to make a concatenated Table
781
// persistent by using the <src>rename</src> function. It will not copy the
782
// data; only the names of the tables used are written.
783
//
784
// The keywords of a concatenated table are taken from the first table.
785
// It is possible to change or add keywords, but that is not persistent,
786
// not even if the concatenated table is made persistent.
787
// <br>The keywords holding subtables can be handled in a special way.
788
// Normally the subtables of the concatenation are the subtables of the first
789
// table, but it is possible to concatenate subtables as well by
790
// giving their names in the constructor.
791
// In this way the, say, SYSCAL subtable of a MeasurementSet can be
792
// concatenated as well.
793
// <srcblock>
794
// // Create virtual concatenation of ms0 and ms1.
795
// Block<String> names(2);
796
// names[0] = "ms0";
797
// names[1] = "ms1";
798
// // Also concatenate their SYSCAL subtables.
799
// Block<String> subNames(1, "SYSCAL");
800
// Table concTab (names, subNames);
801
// </srcblock>
802
803
// <ANCHOR NAME="Tables:iterate">
804
// <h3>Table Iterators</h3></ANCHOR>
805
//
806
// You can iterate through a table in an arbitrary order by getting
807
// a subset of the table consisting of the rows in which the iteration
808
// columns have the same value.
809
// An iterator object is created by constructing a
810
// <linkto class="TableIterator:description">TableIterator</linkto>
811
// object with the appropriate column names.
812
//
813
// In the next example we define an iteration on the columns Time and
814
// Baseline. Each iteration step returns a table subset in which Time and
815
// Baseline have the same value.
816
//
817
// <srcblock>
818
// // Iterate over Time and Baseline (by default in ascending order).
819
// // Time is the main iteration order, thus the first column specified.
820
// Table t;
821
// Table tab ("UV_Table.data");
822
// Block<String> iv0(2);
823
// iv0[0] = "Time";
824
// iv0[1] = "Baseline";
825
// //
826
// // Create the iterator. This will prepare the first subtable.
827
// TableIterator iter(tab, iv0);
828
// Int nr = 0;
829
// while (!iter.pastEnd()) {
830
// // Get the current subtable.
831
// // This will contain rows with equal Time and Baseline.
832
// t = iter.table();
833
// cout << t.nrow() << " ";
834
// nr++;
835
// // Prepare the next subtable with the next Time,Baseline value.
836
// iter.next();
837
// }
838
// cout << endl << nr << " iteration steps" << endl;
839
// </srcblock>
840
//
841
// You can define more than one iterator on the same table; they operate
842
// independently.
843
//
844
// Note that the result of each iteration step is a table in itself which
845
// references the original table, just as in the case of a sort or select.
846
// This means that the resulting table can be used again in a sort, select,
847
// iteration, etc.
848
849
// <ANCHOR NAME="Tables:vectors">
850
// <h3>Table Vectors</h3></ANCHOR>
851
//
852
// A table vector makes it possible to treat a column in a table
853
// as a vector. Almost all operators and functions defined for normal
854
// vectors, are also defined for table vectors. So it is, for instance,
855
// possible to add a constant to a table vector. This has the effect
856
// that the underlying column gets changed.
857
//
858
// You can use the templated class
859
// <linkto class="TableVector:description">TableVector</linkto>
860
// to make a scalar column appear as a (table) vector.
861
// Columns containing arrays or tables are not supported.
862
// The data type of the TableVector object must match the
863
// data type of the column.
864
// A table vector can also hold a normal vector so that (temporary)
865
// results of table vector operations can be handled.
866
//
867
// In the following example we double the data in column COL1 and
868
// store the result in a temporary table vector.
869
// <srcblock>
870
// // Create a table vector for column COL1.
871
// // Note that if the table is readonly, putting data in the table vector
872
// // results in an exception.
873
// Table tab ("Table.data");
874
// TableVector<Int> tabvec(tab, "COL1");
875
// // Multiply it by a constant. Result is kept in a Vector in memory.
876
// TableVector<Int> temp = 2 * tabvec;
877
// </srcblock>
878
//
879
// In the next example we double the data in COL1 and put the result back
880
// in the column.
881
// <srcblock>
882
// // Create a table vector for column COL1.
883
// // It has to be a TableVector to be able to change the column.
884
// Table tab ("Table.data", Table::Update);
885
// TableVector<Int> tabvec(tab, "COL1");
886
// // Multiply it by a constant.
887
// tabvec *= 2;
888
// </srcblock>
889
890
// <ANCHOR NAME="Tables:keywords">
891
// <h3>Table Keywords</h3></ANCHOR>
892
//
893
// Any number of keyword/value pairs may be attached to the table as a whole,
894
// or to any individual column. They may be freely added, retrieved,
895
// re-assigned, or deleted. They are, in essence, a self-resizing list of
896
// values (any of the primitive types) indexed by Strings (the keyword).
897
//
898
// A table keyword/value pair might be
899
// <srcblock>
900
// Observer = Grote Reber
901
// Date = 10 october 1942
902
// </srcblock>
903
// Column keyword/value pairs might be
904
// <srcblock>
905
// Units = mJy
906
// Reference Pixel = 320
907
// </srcblock>
908
// The class
909
// <linkto class="TableRecord:description">TableRecord</linkto>
910
// represents the keywords in a table.
911
// It is (indirectly) derived from the standard record classes in the class
912
// <linkto class="Record:description">Record</linkto>
913
914
// <ANCHOR NAME="Tables:Table Description">
915
// <h3>Table Description</h3></ANCHOR>
916
//
917
// A table contains a description of itself, which defines the layout of the
918
// columns and the keyword sets for the table and for the individual columns.
919
// It may also define initial keyword sets and default values for the columns.
920
// Such a default value is automatically stored in a cell in the table column,
921
// whenever a row is added to the table.
922
//
923
// The creation of the table descriptor is the first step in the creation of
924
// a new table. The description is part of the table itself, but may also
925
// exist in a separate file. This is useful if you need to create a number
926
// of tables with the same structure; in other circumstances it probably
927
// should be avoided.
928
//
929
// The public classes to set up a table description are:
930
// <ul>
931
// <li> <linkto class="TableDesc:description">TableDesc</linkto>
932
// -- holds the table description.
933
// <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
934
// -- holds a generic column description.
935
// <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc<T>
936
// </linkto>
937
// -- defines a column containing a scalar value.
938
// <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
939
// </linkto>
940
// -- defines a column containing a scalar record value.
941
// <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc<T>
942
// </linkto>
943
// -- defines a column containing an (in)direct array.
944
// </ul>
945
//
946
// Here follows a typical example of the construction of a table
947
// description. For more specialized things -- like the definition of a
948
// default data manager -- we refer to the descriptions of the above
949
// mentioned classes.
950
//
951
// <srcblock>
952
// #include <casacore/tables/Tables/TableDesc.h>
953
// #include <casacore/tables/Tables/ScaColDesc.h>
954
// #include <casacore/tables/Tables/ArrColDesc.h>
955
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
956
// #include <casacore/tables/Tables/TableRecord.h>
957
// #include <casacore/casa/Arrays/IPosition.h>
958
// #include <casacore/casa/Arrays/Vector.h>
959
//
960
// main()
961
// {
962
// // Create a new table description
963
// // Define a comment for the table description.
964
// // Define some keywords.
965
// ColumnDesc colDesc1, colDesc2;
966
// TableDesc td("tTableDesc", "1", TableDesc::New);
967
// td.comment() = "A test of class TableDesc";
968
// td.rwKeywordSet().define ("ra", float(3.14));
969
// td.rwKeywordSet().define ("equinox", double(1950));
970
// td.rwKeywordSet().define ("aa", Int(1));
971
//
972
// // Define an integer column ab.
973
// td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
974
//
975
// // Add a scalar integer column ac, define keywords for it
976
// // and define a default value 0.
977
// // Overwrite the value of keyword unit.
978
// ScalarColumnDesc<Int> acColumn("ac");
979
// acColumn.rwKeywordSet().define ("scale", Complex(0,0));
980
// acColumn.rwKeywordSet().define ("unit", "");
981
// acColumn.setDefault (0);
982
// td.addColumn (acColumn);
983
// td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
984
//
985
// // Add a scalar string column ad and define its comment string.
986
// td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
987
//
988
// // Now define array columns.
989
// // This one is indirect and has no dimensionality mentioned yet.
990
// td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
991
// // This one is indirect and has 3-dim arrays.
992
// td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3));
993
// // This one is direct and has 2-dim arrays with axes length 4 and 7.
994
// td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3",
995
// IPosition(2,4,7),
996
// ColumnDesc::Direct));
997
//
998
// // Add columns containing records.
999
// td.addColumn (ScalarRecordColumnDesc ("Rec1"));
1000
// }
1001
// </srcblock>
1002
1003
// <ANCHOR NAME="Tables:Data Managers">
1004
// <h3>Data Managers</h3></ANCHOR>
1005
//
1006
// Data managers take care of the actual access to the data in a column.
1007
// There are two kinds of data managers:
1008
// <ol>
1009
// <li> <A HREF="#Tables:storage managers">Storage managers</A> --
1010
// which store the data as such. They can only handle the standard
1011
// data types (Bool,...,String) as discussed in the section about the
1012
// <A HREF="#Tables:properties">table properties</A>.
1013
// <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
1014
// -- which manipulate the data.
1015
// An engine could be a simple thing like scaling the data (as done
1016
// in classic AIPS to reduce data storage), but it could also be an
1017
// elaborate thing like applying corrections on-the-fly.
1018
// <br>A special engine is VirtualTaQLColumn which can be used to define
1019
// the contents of a column by means of a TaQL expression. In particular,
1020
// it can be used to define a constant value for the entire column.
1021
// But it can also be used to calculate the UVW-coordinates on-the-fly.
1022
// <br>An engine must be used when storing data objects with a non-standard type.
1023
// It has to break down the object into items with standard data types
1024
// which can be stored with a storage manager.
1025
// </ol>
1026
// In general the user of a table does not need to be aware which
1027
// data managers are being used underneath. Only when the table is created
1028
// data managers have to be bound to the columns. Thereafter it is
1029
// completely transparent.
1030
//
1031
// Data managers need to be registered, so they can be found when a table is
1032
// opened. All data managers mentioned below are part of the system and
1033
// pre-registered.
1034
// It is, however, also possible to load data managers on demand. If a data
1035
// manager is not registered, an attempt is made to load a shared library with the
1036
// part of the data manager name (in lowercase) before a dot or left arrow.
1037
// The dot makes it possible to have multiple data managers in a shared library,
1038
// while the left arrow is meant for templated data manager classes.
1039
// <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
1040
// library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1041
// successful, its function <src>register_bitflagsengine()</src> will be
1042
// executed which should register the data manager(s). Thereafter it is known
1043
// and will be used. For example in a file Register.h and Register.cc:
1044
// <srcblock>
1045
// // Declare in .h file as C function, so no name mangling is done.
1046
// extern "C" {
1047
// void register_bitflagsengine();
1048
// }
1049
// // Implement in .cc file.
1050
// void register_bitflagsengine()
1051
// {
1052
// BitFlagsEngine<uChar>::registerClass();
1053
// BitFlagsEngine<Short>::registerClass();
1054
// BitFlagsEngine<Int>::registerClass();
1055
// }
1056
// </srcblock>
1057
// There are several functions that can give information which data managers
1058
// are used for which columns and to obtain the characteristics and properties
1059
// of them. Class RODataManAccessor and derived classes can be used for it
1060
// as well as the functions <src>dataManagerInfo</src> and
1061
// <src>showStructure</src> in class Table.
1062
1063
// <ANCHOR NAME="Tables:storage managers">
1064
// <h3>Storage Managers</h3></ANCHOR>
1065
//
1066
// Storage managers are used to store the data contained in the column cells.
1067
// At table construction time the binding of columns to storage managers is done.
1068
// <br>Each storage manager uses one or more files (usually called table.fi_xxx
1069
// where i is a sequence number and _xxx is some kind of extension).
1070
// Typically several files are used to store the data of the columns of a table.
1071
// <br>In order to reduce the number of files (and to support large block sizes),
1072
// it is possible to have a single container file (a MultiFile) containing all
1073
// data files used by the storage managers. Such a file is called table.mf.
1074
// Note that the program <em>lsmf</em> can be used to see which
1075
// files are contained in a MultiFile. The program <em>tomf</em> can
1076
// convert the files in a MultiFile to regular files.
1077
// <br>At table creation time it is decided if a MultiFile will be used. It
1078
// can be done by means of the StorageOption object given to the SetupNewTable
1079
// constructor and/or by the aipsrc variables:
1080
// <ul>
1081
// <li> <src>table.storage.option</src> which can have the value
1082
// 'multifile', 'sepfile' (meaning separate files), or 'default'.
1083
// Currently the default is to use separate files.
1084
// <li> <src>table.storage.blocksize</src> defines the block size to be
1085
// used by a MultiFile. If 0 is given, the file system's block size
1086
// will be used.
1087
// </ul>
1088
// Almost all standard storage managers support the MultiFile.
1089
// The exception is StManAipsIO, because it is hardly ever used.
1090
//
1091
// Several storage managers exist, each with its own storage characteristics.
1092
// The default and preferred storage manager is <src>StandardStMan</src>.
1093
// Other storage managers should only be used if they pay off in
1094
// file space (like <src>IncrementalStMan</src> for slowly varying data)
1095
// or access speed (like the tiled storage managers for large data arrays).
1096
// <br>The storage managers store the data in a big or little endian
1097
// canonical format. The format can be specified when the table is created.
1098
// By default it uses the endian format as specified in the aipsrc variable
1099
// <code>table.endianformat</code> which can have the value local, big,
1100
// or little. The default is local.
1101
// <ol>
1102
// <li>
1103
// <linkto class="StandardStMan:description">StandardStMan</linkto>
1104
// stores all the values in so-called buckets (equally sized chunks
1105
// in the file). It requires little memory.
1106
// <br>It replaces the old <src>StManAipsIO</src>.
1107
//
1108
// <li>
1109
// <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1110
// uses a storage mechanism resembling "incremental backups". A value
1111
// is only stored if it is different from the previous row. It is
1112
// very well suited for slowly varying data.
1113
// <br>The class <linkto class="ROIncrementalStManAccessor:description">
1114
// ROIncrementalStManAccessor</linkto> can be used to tune the
1115
// behaviour of the <src>IncrementalStMan</src>. It contains functions
1116
// to deal with the cache size and to show the behaviour of the cache.
1117
//
1118
// <li>
1119
// The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1120
// store the data as a tiled hypercube allowing for more or less equally
1121
// efficient data access along all main axes. It can be used for
1122
// UV-data as well as for image data.
1123
//
1124
// <li>
1125
// <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1126
// uses <src>AipsIO</src> to store the data in the columns.
1127
// It supports all table functionality, but its I/O is probably not
1128
// as efficient as other storage managers. It also requires that
1129
// a large part of the table fits in memory.
1130
// <br>It should not be used anymore, because it uses a lot of memory
1131
// for larger tables and because it is not very robust in case an
1132
// application or system crashes.
1133
//
1134
// <li>
1135
// <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1136
// holds the data in memory. It means that data 'stored' with this
1137
// storage manager are NOT persistent.
1138
// <br>This storage manager is primarily meant for tables held in
1139
// memory, but it can also be useful for temporary columns in
1140
// normal tables. Note, however, that if a table is accessed
1141
// concurrently from multiple processes, MemoryStMan data cannot be
1142
// synchronized.
1143
//
1144
// <li>
1145
// @ref dyscostman.DyscoStMan is a class that stores data with lossy
1146
// compression. It combines non-linear least-squares quantization and
1147
// different kinds of normalization. With the typical factor of 4
1148
// compression, the loss in accuracy from lossy compression is
1149
// negligible. It should only be used for real (non-simulated) data
1150
// that is in a Measurement Set.
1151
// The method is described in this article:
1152
// https://arxiv.org/abs/1609.02019.
1153
//
1154
// <li>
1155
// <linkto class="Adios2StMan:description">Adios2StMan</linkto> uses the
1156
// <A HREF="https://github.com/ornladios/ADIOS2">ADIOS2 framework</A> to
1157
// store and load column data.
1158
// <br>ADIOS2 itself has several configurable storage backends, and this
1159
// flexibility is also available via Adios2StMan. This includes, among other
1160
// things, storing compressed data, or choosing a different on-disk format.
1161
// <br>This storage manager is also special in that it provides parallel
1162
// writing capabilities for MPI processes, so that multiple processes can
1163
// write into different sections of the same column concurrently.
1164
// </ol>
1165
//
1166
// The storage manager framework makes it possible to support arbitrary files
1167
// as tables. This has been used in a case where a file is filled
1168
// by the data acquisition system of a telescope. The file is simultaneously
1169
// used as a table using a dedicated storage manager. The table
1170
// system and storage manager provide a sync function to synchronize
1171
// the processes, i.e. to make CTDS aware of changes
1172
// in the file size (thus in the table size) by the filling process.
1173
//
1174
// <note role=tip>
1175
// Not all data managers support all the table functionality. So, the choice
1176
// of a data manager can greatly influence the type of operations you can do
1177
// on the table as a whole.
1178
// For example, if a column uses the tiled storage manager,
1179
// it is not possible to delete rows from the table, because that storage
1180
// manager will not support deletion of rows.
1181
// However, it is always possible to delete all columns of a data
1182
// manager in one single call.
1183
// </note>
1184
1185
// <ANCHOR NAME="Tables:TiledStMan">
1186
// <h3>Tiled Storage Manager</h3></ANCHOR>
1187
// The Tiled Storage Managers allow one to store the data of
1188
// one or more columns in a tiled way. Tiling means
1189
// that the data are stored without a preferred order to make access
1190
// along the different main axes equally efficient. This is done by
1191
// storing the data in so-called tiles (i.e. equally shaped subsets of an
1192
// array) to increase data locality. The user can define the tile shape
1193
// to optimize for the most frequently used access.
1194
// <p>
1195
// The Tiled Storage Manager has the following properties:
1196
// <ul>
1197
// <li> There can be more than one Tiled Storage Manager in
1198
// a table; each with its own (unique) name.
1199
// <li> Each Tiled Storage Manager can store an
1200
// N-dimensional so-called hypercolumn.
1201
// Elaborate hypercolumns can be defined using
1202
// <linkto file="TableDesc.h#defineHypercolumn">
1203
// TableDesc::defineHypercolumn</linkto>.
1204
// <br>Note that defining a hypercolumn is only necessary if it
1205
// contains multiple columns or if the TiledDataStMan is used.
1206
// It means that in practice it is hardly ever needed to define a
1207
// hypercolumn.
1208
// <br>A hypercolumn consists of up to three types of columns:
1209
// <dl>
1210
// <dt> Data columns
1211
// <dd> contain the data to be stored in a tiled way. This will
1212
// be done in tiled hypercubes.
1213
// There must be at least one data column.
1214
// <br> For example: a table contains UV-data with
1215
// data columns "Visibility" and "Weight".
1216
// <dt> Coordinate columns
1217
// <dd> define the world coordinates of the pixels in the data columns.
1218
// Coordinate columns are optional, but if given there must
1219
// be N coordinate columns for an N-dimensional hypercolumn.
1220
// <br>
1221
// For example: the data in the example above is 4-dimensional
1222
// and has coordinate columns "Time", "Baseline", "Frequency",
1223
// and "Polarization".
1224
// <dt> Id columns
1225
// <dd> are needed if TiledDataStMan is used.
1226
// Different rows in the data columns can be stored in different
1227
// hypercubes. The values in the id column(s) uniquely identify
1228
// the hypercube a row is stored in.
1229
// <br>
1230
// For example: the line and continuum data in a MeasurementSet
1231
// table need to be stored in 2 different hypercubes (because
1232
// their shapes are different (see below)). A column containing
1233
// the type (line or continuum) has to be used as an id column.
1234
// </dl>
1235
// <li> If multiple data columns are used, the shape of their data
1236
// must be conforming in each individual row.
1237
// If data in different rows have different shapes, they must be
1238
// stored in different hypercubes, because a hypercube can only hold
1239
// data with conforming shapes.
1240
// <br>
1241
// Thus in the example above, rows with line data will have conforming
1242
// shapes and can be stored in one hypercube. The continuum data
1243
// will have another shape and can be stored in another hypercube.
1244
// <br>
1245
// The storage manager keeps track of the mapping of rows to/from
1246
// hypercubes.
1247
// <li> Each hypercube can be tiled in its own way. It is not required
1248
// that an integer number of tiles fits in the hypercube. The last
1249
// tiles will be padded as needed.
1250
// <li> The last axis of a hypercube can be extensible. This means that
1251
// the size of that axis does not need to be defined when the
1252
// hypercube is defined in the storage manager. Instead, the hypercube
1253
// can be extended when another chunk of data has to be stored.
1254
// This can be very useful in, for example, a (quasi-)realtime
1255
// environment where the size of the time axis is not known.
1256
// <li> If coordinate columns are defined, they describe the coordinates
1257
// of the axes of the hypercubes. Each hypercube has its own set of
1258
// coordinates.
1259
// <li> Data and id columns have to be stored with the Tiled
1260
// Storage Manager. However, coordinate columns do not need to be
1261
// stored with the Tiled Storage Manager.
1262
// Especially in the case where the coordinates for a hypercube axis
1263
// are varying (i.e. dependent on other axes), another storage manager
1264
// has to be used (because the Tiled Storage Manager can only
1265
// hold constant coordinates).
1266
// </ul>
1267
// <p>
1268
// The following Tiled Storage Managers are available:
1269
// <dl>
1270
// <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1271
// <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1272
// by using the array shape as the id value.
1273
// Similarly to <src>TiledDataStMan</src> it can maintain multiple
1274
// hypercubes and store multiple rows in a hypercube, but it is
1275
// easier to use, because the special <src>addHypercube</src> and
1276
// <src>extendHypercube</src> functions are not needed.
1277
// A hypercube is automatically added when a new array shape is
1278
// encountered.
1279
// <br>
1280
// This storage manager could be used for a table with a column
1281
// containing line and continuum data, which will result
1282
// in 2 hypercubes.
1283
// <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1284
// <dd> creates (automatically) a new hypercube for each row.
1285
// Thus each row of the hypercolumn is stored in a separate hypercube.
1286
// Note that the row number serves as the id value. So an id column
1287
// is not needed, although there are multiple hypercubes.
1288
// <br>
1289
// This storage manager is meant for tables where the data arrays
1290
// in the different rows are not accessed together. One can think
1291
// of a column containing images. Each row contains an image and
1292
// only one image is shown at a time.
1293
// <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1294
// <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1295
// in the hypercube have to have the same shape and therefore this
1296
// storage manager is only possible if all columns in the hypercolumn
1297
// have the attribute FixedShape.
1298
// <br>
1299
// This storage manager could be used for a table with a column
1300
// containing images for the Stokes parameters I, Q, U, and V.
1301
// By storing them in one hypercube, it is possible to retrieve
1302
// the 4 Stokes values for a subset of the image or for an individual
1303
// pixel in a very efficient way.
1304
// <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1305
// <dd> allows one to control the creation and extension of hypercubes.
1306
// This is done by means of the class
1307
// <linkto class=TiledDataStManAccessor:description>
1308
// TiledDataStManAccessor</linkto>.
1309
// It makes it possible to store, say, row 0-9 in hypercube A,
1310
// row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1311
// <br>
1312
// The drawback of this storage manager is that its hypercubes are not
1313
// automatically extended when adding new rows. The special functions
1314
// <src>addHypercube</src> and <src>extendHypercube</src> have to be
1315
// used making it somewhat tedious to use.
1316
// Therefore this storage manager may become obsolete in the near future.
1317
// </dl>
1318
// The Tiled Storage Managers have 3 ways to access and cache the data.
1319
// Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
1320
// access choice and use it in a Table constructor.
1321
// <ul>
1322
// <li> The old way (the only way until January 2010) uses a cache
1323
// of its own to keep tiles that might need to be reused. It will always
1324
// access entire tiles, even if only a small part is needed.
1325
// It is possible to define a maximum cache size. The description of class
1326
// <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1327
// contains a discussion about the effect of defining a maximum cache
1328
// size.
1329
// <li> Memory-mapping the data files. In this way the operating system
1330
// takes care of the IO and caching. However, the limited address space
1331
// may preclude using it for large tables on 32-bit systems.
1332
// <li> Use buffered IO and let the kernel's file cache take care of caching.
1333
// It will access the data in chunks of the given buffer size, so the
1334
// entire tile does not need to be accessed if only a small part is
1335
// needed.
1336
// </ul>
1337
// Apart from reading, all access ways described above can also handle writing
1338
// and extending tables. They create fully equal files. Both little and big
1339
// endian data can be read or written.
1340
1341
// <ANCHOR NAME="Tables:virtual column engines">
1342
// <h3>Virtual Column Engines</h3></ANCHOR>
1343
//
1344
// Virtual column engines are used to implement the virtual (i.e.
1345
// calculated-on-the-fly) columns. CTDS provides
1346
// an abstract base class (or "interface class")
1347
// <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1348
// that specifies the protocol for these engines.
1349
// The programmer must derive a concrete class to implement
1350
// the application-specific virtual column.
1351
// <p>
1352
// For example: the programmer
1353
// needs a column in a table which is the difference between two other
1354
// columns. (Perhaps these two other columns are updated periodically
1355
// during the execution of a program.) A good way to handle this would
1356
// be to have a virtual column in the table, and write a virtual column
1357
// engine which knows how to calculate the difference between corresponding
1358
// cells of the two other columns. So the result is that accessing a
1359
// particular cell of the virtual column invokes the virtual column engine,
1360
// which then gets the values from the other two columns, and returns their
1361
// difference. This particular example could be done using
1362
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1363
// <p>
1364
// Several virtual column engines exist:
1365
// <ol>
1366
// <li> The class
1367
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1368
// makes it possible to define a column as an arbitrary expression of
1369
// other columns. It uses the <a href="../notes/199.html">TaQL</a>
1370
// CALC command. The virtual column can be a scalar or an array and
1371
// can have one of the standard data types supported by CTDS.
1372
// <li> The class
1373
// <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1374
// maps an integer bit flags column to a Bool column. A read and write mask
1375
// can be defined telling which bits to take into account when mapping
1376
// to and from Bool (thus when reading or writing the Bool).
1377
// <li> The class
1378
// <linkto class="CompressFloat:description">CompressFloat</linkto>
1379
// compresses a single precision floating point array by scaling the
1380
// values to shorts (16-bit integer).
1381
// <li> The class
1382
// <linkto class="CompressComplex:description">CompressComplex</linkto>
1383
// compresses a single precision complex array by scaling the
1384
// values to shorts (16-bit integer). In fact, the 2 parts of the complex
1385
// number are combined to a 32-bit integer.
1386
// <li> The class
1387
// <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1388
// does the same as CompressComplex, but optimizes for the case where the
1389
// imaginary part is zero (which is often the case for Single Dish data).
1390
// <li> The double templated class
1391
// <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1392
// scales the data in an array from, for example,
1393
// float to short before putting it.
1394
// <li> The double templated class
1395
// <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1396
// converts the data from one data type to another. Sometimes it might be
1397
// needed to store the residual data in an MS in double precision.
1398
// Because the imaging task can only handle single precision, this engine
1399
// can be used to map the data from double to single precision.
1400
// <li> The double templated class
1401
// <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1402
// converts the data from one data type to another with the possibility
1403
// to reduce the number of dimensions. For example, it can be used to
1404
// store a 2-d array of StokesVector objects as a 3-d array of floats
1405
// by treating the 4 data elements as an extra array axis. If the
1406
// StokesVector class is simple, it can be done very efficiently.
1407
// <li> The class
1408
// <linkto class="ForwardColumnEngine:description">
1409
// ForwardColumnEngine</linkto>
1410
// forwards the gets and puts on a row in a column to the same row
1411
// in a column with the same name in another table. This provides
1412
// a virtual copy of the referenced column.
1413
// <li> The class
1414
// <linkto class="ForwardColumnIndexedRowEngine:description">
1415
// ForwardColumnIndexedRowEngine</linkto>
1416
// is similar to <src>ForwardColumnEngine</src>.
1417
// However, instead of forwarding it to the same row it uses a
1418
// column to map its row number to a row number in the referenced
1419
// table. In this way multiple rows can share the same data.
1420
// This data manager only allows for get operations.
1421
// <li> The calibration module has implemented a virtual column engine
1422
// to do on-the-fly calibration in a transparent way.
1423
// </ol>
1424
// To handle arbitrary data types the templated abstract base class
1425
// <linkto class="VSCEngine:description">VSCEngine</linkto>
1426
// has been written. An example of how to use this class can be
1427
// found in the demo program <src>dVSCEngine.cc</src>.
1428
1429
// <ANCHOR NAME="Tables:LockSync">
1430
// <h3>Table locking and synchronization</h3></ANCHOR>
1431
//
1432
// Multiple concurrent readers and writers (also via NFS) of a
1433
// table are supported by means of a locking/synchronization mechanism.
1434
// This mechanism is not very sophisticated in the sense that it is
1435
// very coarsely grained. When locking, the entire table gets locked.
1436
// A special lock file is used to lock the table. This lock file also
1437
// contains some synchronization data.
1438
// <p>
1439
// Six ways of locking are supported (see class
1440
// <linkto class=TableLock>TableLock</linkto>):
1441
// <dl>
1442
// <dt> TableLock::PermanentLocking(Wait)
1443
// <dd> locks the table permanently (from open till close). This means
1444
// that one writer OR multiple readers are possible.
1445
// <dt> TableLock::AutoLocking
1446
// <dd> does the locking automatically. This is the default mode.
1447
// This mode makes it possible that a table is shared amongst
1448
// processes without the user needing to write any special code.
1449
// It also means that a lock is only released when needed.
1450
// <dt> TableLock::AutoNoReadLocking
1451
// <dd> is similar to AutoLocking. However, no lock is acquired when
1452
// reading the table making it possible to read the table while
1453
// another process holds a write-lock. It also means that for read
1454
// purposes no automatic synchronization is done when the table is
1455
// updated in another process.
1456
// Explicit synchronization can be done by means of the function
1457
// <src>Table::resync</src>.
1458
// <dt> TableLock::UserLocking
1459
// <dd> requires that the programmer explicitly acquires and releases
1460
// a lock on the table. This makes some kind of transaction
1461
// processing possible. E.g. set a write lock, add a row,
1462
// write all data into the row and release the lock.
1463
// The Table functions <src>lock</src> and <src>unlock</src>
1464
// have to be used to acquire and release a (read or write) lock.
1465
// <dt> TableLock::UserNoReadLocking
1466
// <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1467
// no lock is needed to read the table.
1468
// <dt> TableLock::NoLocking
1469
// <dd> does not use table locking. It is the responsibility of the
1470
// user to ensure that no concurrent access is done on the same
1471
// bucket or tile in a storage manager, otherwise a table might
1472
// get corrupted.
1473
// <br>This mode is always used if Casacore is built with
1474
// -DAIPS_TABLE_NOLOCKING.
1475
// </dl>
1476
// Synchronization of the processes accessing the same table is done
1477
// by means of the lock file. When a lock is released, the storage
1478
// managers flush their data into the table files. Some synchronization data
1479
// is written into the lock file telling the new number of table rows
1480
// and telling which storage managers have written data.
1481
// This information is read when another process acquires the lock
1482
// and is used to determine which storage managers have to refresh
1483
// their internal caches.
1484
// <br>Note that for the NoReadLocking modes (see above) explicit
1485
// synchronization might be needed using <src>Table::resync</src>.
1486
// <p>
1487
// The function <src>Table::hasDataChanged</src> can be used to check
1488
// if a table is (being) changed by another process. In this way
1489
// a program can react to it. E.g. the table browser can refresh its
1490
// screen when the underlying table is changed.
1491
// <p>
1492
// In general the default locking option will do.
1493
// From the above it should be clear that heavy concurrent access
1494
// results in a lot of flushing, thus will have a negative impact on
1495
// performance. If uninterrupted access to a table is needed,
1496
// the <src>PermanentLocking</src> option should be used.
1497
// If transaction-like processing is done (e.g. updating a table
1498
// containing an observation catalogue), the <src>UserLocking</src>
1499
// option is probably best.
1500
// <p>
1501
// Creation or deletion of a table is not possible if that table
1502
// is still open in another process. The function
1503
// <src>Table::isMultiUsed()</src> can be used to check if a table
1504
// is open in other processes.
1505
// <br>
1506
// The function <src>TableUtil::deleteTable</src> should be used to delete
1507
// a table. Before deleting the table it ensures that it is writable
1508
// and that it is not open in the current or another process.
1509
// <p>
1510
// The following example wants to read the table uninterrupted, thus it uses
1511
// the <src>PermanentLocking</src> option. It also wants to wait
1512
// until the lock is actually acquired.
1513
// Note that the destructor closes the table and releases the lock.
1514
// <srcblock>
1515
// // Open the table (readonly).
1516
// // Acquire a permanent (read) lock.
1517
// // It waits until the lock is acquired.
1518
// Table tab ("some.name",
1519
// TableLock(TableLock::PermanentLockingWait));
1520
// </srcblock>
1521
//
1522
// The following example uses the automatic locking.
1523
// It tells the system to check about every 20 seconds if another
1524
// process wants access to the table.
1525
// <srcblock>
1526
// // Open the table (readonly).
1527
// Table tab ("some.name",
1528
// TableLock(TableLock::AutoLocking, 20));
1529
// </srcblock>
1530
//
1531
// The following example gets data (say from a GUI) and writes it
1532
// as a row into the table. To lock the table as little as possible,
1533
// the lock is acquired just before writing and released immediately
1534
// thereafter.
1535
// <srcblock>
1536
// // Open the table (writable).
1537
// Table tab ("some.name",
1538
// TableLock(TableLock::UserLocking),
1539
// Table::Update);
1540
// while (True) {
1541
// get input data
1542
// tab.lock(); // Acquire a write lock and wait for it.
1543
// tab.addRow();
1544
// write data into the row
1545
// tab.unlock(); // Release the lock.
1546
// }
1547
// </srcblock>
1548
//
1549
// The following example deletes a table if it is not used in
1550
// another process.
1551
// <srcblock>
1552
// Table tab ("some.name");
1553
// if (! tab.isMultiUsed()) {
1554
// tab.markForDelete();
1555
// }
1556
// </srcblock>
1557
1558
// <ANCHOR NAME="Tables:KeyLookup">
1559
// <h3>Table lookup based on a key</h3></ANCHOR>
1560
//
1561
// Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1562
// user a means to find the rows matching a given key or key range.
1563
// It is a somewhat primitive replacement of a B-tree index and in the
1564
// future it may be replaced by a proper B+-tree implementation.
1565
// <p>
1566
// The <src>ColumnsIndex</src> class makes it possible to build an
1567
// in-core index on one or more columns. Looking up a key or key range
1568
// is done using a binary search on that index. It returns a vector
1569
// containing the row numbers of the rows matching the key (range).
1570
// <p>
1571
// The class is not capable of tracing changes in the underlying column(s).
1572
// It detects a change in the number of rows and updates the index
1573
// accordingly. However, it has to be told explicitly when a value
1574
// in the underlying column(s) changes.
1575
// <p>
1576
// The following example shows how the class can be used.
1577
// <example>
1578
// Suppose one has an antenna table with key ANTENNA.
1579
// <srcblock>
1580
// // Open the table and make an index for column ANTENNA.
1581
// Table tab("antenna.tab");
1582
// ColumnsIndex colInx(tab, "ANTENNA");
1583
// // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1584
// // Its data type has to match the data type of the column.
1585
// RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1586
// // Now loop in some way and find the row for the antenna
1587
// // involved in that loop.
1588
// Bool found;
1589
// while (...) {
1590
// // Fill the key field and get the row number.
1591
// // ANTENNA is a unique key, so only one row number matches.
1592
// // Otherwise function getRowNumbers would have to be used.
1593
// *antFld = antenna;
1594
// uInt antRownr = colInx.getRowNumber (found);
1595
// if (!found) {
1596
// cout << "Antenna " << antenna << " is unknown" << endl;
1597
// } else {
1598
// // antRownr can now be used to get data from that row in
1599
// // the antenna table.
1600
// }
1601
// }
1602
// </srcblock>
1603
// </example>
1604
// <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1605
// advanced example. It shows how to use a private compare function
1606
// to adjust the lookup if the index does not contain single
1607
// key values, but intervals instead. This is useful if a row in
1608
// a (sub)table is valid for, say, a time range instead of a single
1609
// timestamp.
1610
1611
// <ANCHOR NAME="Tables:performance">
1612
// <h3>Performance and robustness considerations</h3></ANCHOR>
1613
//
1614
// CTDS resembles a database system, but it is not as robust.
1615
// It lacks the transaction and logging facilities common to data base systems.
1616
// It means that in case of a crash data might be lost.
1617
// To reduce the risk of data loss to
1618
// a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1619
// with an <tt>fsync</tt> to ensure that all data are really written.
1620
// However, that can degrade the performance because it involves extra writes.
1621
// So one should find the right balance between robustness and performance.
1622
//
1623
// To get a good feeling for the performance issues, it is important to
1624
// understand some of the internals of CTDS.
1625
// <br>The storage managers drive the performance. All storage managers use
1626
// buckets (called tiles for the TiledStMan) which contain the data.
1627
// All IO is done by bucket. The bucket/tile size is defined when creating
1628
// the storage manager objects. Sometimes the default will do, but usually
1629
// it is better to set it explicitly.
1630
//
1631
// It is best to do a flush when a tile is full.
1632
// For example: <br>
1633
// When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1634
// or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1635
// store, say, N/2 rows in a tile and do a flush each time all baselines
1636
// are written. In that way tiles are fully filled when doing the flush, so
1637
// no extra IO is involved.
1638
// <br>Here is some code showing this when creating a MeasurementSet.
1639
// The code should speak for itself.
1640
// <srcblock>
1641
// MS* createMS (const String& msName, int nrchan, int nrant)
1642
// {
1643
// // Get the MS main default table description.
1644
// TableDesc td = MS::requiredTableDesc();
1645
// // Add the data column and its unit.
1646
// MS::addColumnToDesc(td, MS::DATA, 2);
1647
// td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1648
// define("UNIT","Jy");
1649
// // Store the DATA and FLAG column in two separate files.
1650
// // In this way accessing FLAG only is much cheaper than
1651
// // when combining DATA and FLAG.
1652
// // All data have the same shape, thus use TiledColumnStMan.
1653
// // Also store UVW with TiledColumnStMan.
1654
// Vector<String> tsmNames(1);
1655
// tsmNames[0] = MS::columnName(MS::DATA);
1656
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1657
// td.defineHypercolumn("TiledData", 3, tsmNames);
1658
// tsmNames[0] = MS::columnName(MS::FLAG);
1659
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1660
// td.defineHypercolumn("TiledFlag", 3, tsmNames);
1661
// tsmNames[0] = MS::columnName(MS::UVW);
1662
// td.defineHypercolumn("TiledUVW", 2, tsmNames);
1663
// // Setup the new table.
1664
// SetupNewTable newTab(msName, td, Table::New);
1665
// // Most columns vary slowly and use the IncrStMan.
1666
// IncrementalStMan incrStMan("ISMData");
1667
// // A few columns use the StandardStMan (set an appropriate bucket size).
1668
// StandardStMan stanStMan("SSMData", 32768);
1669
// // Store all pol and freq and some rows in a single tile.
1670
// // autocorrelations are written, thus in total there are
1671
// // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1672
// // integer number of tiles.
1673
// TiledColumnStMan tiledData("TiledData",
1674
// IPosition(3,4,nrchan,(nrant+1)/2));
1675
// TiledColumnStMan tiledFlag("TiledFlag",
1676
// IPosition(3,4,nrchan,8*(nrant+1)/2));
1677
// TiledColumnStMan tiledUVW("TiledUVW",
1678
// IPosition(2,3,nrant*(nrant+1)/2));
1679
// newTab.bindAll (incrStMan);
1680
// newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1681
// newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1682
// newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1683
// newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1684
// newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1685
// // Create the MS and its subtables.
1686
// // Get access to its columns.
1687
// MS* msp = new MeasurementSet(newTab);
1688
// // Create all subtables.
1689
// // Do this after the creation of optional subtables,
1690
// // so the MS will know about those optional subtables.
1691
// msp->createDefaultSubtables (Table::New);
1692
// return msp;
1693
// }
1694
// </srcblock>
1695
1696
// <h4>Some more performance considerations</h4>
1697
// Which storage managers to use and how to use them depends heavily on
1698
// the type of data and the access patterns to the data. Here follow some
1699
// guidelines:
1700
// <ol>
1701
// <li> Scalar data can be stored with the StandardStMan (SSM) or
1702
// IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1703
// in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1704
// Note that very long strings (longer than the bucketsize) can only
1705
// be stored with the SSM.
1706
// <li> Any number of storage managers can be used. In fact, each column
1707
// can have a storage manager of its own resulting in column-wise
1708
// stored data which is more and more used in data base systems.
1709
// In that way a query or sort on that column is very fast, because
1710
// the buckets to read only contain data of that column.
1711
// In practice one can decide to combine a few frequently used columns
1712
// in a storage manager.
1713
// <li> Array data can be stored with any column manager. Small fixed size
1714
// arrays can be stored directly with the SSM
1715
// (or ISM if not changing much).
1716
// However, they can also be stored with a TiledStMan (TSM) as shown
1717
// for the UVW column in the example above.
1718
// <br> Large arrays should usually be stored with a TSM. However,
1719
// if it must be possible to change the shape of an array after it
1720
// was stored, the SSM (or ISM) must be used. Note that in that
1721
// case a lot of disk space can be wasted, because the SSM and ISM
1722
// store the array data at the end of the file if the array got
1723
// bigger and do not reuse the old space. The only way to
1724
// reclaim it is by making a deep copy of the entire table.
1725
// <li> If an array is stored with a TSM, it is important to decide
1726
// which TSM to use.
1727
// <ol>
1728
// <li> The TiledColumnStMan is the most efficient, but only suitable
1729
// for arrays having the same shape in the entire column.
1730
// <li> The TiledShapeStMan is suitable for columns where the arrays
1731
// can have a few shapes.
1732
// <li> The TiledCellStMan is suitable for columns where the arrays
1733
// can have many different shapes.
1734
// </ol>
1735
// This is discussed in more detail
1736
// <a href="#Tables:TiledStMan">above</a>.
1737
// <li> If storing an array with a TSM, it can be very important to
1738
// choose the right tile shape. Not only does this define the size
1739
// of a tile, but it also defines if access in other directions
1740
// than the natural direction can be fast. It is also discussed in
1741
// more detail <a href="#Tables:TiledStMan">above</a>.
1742
// <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1743
// and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1744
// is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1745
// to separate them, otherwise tiles containing FLAG also contain DATA making the
1746
// tiles much bigger, thus more expensive to access.
1747
// </ol>
1748
//
1749
// <ANCHOR NAME="Tables:iotracing">
1750
// <h4>IO Tracing</h4></ANCHOR>
1751
//
1752
// Several forms of tracing can be done to see how the Table I/O performs.
1753
// <ul>
1754
// <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1755
// collect trace information about the physical IO.
1756
// <li> The function <src>showCacheStatistics</src> in class
1757
// TiledStManAccessor can be used to show the number of actual reads
1758
// and writes and the percentage of cache hits.
1759
// <li> The software has some options to trace the operations done on
1760
// tables. It is possible to specify the columns and/or the operations
1761
// to be traced. The following <src>aipsrc</src> variables can be used.
1762
// <ul>
1763
// <li> <src>table.trace.filename</src> specifies the file to write the
1764
// trace output to. If not given or empty, no tracing will be done.
1765
// The file name can contain environment variables or a tilde.
1766
// <li> <src>table.trace.operation</src> specifies the operations to be
1767
// traced. It is a string containing s, r, and/or w where
1768
// s means tracing RefTable construction (selection/sort),
1769
// r means column reads, and w means column writes.
1770
// If empty, only the high level table operations (open, create, close)
1771
// will be traced.
1772
// <li> <src>table.trace.columntype</src> specifies the types of columns to
1773
// be traced. It is a string containing the characters s, a, and/or r.
1774
// s means all scalar columns, a all array columns, and r all record
1775
// columns. If empty and if <src>table.trace.column</src> is empty,
1776
// its default value is a.
1777
// <li> <src>table.trace.column</src> specifies names of columns to be
1778
// traced. Its value can be one or more glob-like patterns separated
1779
// by commas without any whitespace. The default is empty.
1780
// For example:
1781
// <srcblock>
1782
// table.trace.column: *DATA,FLAG,WEIGHT*
1783
// </srcblock>
1784
// to trace all DATA, the FLAG, and all WEIGHT columns.
1785
// </ul>
1786
// The trace output is a text file with the following columns
1787
// separated by a space.
1788
// <ul>
1789
// <li> The UTC time the trace line was written (with msec accuracy).
1790
// <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1791
// s(election/sort/iter), p(rojection).
1792
// t means an arbitrary table operation as given in the name column.
1793
// <li> The table-id (as t=i) given at table creation (new) or open.
1794
// <li> The table name, column name, or table operation
1795
// (as <src>*oper*</src>).
1796
// <src>*reftable*</src> means that the operation is on a RefTable
1797
// (thus result of selection, sort, projection, or iteration).
1798
// <li> The row or rows to access (* means all rows).
1799
// Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1800
// where e and i are only given if applicable (default i is 1).
1801
// Note that e is inclusive and defaults to s.
1802
// <li> The optional array shape to access (none means scalar).
1803
// In case multiple rows are accessed, the last shape value is the
1804
// number of rows.
1805
// <li> The optional slice of the array in each row as [start][end][stride].
1806
// </ul>
1807
// Shape, start, end, and stride are given in Fortran-order as
1808
// [n1,n2,...].
1809
// </ul>
1810
1811
// <ANCHOR NAME="Tables:applications">
1812
// <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1813
// <ul>
1814
// <li><em>showtableinfo</em> shows the structure of a table. It can show:
1815
// <ul>
1816
// <li> the columns and their format (optionally sorted on name)
1817
// <li> the data managers used to store the column data
1818
// <li> the table and/or column keywords and their values
1819
// <li> recursively the same info of the subtables
1820
// </ul>
1821
// <li><em>showtablelock</em> shows if a table is locked or opened and by
1822
// which process.
1823
// <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1824
// <li><em>tomf</em> copies the given files to a MultiFile.
1825
// <li><em>taql</em> can be used to query a table using the
1826
// <a href="../notes/199.html">Table Query Language</a> (TaQL).
1827
// </ul>
1828
//
1829
// </synopsis>
1830
// </module>
1831
1832
1833
1834
}
//# NAMESPACE CASACORE - END
1835
1836
#endif
Slicer.h
ColumnDesc.h
ColumnsIndexArray.h
SetupNewTab.h
TableVector.h
aips.h
ArrayColumn.h
ScaColDesc.h
ArrColDesc.h
TableRow.h
Array.h
TableLock.h
TableUtil.h
DataMan.h
ScaRecordColDesc.h
TableDesc.h
TableRecord.h
ColumnsIndex.h
TabVecMath.h
Slice.h
TableCopy.h
Table.h
TaQL.h
ScalarColumn.h
RecordField.h
TabVecLogic.h
Generated by
1.8.5