casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Tables.h
Go to the documentation of this file.
1 //# Tables.h: The Tables module - Casacore data storage
2 //# Copyright (C) 1994-2010
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_TABLES_H
29 #define TABLES_TABLES_H
30 
31 //# Includes
32 //# table description
33 #include <casacore/casa/aips.h>
39 
40 //# table access
52 
53 //# keywords
56 
57 //# table lookup
60 
61 //# table vectors
65 
66 //# data managers
68 
69 //# table expressions (for selection of rows)
70 #include <casacore/tables/TaQL.h>
71 
72 
73 namespace casacore { //# NAMESPACE CASACORE - BEGIN
74 
75 // <module>
76 
77 // <summary>
78 // CTDS (Casacore Table Data System) is the data storage mechanism for Casacore
79 // </summary>
80 
81 // <use visibility=export>
82 
83 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
84 // </reviewed>
85 
86 // <prerequisite>
87 // <li> <linkto class="Record:description">Record</linkto> class
88 // </prerequisite>
89 
90 // <etymology>
91 // "Table" is a formal term from relational database theory:
92 // <em> "The organizing principle in a relational database is the TABLE,
93 // a rectangular, row/column arrangement of data values."</em>
94 // Casacore tables are extensions to traditional tables, but are similar
95 // enough that we use the same name. There is also a strong resemblance
96 // between the uses of Casacore tables, and FITS binary tables, which
97 // provides another reason to use "Tables" to describe the Casacore data
98 // storage mechanism.
99 // </etymology>
100 
101 // <synopsis>
102 // Tables are the fundamental storage mechanism for Casacore. This document
103 // explains <A HREF="#Tables:motivation">why</A> they had to be made,
104 // <A HREF="#Tables:properties">what</A> their properties are, and
105 // <A HREF="#Tables:open">how</A> to use them. The last subject is
106 // discussed and illustrated in a sequence of sections:
107 // <UL>
108 // <LI> <A HREF="#Tables:open">opening</A> an existing table,
109 // <LI> <A HREF="#Tables:read">reading</A> from a table,
110 // <LI> <A HREF="#Tables:creation">creating</A> a new table,
111 // <LI> <A HREF="#Tables:write">writing</A> into a table,
112 // <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
113 // <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
114 // (see also <A HREF="../notes/199.html">Table Query Language</A>),
115 // <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
116 // <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
117 // <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
118 // for concurrent access,
119 // <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
120 // <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
121 // <LI> <A HREF="#Tables:performance">performance and robustness</A>
122 // considerations with some information on
123 // <A HREF="#Tables:iotracing">IO tracing</A>.
124 // </UL>
125 // A few <A HREF="#Tables:applications">applications</A> exist to inspect
126 // and manipulate a table.
127 //
128 // Several UML diagrams describe the class structure of the Tables module.
129 // <ul>
130 // <li> <a href="TableOverview.drawio.svg.html">Global overview of Table access</a>.
131 // <li> <a href="TableDesc.drawio.svg.html">Table and column descriptions</a>.
132 // <li> <a href="TableRecord.drawio.svg.html">Table keywords</a>.
133 // <li> <a href="Table.drawio.svg.html">Table class structure</a>.
134 // <li> <a href="PlainTable.drawio.svg.html">Detailed PlainTable class structure</a>.
135 // <li> <a href="DataManager.drawio.svg.html">DataManagers for storage</a>.
136 // </ul>
137 
138 // <ANCHOR NAME="Tables:motivation">
139 // <motivation></ANCHOR>
140 //
141 // The Casacore tables are mainly based upon the ideas of Allen Farris,
142 // as laid out in the
143 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
144 // AIPS++ Database document</A>, from where the following paragraph is taken:
145 //
146 // <p>
147 // Traditional relational database tables have two features that
148 // decisively limit their applicability to scientific data. First, an item of
149 // data in a column of a table must be atomic -- it must have no internal
150 // structure. A consequence of this restriction is that relational
151 // databases are unable to deal with arrays of data items. Second, an
152 // item of data in a column of a table must not have any direct or
153 // implied linkages to other items of data or data aggregates. This
154 // restriction makes it difficult to model complex relationships between
155 // collections of data. While these restrictions may make it easy to
156 // define a mathematically complete set of data manipulation operations,
157 // they are simply intolerable in a scientific data-handling context.
158 // Multi-dimensional arrays are frequently the most natural modes in
159 // which to discuss and think about scientific data. In addition,
160 // scientific data often requires complex calibration operations that
161 // must draw on large bodies of data about equipment and its performance
162 // in various states. The restrictions imposed by the relational model
163 // make it very difficult to deal with complex problems of this nature.
164 // <p>
165 //
166 // In response to these limitations, and other needs, the Casacore tables were
167 // designed.
168 // </motivation>
169 
170 // <ANCHOR NAME="Tables:properties">
171 // <h3>Table Properties</h3></ANCHOR>
172 //
173 // Casacore tables have the following properties:
174 // <ul>
175 // <li> A table consists of a number of rows and columns.
176 // <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
177 // for the table as a whole and for individual columns. A keyword/value
178 // pair for a column could, for instance, define its unit.
179 // <li> Each table has a <A HREF="#Tables:Table Description">description</A>
180 // which specifies the number and type of columns, and maybe initial
181 // keyword sets and default values for the columns.
182 // <li> A cell in a column may contain
183 // <UL>
184 // <LI> a scalar;
185 // <LI> a "direct" array -- which must have the same shape in all
186 // cells of a column, is usually small, and is stored in the
187 // table itself;
188 // <LI> an "indirect" array -- which may have different shapes in
189 // different cells of the same column, is arbitrarily large,
190 // and is stored in a separate file;
191 // </UL>
192 // <li> A column may be
193 // <UL>
194 // <LI> "filled" -- containing actual data, or
195 // <LI> "virtual" -- containing a recipe telling how the data will
196 // be generated dynamically
197 // </UL>
198 // <li> Only the standard Casacore data types can be used in filled
199 // columns, be they scalars or arrays: Bool, uChar, Short, uShort,
200 // Int, uInt, Int64, float, double, Complex, DComplex and String.
201 // Furthermore scalars containing
202 // <linkto class=TableRecord>record</linkto> values are possible.
203 // <li> A column can have a default value, which will automatically be stored
204 // in a cell of the column, when a row is added to the table.
205 // <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
206 // reading, writing and generation of data. Each column in a table can
207 // be assigned its own data manager, which allows for optimization of
208 // the data storage per column. The choice of data manager determines
209 // whether a column is filled or virtual.
210 // <li> Table data are stored in a canonical format, so they can be read
211 // on any machine. To avoid needless swapping of bytes, the data can
212 // be stored in big endian (as used on e.g. SUN) or little endian
213 // (as used on Intel PC-s) canonical format.
214 // By default it uses the format specified in the aipsrc variable
215 // <code>table.endianformat</code> which defaults to
216 // <code>Table::LocalEndian</code> (the endian format of the
217 // machine being used when creating the table).
218 // <li> The SQL-like
219 // <a href="../notes/199.html">Table Query Language</a> (TaQL)
220 // can be used to do operations on tables like
221 // select, sort, update, insert, delete, and create.
222 // </ul>
223 //
224 // Tables can be in one of four forms:
225 // <ul>
226 // <li> A plain table is a table stored on disk.
227 // It can be shared by multiple processes.
228 // <li> A memory table is a table held in memory.
229 // It is a process specific table, thus not sharable.
230 // The <linkto class=Table>Table::copy</linkto> function can be used
231 // to turn a memory table into a plain table.
232 // <li> A reference table is a table referencing a plain or memory table.
233 // It is the result of a selection or sort on another table.
234 // A reference table references the data in the other table, thus
235 // changing data in a reference table means that the data in the
236 // original table are changed.
237 // The <linkto class=Table>Table::deepCopy</linkto> function can be
238 // used to turn a reference table into a plain table.
239 // <li> <A HREF="#Tables:concatenation">a concatenated table</A>
240 // is a union of tables (of any form) with the same description.
241 // They are concatenated in a virtual way, thus no copy is made.
242 // </ul>
243 // Concurrent access from different processes to the same plain table is
244 // fully supported by means of a <A HREF="#Tables:LockSync">
245 // locking/synchronization</A> mechanism. Concurrent access over NFS is also
246 // supported.
247 // <p>
248 // A (somewhat primitive) mechanism is available to do a
249 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
250 // of a key.
251 
252 // <ANCHOR NAME="Tables:open">
253 // <h3>Opening an Existing Table</h3></ANCHOR>
254 //
255 // To open an existing table you just create a
256 // <linkto class="Table:description">Table</linkto> object giving
257 // the name of the table, like:
258 //
259 // <srcblock>
260 // Table readonly_table ("tableName");
261 // // or
262 // Table read_and_write_table ("tableName", Table::Update);
263 // </srcblock>
264 //
265 // The constructor option determines whether the table will be opened as
266 // readonly or as read/write. A readonly table file must be opened
267 // as readonly, otherwise an exception is thrown. The functions
268 // <linkto class="Table">Table::isWritable(...)</linkto>
269 // can be used to determine if a table is writable.
270 //
271 // When the table is opened, the data managers are reinstantiated
272 // according to their definition at table creation.
273 // <p>
274 // <ANCHOR NAME="Tables:openTable"></ANCHOR>
275 // The static function <src>TableUtil::openTable</src> can be used to open a table,
276 // in particular a subtable, in a simple way by means of the :: notation like
277 // <src>maintable::subtable</src>. The :: notation is much better than specifying
278 // an explicit path (such as <src>maintable/subtable</src>), because it also works
279 // fine if the main table is a reference table (e.g. the result of a selection).
280 
281 // <ANCHOR NAME="Tables:read">
282 // <h3>Reading from a Table</h3></ANCHOR>
283 //
284 // You can read data from a table column with the "get" functions
285 // in the classes
286 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
287 // and
288 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
289 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
290 // uShort, uInt, float, double, Complex, DComplex and String) you could
291 // instead use
292 // <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
293 // <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
294 // These functions offer an extra: they do automatic data type promotion;
295 // so that you can, for example, get a double value from a float column.
296 //
297 // These "get" functions are used in the same way as the simple "put"
298 // functions described in the <A HREF="#Tables:write">writing</A> section below.
299 // <p>
300 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
301 // can be constructed for a non-writable column. However, an exception
302 // is thrown if the put function is used for it.
303 // The same is true for
304 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto> and
305 // <linkto class="TableColumn:description">TableColumn</linkto>.
306 // <p>
307 // A typical program could look like:
308 // <srcblock>
309 // #include <casacore/tables/Tables/Table.h>
310 // #include <casacore/tables/Tables/ScalarColumn.h>
311 // #include <casacore/tables/Tables/ArrayColumn.h>
312 // #include <casacore/casa/Arrays/Vector.h>
313 // #include <casacore/casa/Arrays/Slicer.h>
314 // #include <casacore/casa/Arrays/ArrayMath.h>
315 // #include <iostream>
316 //
317 // main()
318 // {
319 // // Open the table (readonly).
320 // Table tab ("some.name");
321 //
322 // // Construct the various column objects.
323 // // Their data type has to match the data type in the table description.
324 // ScalarColumn<Int> acCol (tab, "ac");
325 // ArrayColumn<Float> arr2Col (tab, "arr2");
326 //
327 // // Loop through all rows in the table.
328 // uInt nrow = tab.nrow();
329 // for (uInt i=0; i<nrow; i++) {
330 // // Read the row for both columns.
331 // cout << "Column ac in row i = " << acCol(i) << endl;
332 // Array<Float> array = arr2Col.get (i);
333 // }
334 //
335 // // Show the entire column ac,
336 // // and show the 10th element of arr2 in each row.
337 // cout << acCol.getColumn();
338 // cout << arr2Col.getColumn (Slicer(Slice(10)));
339 // }
340 // </srcblock>
341 
342 // <ANCHOR NAME="Tables:creation">
343 // <h3>Creating a Table</h3></ANCHOR>
344 //
345 // The creation of a table is a multi-step process:
346 // <ol>
347 // <li>
348 // Create a <A HREF="#Tables:Table Description">table description</A>.
349 // <li>
350 // Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
351 // object with the name of the new table.
352 // <li>
353 // Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
354 // <li>
355 // Bind each column to the appropriate data manager.
356 // The system will bind unbound columns to data managers which
357 // are created internally using the default data manager name
358 // defined in the column description.
359 // <li>
360 // Define the shape of direct columns (if that was not already done in the
361 // column description).
362 // <li>
363 // Create the <linkto class="Table:description">Table</linkto>
364 // object from the SetupNewTable object. Here, a final check is performed
365 // and the necessary files are created.
366 // </ol>
367 // The recipe above is meant for the creation of a plain table, but the
368 // creation of a memory table is exactly the same. The only difference
369 // is that in the call to construct the Table object the Table::Memory
370 // type has to be given. Note that in the SetupNewTable object the columns
371 // can be bound to any data manager. <src>MemoryTable</src> will rebind
372 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
373 // storage manager, but virtual column bindings are not changed.
374 //
375 // The following example shows how you can create a table. An example
376 // specifically illustrating the creation of the
377 // <A HREF="#Tables:Table Description">table description</A> is given
378 // in that section. Other sections discuss the access to the table.
379 //
380 // <srcblock>
381 // #include <casacore/tables/Tables/TableDesc.h>
382 // #include <casacore/tables/Tables/SetupNewTab.h>
383 // #include <casacore/tables/Tables/Table.h>
384 // #include <casacore/tables/Tables/ScaColDesc.h>
385 // #include <casacore/tables/Tables/ScaRecordColDesc.h>
386 // #include <casacore/tables/Tables/ArrColDesc.h>
387 // #include <casacore/tables/Tables/StandardStMan.h>
388 // #include <casacore/tables/Tables/IncrementalStMan.h>
389 //
390 // main()
391 // {
392 // // Step 1 -- Build the table description.
393 // TableDesc td("tTableDesc", "1", TableDesc::Scratch);
394 // td.comment() = "A test of class SetupNewTable";
395 // td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
396 // td.addColumn (ScalarColumnDesc<Int> ("ac"));
397 // td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
398 // td.addColumn (ScalarColumnDesc<Float> ("ae"));
399 // td.addColumn (ScalarRecordColumnDesc ("arec"));
400 // td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
401 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
402 // td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
403 //
404 // // Step 2 -- Setup a new table from the description.
405 // SetupNewTable newtab("newtab.data", td, Table::New);
406 //
407 // // Step 3 -- Create storage managers for it.
408 // StandardStMan stmanStand_1;
409 // StandardStMan stmanStand_2;
410 // IncrementalStMan stmanIncr;
411 //
412 // // Step 4 -- First, bind all columns to the first storage
413 // // manager. Then, bind a few columns to another storage manager
414 // // (which will overwrite the previous bindings).
415 // newtab.bindAll (stmanStand_1);
416 // newtab.bindColumn ("ab", stmanStand_2);
417 // newtab.bindColumn ("ae", stmanIncr);
418 // newtab.bindColumn ("arr3", stmanIncr);
419 //
420 // // Step 5 -- Define the shape of the direct columns.
421 // // (this could have been done in the column description).
422 // newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
423 // newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
424 //
425 // // Step 6 -- Finally, create the table consisting of 10 rows.
426 // Table tab(newtab, 10);
427 //
428 // // Now we can fill the table, which is shown in a next section.
429 // // The Table destructor will flush the table to the files.
430 // }
431 // </srcblock>
432 // To create a table in memory, only step 6 has to be modified slightly to:
433 // <srcblock>
434 // Table tab(newtab, Table::Memory, 10);
435 // </srcblock>
436 //
437 // Note that the function <src>TableUtil::createTable</src> can be used to create a table
438 // in a simpler way. It can also be used to create a subtable using the :: notation
439 // similar to the <A HREF="#Tables:openTable"><src>TableUtil::openTable</src></A>
440 // function described above.
441 
442 // <ANCHOR NAME="Tables:write">
443 // <h3>Writing into a Table</h3></ANCHOR>
444 //
445 // Once a table has been created or has been opened for read/write,
446 // you want to write data into it. Before doing that you may have
447 // to add one or more rows to the table.
448 // <note role=tip> If a table was created with a given number of rows, you
449 // do not need to add rows; you may not even be able to do so.
450 // </note>
451 //
452 // When adding new rows to the table, either via the
453 // <linkto class="Table">Table(...) constructor</linkto>
454 // or via the
455 // <linkto class="Table">Table::addRow(...)</linkto>
456 // function, you can choose to have those rows initialized with the
457 // default values given in the description.
458 //
459 // To actually write the data into the table you need the classes
460 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto> and
461 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
462 // For each column you can construct one or
463 // more of these objects. Their put(...) functions
464 // let you write a value at a time or the entire column in one go.
465 // For arrays you can "put" subsections of the arrays.
466 //
467 // As an alternative for scalars of a standard data type (i.e. Bool,
468 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
469 // and String) you could use the functions
470 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
471 // These functions offer an extra: automatic data type promotion; so that
472 // you can, for example, put a float value in a double column.
473 //
474 // A typical program could look like:
475 // <srcblock>
476 // #include <casacore/tables/Tables/TableDesc.h>
477 // #include <casacore/tables/Tables/SetupNewTab.h>
478 // #include <casacore/tables/Tables/Table.h>
479 // #include <casacore/tables/Tables/ScaColDesc.h>
480 // #include <casacore/tables/Tables/ArrColDesc.h>
481 // #include <casacore/tables/Tables/ScalarColumn.h>
482 // #include <casacore/tables/Tables/ArrayColumn.h>
483 // #include <casacore/casa/Arrays/Vector.h>
484 // #include <casacore/casa/Arrays/Slicer.h>
485 // #include <casacore/casa/Arrays/ArrayMath.h>
486 // #include <iostream>
487 //
488 // main()
489 // {
490 // // First build the table description.
491 // TableDesc td("tTableDesc", "1", TableDesc::Scratch);
492 // td.comment() = "A test of class SetupNewTable";
493 // td.addColumn (ScalarColumnDesc<Int> ("ac"));
494 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
495 //
496 // // Setup a new table from the description,
497 // // and create the (still empty) table.
498 // // Note that since we do not explicitly bind columns to
499 // // data managers, all columns will be bound to the default
500 // // standard storage manager StandardStMan.
501 // SetupNewTable newtab("newtab.data", td, Table::New);
502 // Table tab(newtab);
503 //
504 // // Construct the various column objects.
505 // // Their data type has to match the data type in the description.
506 // ScalarColumn<Int> ac (tab, "ac");
507 // ArrayColumn<Float> arr2 (tab, "arr2");
508 // Vector<Float> vec2(100);
509 //
510 // // Write the data into the columns.
511 // // In each cell arr2 will be a vector of length 100.
512 // // Since its shape is not set explicitly, it is done implicitly.
513 // for (uInt i=0; i<10; i++) {
514 // tab.addRow(); // First add a row.
515 // ac.put (i, i+10); // value is i+10 in row i
516 // indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
517 // arr2.put (i, vec2);
518 // }
519 //
520 // // Finally, show the entire column ac,
521 // // and show the 10th element of arr2.
522 // cout << ac.getColumn();
523 // cout << arr2.getColumn (Slicer(Slice(10)));
524 //
525 // // The Table destructor writes the table.
526 // }
527 // </srcblock>
528 //
529 // In this example we added rows in the for loop, but we could also have
530 // created 10 rows straightaway by constructing the Table object as:
531 // <srcblock>
532 // Table tab(newtab, 10);
533 // </srcblock>
534 // in which case we would not include
535 // <srcblock>
536 // tab.addRow()
537 // </srcblock>
538 //
539 // The classes
540 // <linkto class="TableColumn:description">TableColumn</linkto>,
541 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>, and
542 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>
543 // contain several functions to put values into a single cell or into the
544 // whole column. This may look confusing, but is actually quite simple.
545 // The functions can be divided in two groups:
546 // <ol>
547 // <li>
548 // Put the given value into the column cell(s).
549 // <ul>
550 // <li>
551 // The simplest put functions,
552 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
553 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
554 // put a value into the given column cell. For convenience, there is an
555 // <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
556 // to put only a part of the array.
557 // <li>
558 // <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
559 // <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
560 // fill an entire column by putting the given value into all the cells
561 // of the column.
562 // <li>
563 // The simplest putColumn functions,
564 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
565 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
566 // put an array of values into the column. There is a special
567 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
568 // version which puts only a part of the arrays.
569 // </ul>
570 //
571 // <li>
572 // Copy values from another column to this column.<BR>
573 // These functions have the advantage that the
574 // data type of the input and/or output column can be unknown.
575 // The generic TableColumn objects can be used for this purpose.
576 // The put(Column) function checks the data types and, if possible,
577 // converts them. If the conversion is not possible, it throws an
578 // exception.
579 // <ul>
580 // <li>
581 // The put functions copy the value in a cell of the input column
582 // to a cell in the output column. The row numbers of the cells
583 // in the columns can be different.
584 // <li>
585 // The putColumn functions copy the entire contents of the input column
586 // to the output column. The lengths of the columns must be equal.
587 // </ul>
588 // Each class has its own set of these functions.
589 // <ul>
590 // <li>
591 // <linkto class="TableColumn">TableColumn::put(...)</linkto> and
592 // <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
593 // are the most generic. They can be
594 // used if the data types of both input and output column are unknown.
595 // Note that these functions are virtual.
596 // <li>
597 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
598 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
599 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
600 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
601 // are less generic and therefore potentially more efficient.
602 // The most efficient variants are the ones taking a
603 // Scalar/ArrayColumn&lt;T&gt;, because they require no data type
604 // conversion.
605 // </ul>
606 // </ol>
607 
608 // <ANCHOR NAME="Tables:row-access">
609 // <h3>Accessing rows in a Table</h3></ANCHOR>
610 //
611 // Apart from accessing a table column-wise as described in the
612 // previous two sections, it is also possible to access a table row-wise.
613 // The <linkto class=TableRow>TableRow</linkto> class makes it possible
614 // to access multiple fields in a table row as a whole. Note that like the
615 // XXColumn classes described above, there is also an ROTableRow class
616 // for access to readonly tables.
617 // <p>
618 // On construction of a TableRow object it has to be specified which
619 // fields (i.e. columns) are part of the row. For these fields a
620 // fixed structured <linkto class=TableRecord>TableRecord</linkto>
621 // object is constructed as part of the TableRow object. The TableRow::get
622 // function will fill this record with the table data for the given row.
623 // The user has access to the record and can use
624 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
625 // speedier access to the record.
626 // <p>
627 // The class could be used as shown in the following example.
628 // <srcblock>
629 // // Open the table as readonly and define a row object to contain
630 // // the given columns.
631 // // Note that the function stringToVector is a very convenient
632 // // way to construct a Vector<String>.
633 // // Show the description of the fields in the row.
634 // Table table("Some.table");
635 // ROTableRow row (table, stringToVector("col1,col2,col3"));
636 // cout << row.record().description();
637 // // Since the structure of the record is known, the RecordFieldPtr
638 // // objects could be used to allow for easy and fast access to
639 // // the record which is refilled for each get.
640 // RORecordFieldPtr<String> col1(row.record(), "col1");
641 // RORecordFieldPtr<Double> col2(row.record(), "col2");
642 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
643 // for (uInt i=0; i<table.nrow(); i++) {
644 // row.get (i);
645 // someString = *col1;
646 // somedouble = *col2;
647 // someArrayInt = *col3;
648 // }
649 // </srcblock>
650 // The description of TableRow contains some more extensive examples.
651 
652 // <ANCHOR NAME="Tables:select and sort">
653 // <h3>Table Selection and Sorting</h3></ANCHOR>
654 //
655 // The result of a select and sort of a table is another table,
656 // which references the original table. This means that an update
657 // of a sorted or selected table results in the update of the original
658 // table. The result is, however, a table in itself, so all table
659 // functions (including select and sort) can be used with it.
660 // Note that a true copy of such a reference table can be made with
661 // the <linkto class=Table>Table::deepCopy</linkto> function.
662 // <p>
663 // Rows or columns can be selected from a table. Columns can be selected
664 // by the
665 // <linkto class="Table">Table::project(...)</linkto>
666 // function, while rows can be selected by the various
667 // <linkto class="Table">Table operator()</linkto> functions.
668 // Usually a row is selected by giving a select expression with
669 // <linkto class="TableExprNode:description">TableExprNode</linkto>
670 // objects. These objects represent the various nodes
671 // in an expression, e.g. a constant, a column, or a subexpression.
672 // The Table function
673 // <linkto class="Table">Table::col(...)</linkto>
674 // creates a TableExprNode object for a column. The function
675 // <linkto class="Table">Table::key(...)</linkto>
676 // does the same for a keyword by reading
677 // the keyword value and storing it as a constant in an expression node.
678 // All column nodes in an expression must belong to the same table,
679 // otherwise an exception is thrown.
680 // In the following example we select all rows with RA>10:
681 // <srcblock>
682 // #include <casacore/tables/Tables/ExprNode.h>
683 // Table table ("Table.name");
684 // Table result = table (table.col("RA") > 10);
685 // </srcblock>
686 // while in the next one we select rows with RA and DEC in the given
687 // intervals:
688 // <srcblock>
689 // Table result = table (table.col("RA") > 10
690 // && table.col("RA") < 14
691 // && table.col("DEC") >= -10
692 // && table.col("DEC") <= 10);
693 // </srcblock>
694 // The following operators can be used to form arbitrarily
695 // complex expressions:
696 // <ul>
697 // <li> Relational operators ==, !=, >, >=, < and <=.
698 // <li> Logical operators &&, || and !.
699 // <li> Arithmetic operators +, -, *, /, %, and unary + and -.
700 // <li> Bit operators ^, &, |, and unary ~.
701 // <li> Operator() to take a subsection of an array.
702 // </ul>
703 // Many functions (like sin, max, conj) can be used in an expression.
704 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows
705 // the available functions.
706 // E.g.
707 // <srcblock>
708 // Table result = table (sin (table.col("RA")) > 0.5);
709 // </srcblock>
710 // Function <src>in</src> can be used to select from a set of values.
711 // A value set can be constructed using class
712 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
713 // <srcblock>
714 // TableExprNodeSet set;
715 // set.add (TableExprNodeSetElem ("abc"));
716 // set.add (TableExprNodeSetElem ("defg"));
717 // set.add (TableExprNodeSetElem ("h"));
718 // Table result = table (table.col("NAME").in (set));
719 // </srcblock>
720 // select rows with a NAME equal to <src>abc</src>,
721 // <src>defg</src>, or <src>h</src>.
722 //
723 // <p>
724 // You can sort a table on one or more columns containing scalars.
725 // In this example we simply sort on column RA (default is ascending):
726 // <srcblock>
727 // Table table ("Table.name");
728 // Table result = table.sort ("RA");
729 // </srcblock>
730 // Multiple
731 // <linkto class="Table">Table::sort(...)</linkto>
732 // functions exist which allow for more flexible control over the sort order.
733 // In the next example we sort first on RA in descending order
734 // and then on DEC in ascending order:
735 // <srcblock>
736 // Table table ("Table.name");
737 // Block<String> sortKeys(2);
738 // Block<int> sortOrders(2);
739 // sortKeys(0) = "RA";
740 // sortOrders(0) = Sort::Descending;
741 // sortKeys(1) = "DEC";
742 // sortOrders(1) = Sort::Ascending;
743 // Table result = table.sort (sortKeys, sortOrders);
744 // </srcblock>
745 //
746 // Tables stemming from the same root, can be combined in several
747 // ways with the help of the various logical
748 // <linkto class="Table">Table operators</linkto> (operator|, etc.).
749 
750 // <h4>Table Query Language</h4>
751 // The selection and sorting mechanism described above can only be used
752 // in a hard-coded way in a C++ program.
753 // There is, however, another way. Strings containing selection and
754 // sorting commands can be used.
755 // The syntax of these commands is based on SQL and is described in the
756 // <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
757 // The language supports UDFs (User Defined Functions) in dynamically
758 // loadable libraries as explained in the note.
759 // <br>A TaQL command can be executed with the static function
760 // <src>tableCommand</src> defined in class
761 // <linkto class=TableParse>TableParse</linkto>.
762 
763 // <ANCHOR NAME="Tables:concatenation">
764 // <h3>Table Concatenation</h3></ANCHOR>
765 // Tables with identical descriptions can be concatenated in a virtual way
766 // using the Table concatenation constructor. Such a Table object behaves
767 // as any other Table object, thus any operation can be performed on it.
768 // An identical description means that the number of columns, the column names,
769 // and the data types of the columns must be the same. The columns do not
770 // need to be ordered in the same way nor to be stored in the same way.
771 // <br>Note that if tables have different column names, it is possible
772 // to form a projection (as described in the previous section) first
773 // to make them appear identical.
774 //
775 // Sometimes a MeasurementSet is partitioned, for instance in chunks of
776 // one hour. All those chunks can be virtually concatenated this way.
777 // Note that all tables in the concatenation will be opened, thus one might
778 // run out of file descriptors if there are many chunks.
779 //
780 // Similar to reference tables, it is possible to make a concatenated Table
781 // persistent by using the <src>rename</src> function. It will not copy the
782 // data; only the names of the tables used are written.
783 //
784 // The keywords of a concatenated table are taken from the first table.
785 // It is possible to change or add keywords, but that is not persistent,
786 // not even if the concatenated table is made persistent.
787 // <br>The keywords holding subtables can be handled in a special way.
788 // Normally the subtables of the first table are used as the subtables of the
789 // concatenation, but it is possible to concatenate subtables as well by
790 // giving their names in the constructor.
791 // In this way the, say, SYSCAL subtable of a MeasurementSet can be
792 // concatenated as well.
793 // <srcblock>
794 // // Create virtual concatenation of ms0 and ms1.
795 // Block<String> names(2);
796 // names[0] = "ms0";
797 // names[1] = "ms1";
798 // // Also concatenate their SYSCAL subtables.
799 // Block<String> subNames(1, "SYSCAL");
800 // Table concTab (names, subNames);
801 // </srcblock>
802 
803 // <ANCHOR NAME="Tables:iterate">
804 // <h3>Table Iterators</h3></ANCHOR>
805 //
806 // You can iterate through a table in an arbitrary order by getting
807 // a subset of the table consisting of the rows in which the iteration
808 // columns have the same value.
809 // An iterator object is created by constructing a
810 // <linkto class="TableIterator:description">TableIterator</linkto>
811 // object with the appropriate column names.
812 //
813 // In the next example we define an iteration on the columns Time and
814 // Baseline. Each iteration step returns a table subset in which Time and
815 // Baseline have the same value.
816 //
817 // <srcblock>
818 // // Iterate over Time and Baseline (by default in ascending order).
819 // // Time is the main iteration order, thus the first column specified.
820 // Table t;
821 // Table tab ("UV_Table.data");
822 // Block<String> iv0(2);
823 // iv0[0] = "Time";
824 // iv0[1] = "Baseline";
825 // //
826 // // Create the iterator. This will prepare the first subtable.
827 // TableIterator iter(tab, iv0);
828 // Int nr = 0;
829 // while (!iter.pastEnd()) {
830 // // Get the first subtable.
831 // // This will contain rows with equal Time and Baseline.
832 // t = iter.table();
833 // cout << t.nrow() << " ";
834 // nr++;
835 // // Prepare the next subtable with the next Time,Baseline value.
836 // iter.next();
837 // }
838 // cout << endl << nr << " iteration steps" << endl;
839 // </srcblock>
840 //
841 // You can define more than one iterator on the same table; they operate
842 // independently.
843 //
844 // Note that the result of each iteration step is a table in itself which
845 // references the original table, just as in the case of a sort or select.
846 // This means that the resulting table can be used again in a sort, select,
847 // iteration, etc.
848 
849 // <ANCHOR NAME="Tables:vectors">
850 // <h3>Table Vectors</h3></ANCHOR>
851 //
852 // A table vector makes it possible to treat a column in a table
853 // as a vector. Almost all operators and functions defined for normal
854 // vectors, are also defined for table vectors. So it is, for instance,
855 // possible to add a constant to a table vector. This has the effect
856 // that the underlying column gets changed.
857 //
858 // You can use the templated class
859 // <linkto class="TableVector:description">TableVector</linkto>
860 // to make a scalar column appear as a (table) vector.
861 // Columns containing arrays or tables are not supported.
862 // The data type of the TableVector object must match the
863 // data type of the column.
864 // A table vector can also hold a normal vector so that (temporary)
865 // results of table vector operations can be handled.
866 //
867 // In the following example we double the data in column COL1 and
868 // store the result in a temporary table vector.
869 // <srcblock>
870 // // Create a table vector for column COL1.
871 // // Note that if the table is readonly, putting data in the table vector
872 // // results in an exception.
873 // Table tab ("Table.data");
874 // TableVector<Int> tabvec(tab, "COL1");
875 // // Multiply it by a constant. Result is kept in a Vector in memory.
876 // TableVector<Int> temp = 2 * tabvec;
877 // </srcblock>
878 //
879 // In the next example we double the data in COL1 and put the result back
880 // in the column.
881 // <srcblock>
882 // // Create a table vector for column COL1.
883 // // It has to be a TableVector to be able to change the column.
884 // Table tab ("Table.data", Table::Update);
885 // TableVector<Int> tabvec(tab, "COL1");
886 // // Multiply it by a constant.
887 // tabvec *= 2;
888 // </srcblock>
889 
890 // <ANCHOR NAME="Tables:keywords">
891 // <h3>Table Keywords</h3></ANCHOR>
892 //
893 // Any number of keyword/value pairs may be attached to the table as a whole,
894 // or to any individual column. They may be freely added, retrieved,
895 // re-assigned, or deleted. They are, in essence, a self-resizing list of
896 // values (any of the primitive types) indexed by Strings (the keyword).
897 //
898 // A table keyword/value pair might be
899 // <srcblock>
900 // Observer = Grote Reber
901 // Date = 10 october 1942
902 // </srcblock>
903 // Column keyword/value pairs might be
904 // <srcblock>
905 // Units = mJy
906 // Reference Pixel = 320
907 // </srcblock>
908 // The class
909 // <linkto class="TableRecord:description">TableRecord</linkto>
910 // represents the keywords in a table.
911 // It is (indirectly) derived from the standard record classes in the class
912 // <linkto class="Record:description">Record</linkto>.
913 
914 // <ANCHOR NAME="Tables:Table Description">
915 // <h3>Table Description</h3></ANCHOR>
916 //
917 // A table contains a description of itself, which defines the layout of the
918 // columns and the keyword sets for the table and for the individual columns.
919 // It may also define initial keyword sets and default values for the columns.
920 // Such a default value is automatically stored in a cell in the table column,
921 // whenever a row is added to the table.
922 //
923 // The creation of the table descriptor is the first step in the creation of
924 // a new table. The description is part of the table itself, but may also
925 // exist in a separate file. This is useful if you need to create a number
926 // of tables with the same structure; in other circumstances it probably
927 // should be avoided.
928 //
929 // The public classes to set up a table description are:
930 // <ul>
931 // <li> <linkto class="TableDesc:description">TableDesc</linkto>
932 // -- holds the table description.
933 // <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
934 // -- holds a generic column description.
935 // <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt;
936 // </linkto>
937 // -- defines a column containing a scalar value.
938 // <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
939 // </linkto>
940 // -- defines a column containing a scalar record value.
941 // <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt;
942 // </linkto>
943 // -- defines a column containing an (in)direct array.
944 // </ul>
945 //
946 // Here follows a typical example of the construction of a table
947 // description. For more specialized things -- like the definition of a
948 // default data manager -- we refer to the descriptions of the above
949 // mentioned classes.
950 //
951 // <srcblock>
952 // #include <casacore/tables/Tables/TableDesc.h>
953 // #include <casacore/tables/Tables/ScaColDesc.h>
954 // #include <casacore/tables/Tables/ArrColDesc.h>
955 // #include <casacore/tables/Tables/ScaRecordColDesc.h>
956 // #include <casacore/tables/Tables/TableRecord.h>
957 // #include <casacore/casa/Arrays/IPosition.h>
958 // #include <casacore/casa/Arrays/Vector.h>
959 //
960 // main()
961 // {
962 // // Create a new table description
963 // // Define a comment for the table description.
964 // // Define some keywords.
965 // ColumnDesc colDesc1, colDesc2;
966 // TableDesc td("tTableDesc", "1", TableDesc::New);
967 // td.comment() = "A test of class TableDesc";
968 // td.rwKeywordSet().define ("ra", float(3.14));
969 // td.rwKeywordSet().define ("equinox", double(1950));
970 // td.rwKeywordSet().define ("aa", Int(1));
971 //
972 // // Define an integer column ab.
973 // td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
974 //
975 // // Add a scalar integer column ac, define keywords for it
976 // // and define a default value 0.
977 // // Overwrite the value of keyword unit.
978 // ScalarColumnDesc<Int> acColumn("ac");
979 // acColumn.rwKeywordSet().define ("scale", Complex(0,0));
980 // acColumn.rwKeywordSet().define ("unit", "");
981 // acColumn.setDefault (0);
982 // td.addColumn (acColumn);
983 // td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
984 //
985 // // Add a scalar string column ad and define its comment string.
986 // td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
987 //
988 // // Now define array columns.
989 // // This one is indirect and has no dimensionality mentioned yet.
990 // td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
991 // // This one is indirect and has 3-dim arrays.
992 // td.addColumn (ArrayColumnDesc<Int> ("A2r1","comment for Arr1",3));
993 // // This one is direct and has 2-dim arrays with axes length 4 and 7.
994 // td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr1",
995 // IPosition(2,4,7),
996 // ColumnDesc::Direct));
997 //
998 // // Add columns containing records.
999 // td.addColumn (ScalarRecordColumnDesc ("Rec1"));
1000 // }
1001 // </srcblock>
1002 
1003 // <ANCHOR NAME="Tables:Data Managers">
1004 // <h3>Data Managers</h3></ANCHOR>
1005 //
1006 // Data managers take care of the actual access to the data in a column.
1007 // There are two kinds of data managers:
1008 // <ol>
1009 // <li> <A HREF="#Tables:storage managers">Storage managers</A> --
1010 // which store the data as such. They can only handle the standard
1011 // data types (Bool,...,String) as discussed in the section about the
1012 // <A HREF="#Tables:properties">table properties</A>.
1013 // <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
1014 // -- which manipulate the data.
1015 // An engine could be a simple thing like scaling the data (as done
1016 // in classic AIPS to reduce data storage), but it could also be an
1017 // elaborate thing like applying corrections on-the-fly.
1018 // <br>A special engine is VirtualTaQLColumn which can be used to define
1019 // the contents of a column by means of a TaQL expression. In particular,
1020 // it can be used to define a constant value for the entire column.
1021 // But it can also be used to calculate the UVW-coordinates on-the-fly.
1022 // <br>An engine must be used when storing data objects with a non-standard type.
1023 // It has to break down the object into items with standard data types
1024 // which can be stored with a storage manager.
1025 // </ol>
1026 // In general the user of a table does not need to be aware which
1027 // data managers are being used underneath. Only when the table is created
1028 // data managers have to be bound to the columns. Thereafter it is
1029 // completely transparent.
1030 //
1031 // Data managers need to be registered, so they can be found when a table is
1032 // opened. All data managers mentioned below are part of the system and
1033 // pre-registered.
1034 // It is, however, also possible to load data managers on demand. If a data
1035 // manager is not registered, an attempt is made to load a shared library whose
1036 // name is derived from the part of the data manager name (in lowercase) before a dot or left arrow (<).
1037 // The dot makes it possible to have multiple data managers in a shared library,
1038 // while the left arrow is meant for templated data manager classes.
1039 // <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
1040 // library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1041 // successful, its function <src>register_bitflagsengine()</src> will be
1042 // executed which should register the data manager(s). Thereafter it is known
1043 // and will be used. For example in a file Register.h and Register.cc:
1044 // <srcblock>
1045 // // Declare in .h file as C function, so no name mangling is done.
1046 // extern "C" {
1047 // void register_bitflagsengine();
1048 // }
1049 // // Implement in .cc file.
1050 // void register_bitflagsengine()
1051 // {
1052 // BitFlagsEngine<uChar>::registerClass();
1053 // BitFlagsEngine<Short>::registerClass();
1054 // BitFlagsEngine<Int>::registerClass();
1055 // }
1056 // </srcblock>
1057 // There are several functions that can give information which data managers
1058 // are used for which columns and to obtain the characteristics and properties
1059 // of them. Class RODataManAccessor and derived classes can be used for it
1060 // as well as the functions <src>dataManagerInfo</src> and
1061 // <src>showStructure</src> in class Table.
1062 
1063 // <ANCHOR NAME="Tables:storage managers">
1064 // <h3>Storage Managers</h3></ANCHOR>
1065 //
1066 // Storage managers are used to store the data contained in the column cells.
1067 // At table construction time the binding of columns to storage managers is done.
1068 // <br>Each storage manager uses one or more files (usually called table.fi_xxx
1069 // where i is a sequence number and _xxx is some kind of extension).
1070 // Typically several files are used to store the data of the columns of a table.
1071 // <br>In order to reduce the number of files (and to support large block sizes),
1072 // it is possible to have a single container file (a MultiFile) containing all
1073 // data files used by the storage managers. Such a file is called table.mf.
1074 // Note that the program <em>lsmf</em> can be used to see which
1075 // files are contained in a MultiFile. The program <em>tomf</em> can
1076 // convert the files in a MultiFile to regular files.
1077 // <br>At table creation time it is decided if a MultiFile will be used. It
1078 // can be done by means of the StorageOption object given to the SetupNewTable
1079 // constructor and/or by the aipsrc variables:
1080 // <ul>
1081 // <li> <src>table.storage.option</src> which can have the value
1082 // 'multifile', 'sepfile' (meaning separate files), or 'default'.
1083 // Currently the default is to use separate files.
1084 // <li> <src>table.storage.blocksize</src> defines the block size to be
1085 // used by a MultiFile. If 0 is given, the file system's block size
1086 // will be used.
1087 // </ul>
1088 // Almost all standard storage managers support the MultiFile.
1089 // The exception is StManAipsIO, because it is hardly ever used.
1090 //
1091 // Several storage managers exist, each with its own storage characteristics.
1092 // The default and preferred storage manager is <src>StandardStMan</src>.
1093 // Other storage managers should only be used if they pay off in
1094 // file space (like <src>IncrementalStMan</src> for slowly varying data)
1095 // or access speed (like the tiled storage managers for large data arrays).
1096 // <br>The storage managers store the data in a big or little endian
1097 // canonical format. The format can be specified when the table is created.
1098 // By default it uses the endian format as specified in the aipsrc variable
1099 // <code>table.endianformat</code> which can have the value local, big,
1100 // or little. The default is local.
1101 // <ol>
1102 // <li>
1103 // <linkto class="StandardStMan:description">StandardStMan</linkto>
1104 // stores all the values in so-called buckets (equally sized chunks
1105 // in the file). It requires little memory.
1106 // <br>It replaces the old <src>StManAipsIO</src>.
1107 //
1108 // <li>
1109 // <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1110 // uses a storage mechanism resembling "incremental backups". A value
1111 // is only stored if it is different from the previous row. It is
1112 // very well suited for slowly varying data.
1113 // <br>The class <linkto class="ROIncrementalStManAccessor:description">
1114 // ROIncrementalStManAccessor</linkto> can be used to tune the
1115 // behaviour of the <src>IncrementalStMan</src>. It contains functions
1116 // to deal with the cache size and to show the behaviour of the cache.
1117 //
1118 // <li>
1119 // The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1120 // store the data as a tiled hypercube allowing for more or less equally
1121 // efficient data access along all main axes. It can be used for
1122 // UV-data as well as for image data.
1123 //
1124 // <li>
1125 // <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1126 // uses <src>AipsIO</src> to store the data in the columns.
1127 // It supports all table functionality, but its I/O is probably not
1128 // as efficient as other storage managers. It also requires that
1129 // a large part of the table fits in memory.
1130 // <br>It should not be used anymore, because it uses a lot of memory
1131 // for larger tables and because it is not very robust in case an
1132 // application or system crashes.
1133 //
1134 // <li>
1135 // <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1136 // holds the data in memory. It means that data 'stored' with this
1137 // storage manager are NOT persistent.
1138 // <br>This storage manager is primarily meant for tables held in
1139 // memory, but it can also be useful for temporary columns in
1140 // normal tables. Note, however, that if a table is accessed
1141 // concurrently from multiple processes, MemoryStMan data cannot be
1142 // synchronized.
1143 //
1144 // <li>
1145 // @ref dyscostman.DyscoStMan is a class that stores data with lossy
1146 // compression. It combines non-linear least-squares quantization and
1147 // different kinds of normalization. With the typical factor of 4
1148 // compression, the loss in accuracy from lossy compression is
1149 // negligible. It should only be used for real (non-simulated) data
1150 // that is in a Measurement Set.
1151 // The method is described in this article:
1152 // https://arxiv.org/abs/1609.02019.
1153 //
1154 // <li>
1155 // <linkto class="Adios2StMan:description">Adios2StMan</linkto> uses the
1156 // <A HREF="https://github.com/ornladios/ADIOS2">ADIOS2 framework</A> to
1157 // store and load column data.
1158 // <br>ADIOS2 has several configurable storage backends itself, and this
1159 // flexibility is also available via Adios2StMan. This includes, among other
1160 // things, storing compressed data, or choosing a different on-disk format.
1161 // <br>This storage manager is also special in that it provides parallel
1162 // writing capabilities for MPI processes, so that multiple processes can
1163 // write into different sections of the same column concurrently.
1164 // </ol>
1165 //
1166 // The storage manager framework makes it possible to support arbitrary files
1167 // as tables. This has been used in a case where a file is filled
1168 // by the data acquisition system of a telescope. The file is simultaneously
1169 // used as a table using a dedicated storage manager. The table
1170 // system and storage manager provide a sync function to synchronize
1171 // the processes, i.e. to make CTDS aware of changes
1172 // in the file size (thus in the table size) by the filling process.
1173 //
1174 // <note role=tip>
1175 // Not all data managers support all the table functionality. So, the choice
1176 // of a data manager can greatly influence the type of operations you can do
1177 // on the table as a whole.
1178 // For example, if a column uses the tiled storage manager,
1179 // it is not possible to delete rows from the table, because that storage
1180 // manager will not support deletion of rows.
1181 // However, it is always possible to delete all columns of a data
1182 // manager in one single call.
1183 // </note>
1184 
1185 // <ANCHOR NAME="Tables:TiledStMan">
1186 // <h3>Tiled Storage Manager</h3></ANCHOR>
1187 // The Tiled Storage Managers allow one to store the data of
1188 // one or more columns in a tiled way. Tiling means
1189 // that the data are stored without a preferred order to make access
1190 // along the different main axes equally efficient. This is done by
1191 // storing the data in so-called tiles (i.e. equally shaped subsets of an
1192 // array) to increase data locality. The user can define the tile shape
1193 // to optimize for the most frequently used access.
1194 // <p>
1195 // The Tiled Storage Manager has the following properties:
1196 // <ul>
1197 // <li> There can be more than one Tiled Storage Manager in
1198 // a table; each with its own (unique) name.
1199 // <li> Each Tiled Storage Manager can store an
1200 // N-dimensional so-called hypercolumn.
1201 // Elaborate hypercolumns can be defined using
1202 // <linkto file="TableDesc.h#defineHypercolumn">
1203 // TableDesc::defineHypercolumn</linkto>.
1204 // <br>Note that defining a hypercolumn is only necessary if it
1205 // contains multiple columns or if the TiledDataStMan is used.
1206 // It means that in practice it is hardly ever needed to define a
1207 // hypercolumn.
1208 // <br>A hypercolumn consists of up to three types of columns:
1209 // <dl>
1210 // <dt> Data columns
1211 // <dd> contain the data to be stored in a tiled way. This will
1212 // be done in tiled hypercubes.
1213 // There must be at least one data column.
1214 // <br> For example: a table contains UV-data with
1215 // data columns "Visibility" and "Weight".
1216 // <dt> Coordinate columns
1217 // <dd> define the world coordinates of the pixels in the data columns.
1218 // Coordinate columns are optional, but if given there must
1219 // be N coordinate columns for an N-dimensional hypercolumn.
1220 // <br>
1221 // For example: the data in the example above is 4-dimensional
1222 // and has coordinate columns "Time", "Baseline", "Frequency",
1223 // and "Polarization".
1224 // <dt> Id columns
1225 // <dd> are needed if TiledDataStMan is used.
1226 // Different rows in the data columns can be stored in different
1227 // hypercubes. The values in the id column(s) uniquely identify
1228 // the hypercube a row is stored in.
1229 // <br>
1230 // For example: the line and continuum data in a MeasurementSet
1231 // table need to be stored in 2 different hypercubes (because
1232 // their shapes are different (see below)). A column containing
1233 // the type (line or continuum) has to be used as an id column.
1234 // </dl>
1235 // <li> If multiple data columns are used, the shape of their data
1236 // must be conforming in each individual row.
1237 // If data in different rows have different shapes, they must be
1238 // stored in different hypercubes, because a hypercube can only hold
1239 // data with conforming shapes.
1240 // <br>
1241 // Thus in the example above, rows with line data will have conforming
1242 // shapes and can be stored in one hypercube. The continuum data
1243 // will have another shape and can be stored in another hypercube.
1244 // <br>
1245 // The storage manager keeps track of the mapping of rows to/from
1246 // hypercubes.
1247 // <li> Each hypercube can be tiled in its own way. It is not required
1248 // that an integer number of tiles fits in the hypercube. The last
1249 // tiles will be padded as needed.
1250 // <li> The last axis of a hypercube can be extensible. This means that
1251 // the size of that axis does not need to be defined when the
1252 // hypercube is defined in the storage manager. Instead, the hypercube
1253 // can be extended when another chunk of data has to be stored.
1254 // This can be very useful in, for example, a (quasi-)realtime
1255 // environment where the size of the time axis is not known.
1256 // <li> If coordinate columns are defined, they describe the coordinates
1257 // of the axes of the hypercubes. Each hypercube has its own set of
1258 // coordinates.
1259 // <li> Data and id columns have to be stored with the Tiled
1260 // Storage Manager. However, coordinate columns do not need to be
1261 // stored with the Tiled Storage Manager.
1262 // Especially in the case where the coordinates for a hypercube axis
1263 // are varying (i.e. dependent on other axes), another storage manager
1264 // has to be used (because the Tiled Storage Manager can only
1265 // hold constant coordinates).
1266 // </ul>
1267 // <p>
1268 // The following Tiled Storage Managers are available:
1269 // <dl>
1270 // <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1271 // <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1272 // by using the array shape as the id value.
1273 // Similarly to <src>TiledDataStMan</src> it can maintain multiple
1274 // hypercubes and store multiple rows in a hypercube, but it is
1275 // easier to use, because the special <src>addHypercube</src> and
1276 // <src>extendHypercube</src> functions are not needed.
1277 // A hypercube is automatically added when a new array shape is
1278 // encountered.
1279 // <br>
1280 // This storage manager could be used for a table with a column
1281 // containing line and continuum data, which will result
1282 // in 2 hypercubes.
1283 // <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1284 // <dd> creates (automatically) a new hypercube for each row.
1285 // Thus each row of the hypercolumn is stored in a separate hypercube.
1286 // Note that the row number serves as the id value. So an id column
1287 // is not needed, although there are multiple hypercubes.
1288 // <br>
1289 // This storage manager is meant for tables where the data arrays
1290 // in the different rows are not accessed together. One can think
1291 // of a column containing images. Each row contains an image and
1292 // only one image is shown at a time.
1293 // <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1294 // <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1295 // in the hypercube have to have the same shape and therefore this
1296 // storage manager is only possible if all columns in the hypercolumn
1297 // have the attribute FixedShape.
1298 // <br>
1299 // This storage manager could be used for a table with a column
1300 // containing images for the Stokes parameters I, Q, U, and V.
1301 // By storing them in one hypercube, it is possible to retrieve
1302 // the 4 Stokes values for a subset of the image or for an individual
1303 // pixel in a very efficient way.
1304 // <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1305 // <dd> allows one to control the creation and extension of hypercubes.
1306 // This is done by means of the class
1307 // <linkto class=TiledDataStManAccessor:description>
1308 // TiledDataStManAccessor</linkto>.
1309 // It makes it possible to store, say, row 0-9 in hypercube A,
1310 // row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1311 // <br>
1312 // The drawback of this storage manager is that its hypercubes are not
1313 // automatically extended when adding new rows. The special functions
1314 // <src>addHypercube</src> and <src>extendHypercube</src> have to be
1315 // used making it somewhat tedious to use.
1316 // Therefore this storage manager may become obsolete in the near future.
1317 // </dl>
1318 // The Tiled Storage Managers have 3 ways to access and cache the data.
1319 // Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
1320 // access choice and use it in a Table constructor.
1321 // <ul>
1322 // <li> The old way (the only way until January 2010) uses a cache
1323 // of its own to keep tiles that might need to be reused. It will always
1324 // access entire tiles, even if only a small part is needed.
1325 // It is possible to define a maximum cache size. The description of class
1326 // <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1327 // contains a discussion about the effect of defining a maximum cache
1328 // size.
1329 // <li> Memory-mapping the data files. In this way the operating system
1330 // takes care of the IO and caching. However, the limited address space
1331 // may preclude using it for large tables on 32-bit systems.
1332 // <li> Use buffered IO and let the kernel's file cache take care of caching.
1333 // It will access the data in chunks of the given buffer size, so the
1334 // entire tile does not need to be accessed if only a small part is
1335 // needed.
1336 // </ul>
1337 // Apart from reading, all access ways described above can also handle writing
1338 // and extending tables. They create fully equal files. Both little and big
1339 // endian data can be read or written.
1340 
1341 // <ANCHOR NAME="Tables:virtual column engines">
1342 // <h3>Virtual Column Engines</h3></ANCHOR>
1343 //
1344 // Virtual column engines are used to implement the virtual (i.e.
1345 // calculated-on-the-fly) columns. CTDS provides
1346 // an abstract base class (or "interface class")
1347 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1348 // that specifies the protocol for these engines.
1349 // The programmer must derive a concrete class to implement
1350 // the application-specific virtual column.
1351 // <p>
1352 // For example: the programmer
1353 // needs a column in a table which is the difference between two other
1354 // columns. (Perhaps these two other columns are updated periodically
1355 // during the execution of a program.) A good way to handle this would
1356 // be to have a virtual column in the table, and write a virtual column
1357 // engine which knows how to calculate the difference between corresponding
1358 // cells of the two other columns. So the result is that accessing a
1359 // particular cell of the virtual column invokes the virtual column engine,
1360 // which then gets the values from the other two columns, and returns their
1361 // difference. This particular example could be done using
1362 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1363 // <p>
1364 // Several virtual column engines exist:
1365 // <ol>
1366 // <li> The class
1367 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1368 // makes it possible to define a column as an arbitrary expression of
1369 // other columns. It uses the <a href="../notes/199.html">TaQL</a>
1370 // CALC command. The virtual column can be a scalar or an array and
1371 // can have one of the standard data types supported by CTDS.
1372 // <li> The class
1373 // <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1374 // maps an integer bit flags column to a Bool column. A read and write mask
1375 // can be defined telling which bits to take into account when mapping
1376 // to and from Bool (thus when reading or writing the Bool).
1377 // <li> The class
1378 // <linkto class="CompressFloat:description">CompressFloat</linkto>
1379 // compresses a single precision floating point array by scaling the
1380 // values to shorts (16-bit integer).
1381 // <li> The class
1382 // <linkto class="CompressComplex:description">CompressComplex</linkto>
1383 // compresses a single precision complex array by scaling the
1384 // values to shorts (16-bit integer). In fact, the 2 parts of the complex
1385 // number are combined into a 32-bit integer.
1386 // <li> The class
1387 // <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1388 // does the same as CompressComplex, but optimizes for the case where the
1389 // imaginary part is zero (which is often the case for Single Dish data).
1390 // <li> The double templated class
1391 // <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1392 // scales the data in an array from, for example,
1393 // float to short before putting it.
1394 // <li> The double templated class
1395 // <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1396 // converts the data from one data type to another. Sometimes it might be
1397 // needed to store the residual data in an MS in double precision.
1398 // Because the imaging task can only handle single precision, this engine
1399 // can be used to map the data from double to single precision.
1400 // <li> The double templated class
1401 // <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1402 // converts the data from one data type to another with the possibility
1403 // to reduce the number of dimensions. For example, it can be used to
1404 // store a 2-d array of StokesVector objects as a 3-d array of floats
1405 // by treating the 4 data elements as an extra array axis. If the
1406 // StokesVector class is simple, it can be done very efficiently.
1407 // <li> The class
1408 // <linkto class="ForwardColumnEngine:description">
1409 // ForwardColumnEngine</linkto>
1410 // forwards the gets and puts on a row in a column to the same row
1411 // in a column with the same name in another table. This provides
1412 // a virtual copy of the referenced column.
1413 // <li> The class
1414 // <linkto class="ForwardColumnIndexedRowEngine:description">
1415 // ForwardColumnIndexedRowEngine</linkto>
1416 // is similar to <src>ForwardColumnEngine</src>.
1417 // However, instead of forwarding it to the same row it uses a
1418 // column to map its row number to a row number in the referenced
1419 // table. In this way multiple rows can share the same data.
1420 // This data manager only allows for get operations.
1421 // <li> The calibration module has implemented a virtual column engine
1422 // to do on-the-fly calibration in a transparent way.
1423 // </ol>
1424 // To handle arbitrary data types the templated abstract base class
1425 // <linkto class="VSCEngine:description">VSCEngine</linkto>
1426 // has been written. An example of how to use this class can be
1427 // found in the demo program <src>dVSCEngine.cc</src>.
1428 
1429 // <ANCHOR NAME="Tables:LockSync">
1430 // <h3>Table locking and synchronization</h3></ANCHOR>
1431 //
1432 // Multiple concurrent readers and writers (also via NFS) of a
1433 // table are supported by means of a locking/synchronization mechanism.
1434 // This mechanism is not very sophisticated in the sense that it is
1435 // very coarsely grained. When locking, the entire table gets locked.
1436 // A special lock file is used to lock the table. This lock file also
1437 // contains some synchronization data.
1438 // <p>
1439 // The following ways of locking are supported (see class
1440 // <linkto class=TableLock>TableLock</linkto>):
1441 // <dl>
1442 // <dt> TableLock::PermanentLocking(Wait)
1443 // <dd> locks the table permanently (from open till close). This means
1444 // that one writer OR multiple readers are possible.
1445 // <dt> TableLock::AutoLocking
1446 // <dd> does the locking automatically. This is the default mode.
1447 // This mode makes it possible that a table is shared amongst
1448 // processes without the user needing to write any special code.
1449 // It also means that a lock is only released when needed.
1450 // <dt> TableLock::AutoNoReadLocking
1451 // <dd> is similar to AutoLocking. However, no lock is acquired when
1452 // reading the table making it possible to read the table while
1453 // another process holds a write-lock. It also means that for read
1454 // purposes no automatic synchronization is done when the table is
1455 // updated in another process.
1456 // Explicit synchronization can be done by means of the function
1457 // <src>Table::resync</src>.
1458 // <dt> TableLock::UserLocking
1459 // <dd> requires that the programmer explicitly acquires and releases
1460 // a lock on the table. This makes some kind of transaction
1461 // processing possible. E.g. set a write lock, add a row,
1462 // write all data into the row and release the lock.
1463 // The Table functions <src>lock</src> and <src>unlock</src>
1464 // have to be used to acquire and release a (read or write) lock.
1465 // <dt> TableLock::UserNoReadLocking
1466 // <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1467 // no lock is needed to read the table.
1468 // <dt> TableLock::NoLocking
1469 // <dd> does not use table locking. It is the responsibility of the
1470 // user to ensure that no concurrent access is done on the same
1471 // bucket or tile in a storage manager, otherwise a table might
1472 // get corrupted.
1473 // <br>This mode is always used if Casacore is built with
1474 // -DAIPS_TABLE_NOLOCKING.
1475 // </dl>
1476 // Synchronization of the processes accessing the same table is done
1477 // by means of the lock file. When a lock is released, the storage
1478 // managers flush their data into the table files. Some synchronization data
1479 // is written into the lock file telling the new number of table rows
1480 // and telling which storage managers have written data.
1481 // This information is read when another process acquires the lock
1482 // and is used to determine which storage managers have to refresh
1483 // their internal caches.
1484 // <br>Note that for the NoReadLocking modes (see above) explicit
1485 // synchronization might be needed using <src>Table::resync</src>.
1486 // <p>
1487 // The function <src>Table::hasDataChanged</src> can be used to check
1488 // if a table is (being) changed by another process. In this way
1489 // a program can react to it. E.g. the table browser can refresh its
1490 // screen when the underlying table is changed.
1491 // <p>
1492 // In general the default locking option will do.
1493 // From the above it should be clear that heavy concurrent access
1494 // results in a lot of flushing, thus will have a negative impact on
1495 // performance. If uninterrupted access to a table is needed,
1496 // the <src>PermanentLocking</src> option should be used.
1497 // If transaction-like processing is done (e.g. updating a table
1498 // containing an observation catalogue), the <src>UserLocking</src>
1499 // option is probably best.
1500 // <p>
1501 // Creation or deletion of a table is not possible if that table
1502 // is still open in another process. The function
1503 // <src>Table::isMultiUsed()</src> can be used to check if a table
1504 // is open in other processes.
1505 // <br>
1506 // The function <src>TableUtil::deleteTable</src> should be used to delete
1507 // a table. Before deleting the table it ensures that it is writable
1508 // and that it is not open in the current or another process.
1509 // <p>
1510 // The following example wants to read the table uninterrupted, thus it uses
1511 // the <src>PermanentLocking</src> option. It also wants to wait
1512 // until the lock is actually acquired.
1513 // Note that the destructor closes the table and releases the lock.
1514 // <srcblock>
1515 // // Open the table (readonly).
1516 // // Acquire a permanent (read) lock.
1517 // // It waits until the lock is acquired.
1518 // Table tab ("some.name",
1519 // TableLock(TableLock::PermanentLockingWait));
1520 // </srcblock>
1521 //
1522 // The following example uses the automatic locking.
1523 // It tells the system to check about every 20 seconds if another
1524 // process wants access to the table.
1525 // <srcblock>
1526 // // Open the table (readonly).
1527 // Table tab ("some.name",
1528 // TableLock(TableLock::AutoLocking, 20));
1529 // </srcblock>
1530 //
1531 // The following example gets data (say from a GUI) and writes it
1532 // as a row into the table. To lock the table as little as possible,
1533 // the lock is acquired just before writing and released immediately
1534 // thereafter.
1535 // <srcblock>
1536 // // Open the table (writable).
1537 // Table tab ("some.name",
1538 // TableLock(TableLock::UserLocking),
1539 // Table::Update);
1540 // while (True) {
1541 // get input data
1542 // tab.lock(); // Acquire a write lock and wait for it.
1543 // tab.addRow();
1544 // write data into the row
1545 // tab.unlock(); // Release the lock.
1546 // }
1547 // </srcblock>
1548 //
1549 // The following example deletes a table if it is not used in
1550 // another process.
1551 // <srcblock>
1552 // Table tab ("some.name");
1553 // if (! tab.isMultiUsed()) {
1554 // tab.markForDelete();
1555 // }
1556 // </srcblock>
1557 
1558 // <ANCHOR NAME="Tables:KeyLookup">
1559 // <h3>Table lookup based on a key</h3></ANCHOR>
1560 //
1561 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1562 // user a means to find the rows matching a given key or key range.
1563 // It is a somewhat primitive replacement of a B-tree index and in the
1564 // future it may be replaced by a proper B+-tree implementation.
1565 // <p>
1566 // The <src>ColumnsIndex</src> class makes it possible to build an
1567 // in-core index on one or more columns. Looking up a key or key range
1568 // is done using a binary search on that index. It returns a vector
1569 // containing the row numbers of the rows matching the key (range).
1570 // <p>
1571 // The class is not capable of tracing changes in the underlying column(s).
1572 // It detects a change in the number of rows and updates the index
1573 // accordingly. However, it has to be told explicitly when a value
1574 // in the underlying column(s) changes.
1575 // <p>
1576 // The following example shows how the class can be used.
1577 // <example>
1578 // Suppose one has an antenna table with key ANTENNA.
1579 // <srcblock>
1580 // // Open the table and make an index for column ANTENNA.
1581 // Table tab("antenna.tab");
1582 // ColumnsIndex colInx(tab, "ANTENNA");
1583 // // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1584 // // Its data type has to match the data type of the column.
1585 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1586 // // Now loop in some way and find the row for the antenna
1587 // // involved in that loop.
1588 // Bool found;
1589 // while (...) {
1590 // // Fill the key field and get the row number.
1591 // // ANTENNA is a unique key, so only one row number matches.
1592 // // Otherwise function getRowNumbers had to be used.
1593 // *antFld = antenna;
1594 // uInt antRownr = colInx.getRowNumber (found);
1595 // if (!found) {
1596 // cout << "Antenna " << antenna << " is unknown" << endl;
1597 // } else {
1598 // // antRownr can now be used to get data from that row in
1599 // // the antenna table.
1600 // }
1601 // }
1602 // </srcblock>
1603 // </example>
1604 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1605 // advanced example. It shows how to use a private compare function
1606 // to adjust the lookup if the index does not contain single
1607 // key values, but intervals instead. This is useful if a row in
1608 // a (sub)table is valid for, say, a time range instead of a single
1609 // timestamp.
1610 
1611 // <ANCHOR NAME="Tables:performance">
1612 // <h3>Performance and robustness considerations</h3></ANCHOR>
1613 //
1614 // CTDS resembles a database system, but it is not as robust.
1615 // It lacks the transaction and logging facilities common to data base systems.
1616 // It means that in case of a crash data might be lost.
1617 // To reduce the risk of data loss to
1618 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1619 // with an <tt>fsync</tt> to ensure that all data are really written.
1620 // However, that can degrade the performance because it involves extra writes.
1621 // So one should find the right balance between robustness and performance.
1622 //
1623 // To get a good feeling for the performance issues, it is important to
1624 // understand some of the internals of CTDS.
1625 // <br>The storage managers drive the performance. All storage managers use
1626 // buckets (called tiles for the TiledStMan) which contain the data.
1627 // All IO is done by bucket. The bucket/tile size is defined when creating
1628 // the storage manager objects. Sometimes the default will do, but usually
1629 // it is better to set it explicitly.
1630 //
1631 // It is best to do a flush when a tile is full.
1632 // For example: <br>
1633 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1634 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1635 // store, say, N/2 rows in a tile and do a flush each time all baselines
1636 // are written. In that way tiles are fully filled when doing the flush, so
1637 // no extra IO is involved.
1638 // <br>Here is some code showing this when creating a MeasurementSet.
1639 // The code should speak for itself.
1640 // <srcblock>
1641 // MS* createMS (const String& msName, int nrchan, int nrant)
1642 // {
1643 // // Get the MS main default table description.
1644 // TableDesc td = MS::requiredTableDesc();
1645 // // Add the data column and its unit.
1646 // MS::addColumnToDesc(td, MS::DATA, 2);
1647 // td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1648 // define("UNIT","Jy");
1649 // // Store the DATA and FLAG column in two separate files.
1650 // // In this way accessing FLAG only is much cheaper than
1651 // // when combining DATA and FLAG.
1652 // // All data have the same shape, thus use TiledColumnStMan.
1653 // // Also store UVW with TiledColumnStMan.
1654 // Vector<String> tsmNames(1);
1655 // tsmNames[0] = MS::columnName(MS::DATA);
1656 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1657 // td.defineHypercolumn("TiledData", 3, tsmNames);
1658 // tsmNames[0] = MS::columnName(MS::FLAG);
1659 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1660 // td.defineHypercolumn("TiledFlag", 3, tsmNames);
1661 // tsmNames[0] = MS::columnName(MS::UVW);
1662 // td.defineHypercolumn("TiledUVW", 2, tsmNames);
1663 // // Setup the new table.
1664 // SetupNewTable newTab(msName, td, Table::New);
1665 // // Most columns vary slowly and use the IncrStMan.
1666 // IncrementalStMan incrStMan("ISMData");
1667 // // A few columns use the StandardStMan (set an appropriate bucket size).
1668 // StandardStMan stanStMan("SSMData", 32768);
1669 // // Store all pol and freq and some rows in a single tile.
1670 // // autocorrelations are written, thus in total there are
1671 // // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1672 // // integer number of tiles.
1673 // TiledColumnStMan tiledData("TiledData",
1674 // IPosition(3,4,nrchan,(nrant+1)/2));
1675 // TiledColumnStMan tiledFlag("TiledFlag",
1676 // IPosition(3,4,nrchan,8*(nrant+1)/2));
1677 // TiledColumnStMan tiledUVW("TiledUVW",
1678 // IPosition(2,3,nrant*(nrant+1)/2));
1679 // newTab.bindAll (incrStMan);
1680 // newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1681 // newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1682 // newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1683 // newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1684 // newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1685 // // Create the MS and its subtables.
1686 // // Get access to its columns.
1687 // MS* msp = new MeasurementSet(newTab);
1688 // // Create all subtables.
1689 // // Do this after the creation of optional subtables,
1690 // // so the MS will know about those optional subtables.
1691 // msp->createDefaultSubtables (Table::New);
1692 // return msp;
1693 // }
1694 // </srcblock>
1695 
1696 // <h4>Some more performance considerations</h4>
1697 // Which storage managers to use and how to use them depends heavily on
1698 // the type of data and the access patterns to the data. Here follow some
1699 // guidelines:
1700 // <ol>
1701 // <li> Scalar data can be stored with the StandardStMan (SSM) or
1702 // IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1703 // in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1704 // Note that very long strings (longer than the bucketsize) can only
1705 // be stored with the SSM.
1706 // <li> Any number of storage managers can be used. In fact, each column
1707 // can have a storage manager of its own resulting in column-wise
1708 // stored data which is more and more used in data base systems.
1709 // In that way a query or sort on that column is very fast, because
1710 // the buckets to read only contain data of that column.
1711 // In practice one can decide to combine a few frequently used columns
1712 // in a storage manager.
1713 // <li> Array data can be stored with any column manager. Small fixed size
1714 // arrays can be stored directly with the SSM
1715 // (or ISM if not changing much).
1716 // However, they can also be stored with a TiledStMan (TSM) as shown
1717 // for the UVW column in the example above.
1718 // <br> Large arrays should usually be stored with a TSM. However,
1719 // if it must be possible to change the shape of an array after it
1720 // was stored, the SSM (or ISM) must be used. Note that in that
1721 // case a lot of disk space can be wasted, because the SSM and ISM
1722 // store the array data at the end of the file if the array got
1723 // bigger and do not reuse the old space. The only way to
1724 // reclaim it is by making a deep copy of the entire table.
1725 // <li> If an array is stored with a TSM, it is important to decide
1726 // which TSM to use.
1727 // <ol>
1728 // <li> The TiledColumnStMan is the most efficient, but only suitable
1729 // for arrays having the same shape in the entire column.
1730 // <li> The TiledShapeStMan is suitable for columns where the arrays
1731 // can have a few shapes.
1732 // <li> The TiledCellStMan is suitable for columns where the arrays
1733 // can have many different shapes.
1734 // </ol>
1735 // This is discussed in more detail
1736 // <a href="#Tables:TiledStMan">above</a>.
1737 // <li> If storing an array with a TSM, it can be very important to
1738 // choose the right tile shape. Not only does this define the size
1739 // of a tile, but it also defines if access in other directions
1740 // than the natural direction can be fast. It is also discussed in
1741 // more detail <a href="#Tables:TiledStMan">above</a>.
1742 // <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1743 // and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1744 // is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1745 // to separate them, otherwise tiles containing FLAG also contain DATA making the
1746 // tiles much bigger, thus more expensive to access.
1747 // </ol>
1748 //
1749 // <ANCHOR NAME="Tables:iotracing">
1750 // <h4>IO Tracing</h4></ANCHOR>
1751 //
1752 // Several forms of tracing can be done to see how the Table I/O performs.
1753 // <ul>
1754 // <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1755 // collect trace information about the physical IO.
1756 // <li> The function <src>showCacheStatistics</src> in class
1757 // TiledStManAccessor can be used to show the number of actual reads
1758 // and writes and the percentage of cache hits.
1759 // <li> The software has some options to trace the operations done on
1760 // tables. It is possible to specify the columns and/or the operations
1761 // to be traced. The following <src>aipsrc</src> variables can be used.
1762 // <ul>
1763 // <li> <src>table.trace.filename</src> specifies the file to write the
1764 // trace output to. If not given or empty, no tracing will be done.
1765 // The file name can contain environment variables or a tilde.
1766 // <li> <src>table.trace.operation</src> specifies the operations to be
1767 // traced. It is a string containing s, r, and/or w where
1768 // s means tracing RefTable construction (selection/sort),
1769 // r means column reads, and w means column writes.
1770 // If empty, only the high level table operations (open, create, close)
1771 // will be traced.
1772 // <li> <src>table.trace.columntype</src> specifies the types of columns to
1773 // be traced. It is a string containing the characters s, a, and/or r.
1774 // s means all scalar columns, a all array columns, and r all record
1775 // columns. If empty and if <src>table.trace.column</src> is empty,
1776 // its default value is a.
1777 // <li> <src>table.trace.column</src> specifies names of columns to be
1778 // traced. Its value can be one or more glob-like patterns separated
1779 // by commas without any whitespace. The default is empty.
1780 // For example:
1781 // <srcblock>
1782 // table.trace.column: *DATA,FLAG,WEIGHT*
1783 // </srcblock>
1784 // to trace all DATA, the FLAG, and all WEIGHT columns.
1785 // </ul>
1786 // The trace output is a text file with the following columns
1787 // separated by a space.
1788 // <ul>
1789 // <li> The UTC time the trace line was written (with msec accuracy).
1790 // <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1791 // s(election/sort/iter), p(rojection).
1792 // t means an arbitrary table operation as given in the name column.
1793 // <li> The table-id (as t=i) given at table creation (new) or open.
1794 // <li> The table name, column name, or table operation
1795 // (as <src>*oper*</src>).
1796 // <src>*reftable*</src> means that the operation is on a RefTable
1797 // (thus result of selection, sort, projection, or iteration).
1798 // <li> The row or rows to access (* means all rows).
1799 // Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1800 // where e and i are only given if applicable (default i is 1).
1801 // Note that e is inclusive and defaults to s.
1802 // <li> The optional array shape to access (none means scalar).
1803 // In case multiple rows are accessed, the last shape value is the
1804 // number of rows.
1805 // <li> The optional slice of the array in each row as [start][end][stride].
1806 // </ul>
1807 // Shape, start, end, and stride are given in Fortran-order as
1808 // [n1,n2,...].
1809 // </ul>
1810 
1811 // <ANCHOR NAME="Tables:applications">
1812 // <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1813 // <ul>
1814 // <li><em>showtableinfo</em> shows the structure of a table. It can show:
1815 // <ul>
1816 // <li> the columns and their format (optionally sorted on name)
1817 // <li> the data managers used to store the column data
1818 // <li> the table and/or column keywords and their values
1819 // <li> recursively the same info of the subtables
1820 // </ul>
1821 // <li><em>showtablelock</em> shows if a table is locked or opened and by
1822 // which process.
1823 // <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1824 // <li><em>tomf</em> copies the given files to a MultiFile.
1825 // <li><em>taql</em> can be used to query a table using the
1826 // <a href="../notes/199.html">Table Query Language</a> (TaQL).
1827 // </ul>
1828 //
1829 // </synopsis>
1830 // </module>
1831 
1832 
1833 
1834 } //# NAMESPACE CASACORE - END
1835 
1836 #endif