casacore
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MultiFileBase.h
Go to the documentation of this file.
1 //# MultiFileBase.h: Abstract base class to combine multiple files in a single one
2 //# Copyright (C) 2014
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id: RegularFileIO.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
27 
28 #ifndef CASA_MULTIFILEBASE_H
29 #define CASA_MULTIFILEBASE_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
36 #include <casacore/casa/vector.h>
37 #include <casacore/casa/ostream.h>
38 
39 
40 namespace casacore { //# NAMESPACE CASACORE - BEGIN
41 
42  //# Forward declaration.
43  class AipsIO;
44  class HDF5Group;
45  class HDF5DataSet;
46 
47 
48  // <summary>
49  // Helper class for MultiFileInfo holding a data buffer
50  // </summary>
51  // <synopsis>
52  // The buffer can be allocated with posix_memalign (for O_DIRECT support).
53  // Hence the memory must be freed using free, which makes it impossible
54  // to use a shared_ptr to that memory. Hence it is encapsulated in this class.
55  // </synopsis>
56  struct MultiFileBuffer {
     // Create a buffer for bufSize bytes. The definition is not part of this
     // header; per the synopsis above the memory can come from posix_memalign
     // when O_DIRECT is to be supported.
57  MultiFileBuffer (size_t bufSize, Bool useODirect);
     // NOTE(review): listing line 58, the declaration that owns the body
     // below, is absent from this extract. The body releases the buffer with
     // free() when the pointer is non-null, so it is presumably the
     // destructor -- confirm against the real header.
59  { if (data) free (data); }
60  // Data member: pointer to the buffer memory (handed to free() above).
61  char* data;
62  private:
     // NOTE(review): listing lines 63-64 are absent from this extract. The
     // generated index mentions operator=(const MultiFileBuffer&), so the
     // copy operations are presumably declared here to forbid copying --
     // confirm against the real header.
65  };
66 
67  // <summary>
68  // Helper class for MultiFileBase containing info per internal file.
69  // </summary>
70  // <synopsis>
71  // This struct defines the various fields describing a logical file in a
72  // class derived from MultiFileBase (such as MultiFile or MultiHDF5).
73  // </synopsis>
74  // <use visibility=local>
75  struct MultiFileInfo {
76  // Initialize the object and create the buffer with the proper size.
77  // If align>1 (for use of O_DIRECT), the buffer is properly aligned and it
78  // is ensured that its size is a multiple of the alignment.
     // NOTE(review): the text above speaks of 'align', but the signature
     // takes a useODirect flag; the wording presumably predates that flag.
     // Both arguments have defaults, hence the constructor is explicit.
79  explicit MultiFileInfo (Int64 bufSize=0, Bool useODirect=False);
80  // Allocate the buffer: a new MultiFileBuffer is created for the given size
     // and assigned to 'buffer', which drops this object's reference to any
     // buffer it held before.
81  void allocBuffer (Int64 bufSize, Bool useODirect=False)
82  { buffer = std::shared_ptr<MultiFileBuffer> (new MultiFileBuffer(bufSize, useODirect)); }
83  //# Data members.
84  vector<Int64> blockNrs; // physical blocknrs for this logical file
85  Int64 curBlock; // the data block held in buffer (<0 is none)
86  Int64 fsize; // file size (in bytes)
87  String name; // the virtual file name
88  Bool dirty; // has data in buffer been changed?
89  std::shared_ptr<MultiFileBuffer> buffer; // buffer holding a data block
     // NOTE(review): listing lines 90-91 are absent from this extract; the
     // generated index lists CountedPtr<HDF5Group> group (line 90) and
     // CountedPtr<HDF5DataSet> dataSet (line 91) at those positions.
92  };
     // Output a MultiFileInfo on an ostream or an AipsIO stream.
     // Both operators return void, so they cannot be chained with further
     // << operations in a single expression.
93  void operator<< (ostream&, const MultiFileInfo&);
94  void operator<< (AipsIO&, const MultiFileInfo&);
96 
97 
98  // <summary>
99  // Abstract base class to combine multiple files in a single one.
100  // </summary>
101 
102  // <use visibility=export>
103 
104  // <reviewed reviewer="" date="" tests="tMultiFile" demos="">
105  // </reviewed>
106 
107  // <synopsis>
108  // This class is a container file holding multiple virtual files. It is
109  // primarily meant as a container file for the storage manager files of a
110  // table to reduce the number of files used (especially for Lustre) and to
111  // reduce the number of open files (especially when concatenating tables).
112  // <br>A secondary goal is offering the ability to use an IO buffer size
113  // that matches the file system well (large buffer size for e.g. ZFS).
114  //
115  // The SetupNewTable constructor has a StorageOption argument to define
116  // if a MultiFile has to be used and if so, the buffer size to use.
117  // It is also possible to specify that through aipsrc variables.
118  //
119  // A virtual file is spread over multiple (fixed size) data blocks in the
120  // MultiFile. A data block is never shared by multiple files.
121  // For each virtual file MultiFile keeps a MultiFileInfo object telling
122  // the file size and the block numbers used for the file. When flushing
123  // the MultiFile, this meta info is written into a header block and,
124  // if needed, continuation blocks. On open and resync, it is read back.
125  // <br>
126  //
127  // A virtual file is represented by an MFFileIO object, which is derived
128  // from ByteIO and as such part of the casacore IO framework. It makes it
129  // possible for applications to access a virtual file in the same way as
130  // a regular file.
131  //
132  // It is possible to delete a virtual file. Its blocks will be added to
133  // the free block list (which is also stored in the meta info).
134  // </synopsis>
135 
136  // <example>
137  // In principle it is possible to use the MultiFile functions directly.
138  // However, in general it is much easier to use an MFFileIO object
139  // per virtual file as shown below.
140  // <srcblock>
141  // // Create a new MultiFile using a block size of 1 MB.
142  // MultiFile mfile("file.mf", ByteIO::New, 1048576);
143  // // Create a virtual file in it.
144  // MFFileIO mf1(mfile, "mf1", ByteIO::New);
145  // // Use it (for example) as the sink of AipsIO.
146  // AipsIO stream (&mf1);
147  // // Write values.
148  // stream << (Int)10;
149  // stream << True;
150  // // Seek to beginning of file and read data in.
151  // stream.setpos (0);
152  // Int vali;
153  // Bool valb;
154  // stream >> vali >> valb;
155  // </srcblock>
156  // </example>
157 
158  // <todo>
159  // <li> write headers at alternating file positions (for robustness)
160  // <li> possibly write headers entirely at the end if larger than blocksize
161  // </todo>
162 
163 
165  {
     // NOTE(review): listing line 164, which should hold the class head for
     // this body, is absent from this extract.
166  public:
167  // Open or create a MultiFileBase with the given name.
168  // Upon creation the block size can be given. If 0, it uses the block size
169  // of the file system the file is on.
170  // If useODirect=True, it means that O_DIRECT is used. If the OS does not
171  // support it, the flag will always be False. If True, the data buffers will
172  // have a proper alignment and size (as needed by O_DIRECT).
     // NOTE(review): listing line 173 (the constructor declaration described
     // above) is absent from this extract; the generated index lists it as
     // MultiFileBase(const String &name, Int blockSize, Bool useODirect).
174 
175  // The destructor flushes and closes the file.
176  virtual ~MultiFileBase();
177 
178  // Return the file id of a file in the MultiFileBase object.
179  // If the name is unknown, an exception is thrown if throwExcp is set.
180  // Otherwise it returns -1.
181  Int fileId (const String& name, Bool throwExcp=True) const;
182 
183  // Add a file to the MultiFileBase object. It returns the file id.
184  // Only the base name of the given file name is used. In this way the
185  // MultiFileBase container file can be moved.
186  Int addFile (const String& name);
187 
188  // Delete a file. It adds its blocks to the free block list.
189  void deleteFile (Int fileId);
190 
191  // Read a block at the given offset. It returns the actual size read.
192  Int64 read (Int fileId, void* buffer, Int64 size, Int64 offset);
193 
194  // Write a block at the given offset. It returns the actual size written.
195  Int64 write (Int fileId, const void* buffer, Int64 size, Int64 offset);
196 
197  // Flush the file by writing all dirty data and all header info.
198  void flush();
199 
200  // Resync with another process by clearing the buffers and rereading
201  // the header. The header is only read if its counter has changed.
202  void resync();
203 
204  // Reopen the underlying file for read/write access.
205  // Nothing will be done if the file is writable already.
206  // Otherwise it will be reopened and an exception will be thrown
207  // if it is not possible to reopen it for read/write access.
208  virtual void reopenRW() = 0;
209 
210  // Fsync the file (i.e., force the data to be physically written).
211  virtual void fsync() = 0;
212 
213  // Get the file name of the MultiFileBase (returned by value).
214  String fileName() const
215  { return itsName; }
216 
217  // Is the file writable?
218  Bool isWritable() const
219  { return itsWritable; }
220 
221  // Will O_DIRECT be used?
222  Bool useODirect() const
223  { return itsUseODirect; }
224 
225  // Get the block size used.
226  Int64 blockSize() const
227  { return itsBlockSize; }
228 
229  // Get the nr of virtual files.
230  uInt nfile() const;
231 
232  // Get the total nr of data blocks used.
233  Int64 size() const
234  { return itsNrBlock; }
235 
236  // Get the info object (for test purposes mainly).
     // A reference to the internal vector is returned, not a copy.
237  const vector<MultiFileInfo>& info() const
238  { return itsInfo; }
239 
240  // Get the free blocks (for test purposes mainly).
     // A reference to the internal vector is returned, not a copy.
241  const vector<Int64>& freeBlocks() const
242  { return itsFreeBlocks; }
243 
244  private:
     // NOTE(review): listing line 245, the header of the function whose body
     // follows, is absent from this extract; the generated index lists it as
     // void writeDirty(MultiFileInfo &info).
     // The body hands the block currently held in the file's buffer
     // (info.curBlock) to writeBlock and then clears the dirty flag.
246  {
247  writeBlock (info, info.curBlock, info.buffer->data);
248  info.dirty = False;
249  }
250 
251  // Do the class-specific actions on adding a file.
252  virtual void doAddFile (MultiFileInfo&) = 0;
253  // Do the class-specific actions on deleting a file.
254  virtual void doDeleteFile (MultiFileInfo&) = 0;
255  // Flush the file itself.
256  virtual void flushFile() = 0;
257  // Flush and close the file.
258  virtual void close() = 0;
259  // Write the header info.
260  virtual void writeHeader() = 0;
261  // Read the header info. If always==False, the info is only read if the
262  // header counter has changed.
263  virtual void readHeader (Bool always=True) = 0;
264  // Extend the virtual file to fit lastblk.
265  virtual void extend (MultiFileInfo& info, Int64 lastblk) = 0;
266  // Write a data block.
267  virtual void writeBlock (MultiFileInfo& info, Int64 blknr,
268  const void* buffer) = 0;
269  // Read a data block.
270  virtual void readBlock (MultiFileInfo& info, Int64 blknr,
271  void* buffer) = 0;
272 
273  protected:
274  // Set the flags and blockSize for a new MultiFile/HDF5.
275  void setNewFile();
276 
277  //# Data members
     // NOTE(review): listing line 278 is absent from this extract; itsName,
     // which fileName() returns, is presumably declared there -- confirm.
279  Int64 itsBlockSize; // The blocksize used
280  Int64 itsNrBlock; // The total nr of blocks actually used
281  Int64 itsHdrCounter; // Counter of header changes
282  vector<MultiFileInfo> itsInfo; // Returned by info()
283  std::shared_ptr<MultiFileBuffer> itsBuffer;
284  Bool itsUseODirect; // use O_DIRECT?
285  Bool itsWritable; // Is the file writable?
286  Bool itsChanged; // Has header info changed since last flush?
287  vector<Int64> itsFreeBlocks; // Returned by freeBlocks()
288  };
289 
290 
291 } //# NAMESPACE CASACORE - END
292 
293 #endif
void resync()
Resync with another process by clearing the buffers and rereading the header.
uInt nfile() const
Get the nr of virtual files.
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
Int64 size() const
Get the total nr of data blocks used.
int Int
Definition: aipstype.h:50
const vector< Int64 > & freeBlocks() const
Get the free blocks (for test purposes mainly).
Abstract base class to combine multiple files in a single one.
AipsIO is the object persistency mechanism of Casacore.
Definition: AipsIO.h:168
const vector< MultiFileInfo > & info() const
Get the info object (for test purposes mainly).
CountedPtr< HDF5DataSet > dataSet
Definition: MultiFileBase.h:91
Helper class for MultiFileBase containing info per internal file.
Definition: MultiFileBase.h:75
Int addFile(const String &name)
Add a file to the MultiFileBase object.
ostream & operator<<(ostream &os, const IComplex &)
Show on ostream.
MultiFileInfo(Int64 bufSize=0, Bool useODirect=False)
Initialize the object and create the buffer with the proper size.
Int64 read(Int fileId, void *buffer, Int64 size, Int64 offset)
Read a block at the given offset.
Bool useODirect() const
Will O_DIRECT be used?
char * data
Data member.
Definition: MultiFileBase.h:61
virtual void fsync()=0
Fsync the file (i.e., force the data to be physically written).
std::shared_ptr< MultiFileBuffer > buffer
Definition: MultiFileBase.h:89
vector< Int64 > blockNrs
Definition: MultiFileBase.h:84
void deleteFile(Int fileId)
Delete a file.
void flush()
Flush the file by writing all dirty data and all header info.
Referenced counted pointer for constant data.
Definition: CountedPtr.h:80
virtual void writeBlock(MultiFileInfo &info, Int64 blknr, const void *buffer)=0
Write a data block.
Helper class for MultiFileInfo holding a data buffer.
Definition: MultiFileBase.h:56
virtual void close()=0
Flush and close the file.
String fileName() const
Get the file name of the MultiFileBase.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
virtual void doDeleteFile(MultiFileInfo &)=0
Do the class-specific actions on deleting a file.
MultiFileBuffer(size_t bufSize, Bool useODirect)
virtual ~MultiFileBase()
The destructor flushes and closes the file.
virtual void readHeader(Bool always=True)=0
Read the header info.
Int64 write(Int fileId, const void *buffer, Int64 size, Int64 offset)
Write a block at the given offset.
const Bool False
Definition: aipstype.h:44
virtual void extend(MultiFileInfo &info, Int64 lastblk)=0
Extend the virtual file to fit lastblk.
MultiFileBase(const String &name, Int blockSize, Bool useODirect)
Open or create a MultiFileBase with the given name.
CountedPtr< HDF5Group > group
Definition: MultiFileBase.h:90
void allocBuffer(Int64 bufSize, Bool useODirect=False)
Allocate the buffer.
Definition: MultiFileBase.h:81
virtual void readBlock(MultiFileInfo &info, Int64 blknr, void *buffer)=0
Read a data block.
free(pool)
Int64 blockSize() const
Get the block size used.
String: the storage and methods of handling collections of characters.
Definition: String.h:225
vector< Int64 > itsFreeBlocks
AipsIO & operator>>(AipsIO &os, Record &rec)
Definition: Record.h:465
vector< MultiFileInfo > itsInfo
Int fileId(const String &name, Bool throwExcp=True) const
Return the file id of a file in the MultiFileBase object.
std::shared_ptr< MultiFileBuffer > itsBuffer
Bool isWritable() const
Is the file writable?
void writeDirty(MultiFileInfo &info)
MultiFileBuffer & operator=(const MultiFileBuffer &)
virtual void reopenRW()=0
Reopen the underlying file for read/write access.
const Bool True
Definition: aipstype.h:43
virtual void flushFile()=0
Flush the file itself.
virtual void doAddFile(MultiFileInfo &)=0
Do the class-specific actions on adding a file.
virtual void writeHeader()=0
Write the header info.
unsigned int uInt
Definition: aipstype.h:51
void setNewFile()
Set the flags and blockSize for a new MultiFile/HDF5.