00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef LEMUR_FILE_HPP
00020 #define LEMUR_FILE_HPP
00021
00022 #include <vector>
00023 #include <sstream>
00024 #include <fstream>
00025 #include <cassert>
00026
00027
// Segment sizing constants.
// FILE_MAXIMUM_SEGMENT_SIZE works out to 2^31 - 8192 bytes: the largest
// multiple of FILE_PAGE_SIZE that still fits in a signed 32-bit offset.
#define FILE_PAGE_SIZE (8192)
#define FILE_1GB (1024 * 1024 * 1024)
#define FILE_2GB_MINUS_1 (FILE_1GB + (FILE_1GB - 1))
#define FILE_MAXIMUM_SEGMENT_SIZE (FILE_2GB_MINUS_1 - FILE_PAGE_SIZE + 1)

/// A logical file stored on disk as a sequence of segment files named
/// "<fileName>$0", "<fileName>$1", ... Each segment is accessed through its
/// own std::fstream and holds at most FILE_MAXIMUM_SEGMENT_SIZE bytes, while
/// the File itself exposes one continuous offset space with an fstream-like
/// interface (open/close/read/write/seekg/seekp/tellg/tellp/gcount/rdstate).
///
/// The object owns its segments through raw pointers and therefore cannot be
/// copied.
class File {
public:
#ifdef WIN32
  /// Offset within the logical (multi-segment) file.
  typedef __int64 offset_type;
  /// Offset type handed to the underlying stream library (one segment).
  typedef int library_offset_type;
#define _UNLINK ::unlink
#else
  /// Offset within the logical (multi-segment) file.
  typedef off_t offset_type;
  /// Offset type handed to the underlying stream library (one segment).
  typedef off_t library_offset_type;
#define _UNLINK std::remove
#endif

private:
  /// One on-disk segment covering the half-open logical range [start, end).
  struct FileSegment {
    std::fstream stream;
    offset_type start;
    offset_type end;

    FileSegment() : start( 0 ), end( 0 ) {}

    /// True when position lies inside [start, end).
    bool contains( offset_type position ) const {
      return start <= position && end > position;
    }

    /// True when the whole segment lies at or before position.
    bool before( offset_type position ) const {
      return end <= position;
    }
  };

  std::string _fileName;
  std::vector<FileSegment*> _segments;

  FileSegment* _readSegment;    // segment used by the last read positioning (0 when closed)
  FileSegment* _writeSegment;   // segment used by the last write positioning (0 when closed)
  offset_type _readPosition;    // logical get position
  offset_type _writePosition;   // logical put position
  offset_type _readCount;       // bytes delivered by the most recent read()
  bool _readPointerValid;       // true when the stream get pointer matches _readPosition
  bool _writePointerValid;      // true when the stream put pointer matches _writePosition
  int _mode;
  int _state;

  // Copying would make two objects delete the same FileSegment pointers,
  // so copy construction and assignment are declared but never defined.
  File( const File& );
  File& operator=( const File& );

  /// Builds the on-disk name of segment number `segment` of `fileName`.
  static std::string segmentName( const std::string& fileName, int segment ) {
    std::stringstream segName;
    segName << fileName << "$" << segment;
    return segName.str();
  }

  /// Creates a new, empty segment file after the current last segment and
  /// opens it for reading and writing.
  void _appendSegment() {
    FileSegment* segment = new FileSegment();
    const std::string name = segmentName( _fileName, static_cast<int>( _segments.size() ) );

    segment->start = size();
    segment->end = segment->start;

    // Opening with in|out fails when the file does not exist, so the file
    // is created with an out-only open first and then reopened read/write.
    segment->stream.open( name.c_str(), std::ios::out | std::ios::binary );
    segment->stream.close();
    segment->stream.open( name.c_str(), std::ios::out | std::ios::binary | std::ios::in );

    _segments.push_back( segment );
  }

  /// Converts a (relativePosition, direction) pair into an absolute logical
  /// offset. `currentPosition` is the base used for std::fstream::cur.
  /// An unrecognised direction leaves the position unchanged.
  offset_type _absolutePosition( offset_type relativePosition,
                                 offset_type currentPosition,
                                 std::fstream::seekdir direction ) const {
    // Start from the current position so the result is always defined.
    offset_type newPosition = currentPosition;

    switch( direction ) {
      case std::fstream::beg:
        newPosition = relativePosition;
        break;

      case std::fstream::cur:
        newPosition = currentPosition + relativePosition;
        break;

      case std::fstream::end:
        newPosition = size() + relativePosition;
        break;

      default:
        break;
    }

    assert( newPosition <= size() );
    return newPosition;
  }

  /// Finds the segment that holds `absolutePosition`. `guess` (may be 0) is
  /// tried first; a position at or past the end of the last segment maps to
  /// the last segment. Requires at least one segment.
  FileSegment* _segmentForPosition( offset_type absolutePosition,
                                    FileSegment* guess ) {
    assert( absolutePosition <= size() );
    assert( absolutePosition >= 0 );
    assert( _segments.size() );

    if( guess != 0 && guess->contains( absolutePosition ) ) {
      return guess;
    }

    std::vector<FileSegment*>::iterator low = _segments.begin();
    std::vector<FileSegment*>::iterator high = _segments.end() - 1;

    // End-of-file (or an empty last segment) belongs to the last segment.
    if( (*high)->end <= absolutePosition ) {
      return *high;
    }

    assert( _segments.size() > 1 );

    // Binary search: `low` stays at a segment before the position (or the
    // first segment), `high` at a segment that is not before it.
    while( high - low > 1 ) {
      std::vector<FileSegment*>::iterator middle = low + ( ( high - low ) / 2 );

      if( (*middle)->before( absolutePosition ) ) {
        low = middle;
      } else {
        high = middle;
      }
    }

    return (*high)->contains( absolutePosition ) ? *high : *low;
  }

  /// Makes the underlying stream's get pointer agree with _readPosition.
  /// Positioning for reading invalidates the write pointer, so the next
  /// write repositions its stream as well. Requires at least one segment.
  void _validateReadPointer() {
    if( !_readPointerValid ) {
      _readSegment = _segmentForPosition( _readPosition, _readSegment );
      library_offset_type toHere = _readPosition - _readSegment->start;
      _readSegment->stream.seekg( toHere, std::ios::beg );
      _readPointerValid = true;
      _writePointerValid = false;
    }

    assert( _readPosition <= _readSegment->end );
    assert( _readPosition >= _readSegment->start );
    assert( _readPosition == ( library_offset_type( _readSegment->stream.tellg() ) + _readSegment->start ) );
  }

  /// Makes the underlying stream's put pointer agree with _writePosition.
  /// Positioning for writing invalidates the read pointer, so the next
  /// read repositions its stream as well. Requires at least one segment.
  void _validateWritePointer() {
    if( !_writePointerValid ) {
      _writeSegment = _segmentForPosition( _writePosition, _writeSegment );
      library_offset_type toHere = _writePosition - _writeSegment->start;
      _writeSegment->stream.seekp( toHere, std::ios::beg );
      _writePointerValid = true;
      _readPointerValid = false;
    }

    assert( _writePosition <= _writeSegment->end );
    assert( _writePosition >= _writeSegment->start );
    assert( _writePosition == ( library_offset_type( _writeSegment->stream.tellp() ) + _writeSegment->start ) );
  }

public:
  /// Creates a closed file: no segments, positions at 0, state goodbit.
  File() :
    _readSegment( 0 ),
    _writeSegment( 0 ),
    _readPosition( 0 ),
    _writePosition( 0 ),
    _readCount( 0 ),
    _readPointerValid( false ),
    _writePointerValid( false ),
    _mode( 0 ),
    _state( std::fstream::goodbit )
  {
  }

  ~File() {
    close();
  }

  /// Opens the segmented file `fileName`, closing anything opened before.
  ///
  /// @param fileName base name; segment files are "<fileName>$<n>".
  /// @param mode     combination of std::fstream open mode flags.
  ///
  /// Existing segments are discovered by opening "$0", "$1", ... until one
  /// cannot be opened. With trunc, every discovered segment is removed.
  /// With out, a first segment is created when none remain. If no segment
  /// is available afterwards, failbit is set in rdstate(). With ate or app
  /// both positions start at the end, otherwise at the beginning.
  void open( const std::string& fileName, int mode ) {
    close();

    _readPosition = 0;
    _writePosition = 0;
    _readCount = 0;

    _readPointerValid = false;
    _writePointerValid = false;

    _mode = mode | std::fstream::binary;
    _state = std::fstream::goodbit;
    _fileName = fileName;

    const bool truncating = ( mode & std::fstream::trunc ) != 0;
    const bool writable = ( mode & std::fstream::out ) != 0;

    // Only binary/in/out are passed on when reopening a segment for
    // writing; trunc, ate and app are handled by this class itself.
    const std::ios_base::openmode reopenMode =
      static_cast<std::ios_base::openmode>( mode ) &
      ( std::ios_base::binary | std::ios_base::in | std::ios_base::out );

    offset_type totalLength = 0;

    for( int i = 0; ; i++ ) {
      const std::string name = segmentName( _fileName, i );
      FileSegment* segment = new FileSegment();

      // Probe for existence with a read-only open.
      segment->stream.open( name.c_str(), std::ios_base::in | std::ios_base::binary );

      if( segment->stream.fail() ) {
        // No such segment: the previous one (if any) was the last.
        delete segment;
        break;
      }

      if( truncating ) {
        // Truncation removes every existing segment file.
        segment->stream.close();
        delete segment;
        _UNLINK( name.c_str() );
        continue;
      }

      if( writable ) {
        segment->stream.close();
        segment->stream.open( name.c_str(), reopenMode );

        if( segment->stream.fail() ) {
          delete segment;
          break;
        }
      }

      // The segment's length is its end-of-stream offset.
      segment->stream.seekg( 0, std::fstream::end );
      const offset_type length = segment->stream.tellg();

      if( length < 0 ) {
        // The length could not be determined, so the segment cannot be
        // mapped into the logical offset space.
        delete segment;
        break;
      }

      segment->start = totalLength;
      segment->end = totalLength + length;
      totalLength += length;

      _segments.push_back( segment );
    }

    if( _segments.size() == 0 && writable ) {
      _appendSegment();
    }

    if( _segments.size() == 0 ) {
      _state |= std::fstream::failbit;
    } else {
      _readPosition = 0;
      _writePosition = 0;
      _readSegment = _segments[0];
      _writeSegment = _segments[0];

      if( mode & ( std::fstream::ate | std::fstream::app ) ) {
        seekg( 0, std::fstream::end );
        seekp( 0, std::fstream::end );
      } else {
        seekg( 0, std::fstream::beg );
        seekp( 0, std::fstream::beg );
      }
    }
  }

  /// Closes and releases every segment. Safe to call on a closed file.
  void close() {
    for( std::vector<FileSegment*>::iterator iter = _segments.begin();
         iter != _segments.end();
         ++iter ) {
      (*iter)->stream.close();
      delete (*iter);
    }

    _segments.clear();

    // The cached segments were just deleted; do not keep dangling pointers.
    _readSegment = 0;
    _writeSegment = 0;
    _readPointerValid = false;
    _writePointerValid = false;
  }

  /// Reads up to `count` bytes at the read position into `buffer`, crossing
  /// segment boundaries as needed and stopping at end of file. The number of
  /// bytes delivered is available from gcount(). Reading from a file with no
  /// segments delivers nothing.
  void read( void* buffer, offset_type count ) {
    _readCount = 0;

    if( _segments.empty() ) {
      return;
    }

    _validateReadPointer();
    char* destination = static_cast<char*>( buffer );

    // While the request reaches the end of the current segment, drain the
    // segment and move on to the next one.
    while( ( ( count - _readCount + _readPosition ) >= _readSegment->end ) &&
           _readPosition != size() ) {
      const offset_type readAmount = _readSegment->end - _readPosition;
      _readSegment->stream.read( destination + _readCount, library_offset_type( readAmount ) );

      _readPosition += readAmount;
      _readCount += readAmount;
      _readPointerValid = false;
      _validateReadPointer();
    }

    // The remainder (if any) lies strictly inside the current segment.
    if( _readPosition != size() ) {
      _readSegment->stream.read( destination + _readCount, library_offset_type( count - _readCount ) );
      const offset_type readAmount = library_offset_type( _readSegment->stream.gcount() );
      _readPosition += readAmount;
      _readCount += readAmount;
    }

    assert( std::streamoff( _readSegment->stream.tellg() ) != -1 || size() == _readPosition );
  }

  /// Writes `count` bytes from `buffer` at the write position. A segment is
  /// filled up to FILE_MAXIMUM_SEGMENT_SIZE before the write continues in
  /// the following segment; a new segment is created only when the write
  /// runs off the end of the last one. Writing to a file with no segments
  /// sets failbit in rdstate() and writes nothing.
  void write( const void* buffer, offset_type count ) {
    if( _segments.empty() ) {
      _state |= std::fstream::failbit;
      return;
    }

    const char* source = static_cast<const char*>( buffer );
    offset_type bytesWritten = 0;
    offset_type writeAmount = 0;
    _validateWritePointer();

    while( ( ( count - bytesWritten ) + ( _writePosition - _writeSegment->start ) ) >= FILE_MAXIMUM_SEGMENT_SIZE ) {
      writeAmount = FILE_MAXIMUM_SEGMENT_SIZE - ( _writePosition - _writeSegment->start );

      _writeSegment->stream.write( source + bytesWritten, library_offset_type( writeAmount ) );
      _writePosition += writeAmount;
      bytesWritten += writeAmount;

      if( _writePosition > _writeSegment->end ) {
        _writeSegment->end = _writePosition;
      }

      // Only grow the file when the last segment was just filled. After a
      // seekp() into an earlier segment the next segment already exists,
      // and appending here would leave a stray empty segment whose stale
      // end would later make size() wrong.
      if( _writeSegment == _segments.back() ) {
        _appendSegment();
      }

      _writePointerValid = false;
      _validateWritePointer();
    }

    writeAmount = count - bytesWritten;
    _writeSegment->stream.write( source + bytesWritten, library_offset_type( writeAmount ) );
    _writePosition += writeAmount;
    bytesWritten += writeAmount;

    if( _writePosition > _writeSegment->end ) {
      _writeSegment->end = _writePosition;
    }

    assert( std::streamoff( _writeSegment->stream.tellp() ) != -1 );
  }

  /// Moves the read position; the stream is repositioned lazily on the
  /// next read. The target must not lie beyond size().
  void seekg( offset_type relativePosition, std::fstream::seekdir direction ) {
    _readPosition = _absolutePosition( relativePosition, _readPosition, direction );
    _readPointerValid = false;
  }

  /// Moves the write position; the stream is repositioned lazily on the
  /// next write. The target must not lie beyond size().
  void seekp( offset_type relativePosition, std::fstream::seekdir direction ) {
    _writePosition = _absolutePosition( relativePosition, _writePosition, direction );
    _writePointerValid = false;
  }

  /// Current logical read position.
  offset_type tellg() const {
    return _readPosition;
  }

  /// Current logical write position.
  offset_type tellp() const {
    return _writePosition;
  }

  /// Number of bytes delivered by the most recent read().
  offset_type gcount() const {
    return _readCount;
  }

  /// Stream state bits; eofbit is reported whenever the read position
  /// equals size().
  int rdstate() const {
    if( size() == _readPosition )
      return _state | std::fstream::eofbit;
    else
      return _state;
  }

  /// Logical length of the file: the end offset of the last segment, or 0
  /// when there are no segments.
  offset_type size() const {
    if( _segments.empty() ) {
      return 0;
    }

    return _segments.back()->end;
  }

  /// Closes this file and removes its segment files from disk.
  void unlink() {
    close();

    // Without a name there is nothing of ours to remove; calling the static
    // overload with "" would delete unrelated files named "$0", "$1", ...
    if( !_fileName.empty() ) {
      File::unlink( _fileName );
    }

    _fileName = "";
  }

  /// Removes "<fileName>$0", "<fileName>$1", ... stopping at the first
  /// segment that cannot be removed.
  static void unlink( const std::string& fileName ) {
    for( int i = 0; ; i++ ) {
      const std::string segment = segmentName( fileName, i );

      if( _UNLINK( segment.c_str() ) != 0 ) {
        break;
      }
    }
  }

  /// Renames each segment of `oldName` to the matching segment of
  /// `newName`, stopping at the first segment that cannot be renamed.
  static void rename( const std::string& oldName, const std::string& newName ) {
    for( int i = 0; ; i++ ) {
      const std::string oldSegment = segmentName( oldName, i );
      const std::string newSegment = segmentName( newName, i );

      if( ::rename( oldSegment.c_str(), newSegment.c_str() ) != 0 ) {
        break;
      }
    }
  }
};
00439
00440 #endif // LEMUR_FILE_HPP