00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #ifndef LEMUR_FILE_HPP
00020 #define LEMUR_FILE_HPP
00021
#include <sys/types.h>

#include <cassert>
#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
00026
00027
/// Amount, in bytes, by which the maximum segment size is kept below the 2GB mark.
#define FILE_PAGE_SIZE (8192)
/// One gigabyte, in bytes.
#define FILE_1GB (1024 * 1024 * 1024)
/// 2^31 - 1, spelled so that no intermediate value exceeds a 32-bit int.
#define FILE_2GB_MINUS_1 (FILE_1GB + (FILE_1GB - 1))
/// Largest number of bytes File::write() places in a single segment file.
#define FILE_MAXIMUM_SEGMENT_SIZE (FILE_2GB_MINUS_1 - FILE_PAGE_SIZE + 1)

/**
 * A logical file stored on disk as a sequence of segment files.
 *
 * Segment number i of the logical file "name" lives in the file "name$i".
 * Each segment holds at most FILE_MAXIMUM_SEGMENT_SIZE bytes; the logical
 * file is the concatenation of its segments in numeric order.  Read and
 * write positions are tracked independently, in logical-file coordinates.
 *
 * NOTE: a File owns its segments through raw pointers and defines no copy
 * semantics of its own, so copying an open File leads to a double delete.
 * Do not copy File objects that have been opened.
 */
class File {
public:
#ifdef WIN32
  /// Position within the logical (multi-segment) file.
  typedef __int64 offset_type;
  /// Offset handed to the stream of a single segment.
  typedef int library_offset_type;
  #define _UNLINK ::unlink
#else
  /// Position within the logical (multi-segment) file.
  typedef off_t offset_type;
  /// Offset handed to the stream of a single segment.
  typedef off_t library_offset_type;
  #define _UNLINK std::remove
#endif

private:
  /// One on-disk segment and the half-open range [start, end) it covers.
  struct FileSegment {
    std::fstream stream;
    offset_type start;
    offset_type end;

    /// True when position lies inside [start, end).
    bool contains( offset_type position ) const {
      return start <= position && position < end;
    }

    /// True when the whole segment lies at or before position.
    bool before( offset_type position ) const {
      return end <= position;
    }
  };

  std::string _fileName;
  std::vector<FileSegment*> _segments;

  FileSegment* _readSegment;   // segment the read position was last resolved to
  FileSegment* _writeSegment;  // segment the write position was last resolved to
  offset_type _readPosition;
  offset_type _writePosition;
  offset_type _readCount;      // bytes delivered by the most recent read()
  bool _readPointerValid;      // stream get pointer matches _readPosition
  bool _writePointerValid;     // stream put pointer matches _writePosition
  int _mode;
  int _state;

  /// Builds the on-disk name of one segment: fileName + "$" + segment.
  static std::string segmentName( const std::string& fileName, int segment ) {
    std::stringstream segName;
    segName << fileName << "$" << segment;
    return segName.str();
  }

  /// Creates (truncating) the next segment file and appends it, empty, at the
  /// current end of the logical file.
  void _appendSegment() {
    FileSegment* segment = new FileSegment();
    const int number = (int)_segments.size();
    const std::string name = segmentName( _fileName, number );

    segment->start = size();
    segment->end = segment->start;

    // A new segment must also be readable when the file was opened for
    // reading; otherwise data just written could never be read back.
    std::ios_base::openmode flags = std::ios::out | std::ios::binary | std::ios::trunc;
    if( _mode & std::ios::in ) {
      flags |= std::ios::in;
    }
    segment->stream.open( name.c_str(), flags );

    _segments.push_back( segment );
  }

  /// Converts a (relative offset, direction) pair into a logical-file position.
  /// An unrecognized direction leaves the position where it currently is.
  offset_type _absolutePosition( offset_type relativePosition,
                                 offset_type currentPosition,
                                 std::fstream::seekdir direction ) const {
    offset_type newPosition = currentPosition;

    switch( direction ) {
      case std::fstream::beg:
        newPosition = relativePosition;
        break;

      case std::fstream::cur:
        newPosition = currentPosition + relativePosition;
        break;

      case std::fstream::end:
        newPosition = size() + relativePosition;
        break;

      default:
        break;
    }

    return newPosition;
  }

  /// Finds the segment responsible for absolutePosition.
  /// @param guess segment to try first (may be null).
  /// @return the matching segment, the last segment when the position is at
  ///         or beyond the end of the file, or null when no segment is open.
  FileSegment* _segmentForPosition( offset_type absolutePosition,
                                    FileSegment* guess ) {
    if( guess && guess->contains( absolutePosition ) ) {
      return guess;
    }

    if( _segments.empty() ) {
      return 0;
    }

    std::vector<FileSegment*>::iterator low = _segments.begin();
    std::vector<FileSegment*>::iterator high = _segments.end() - 1;

    // positions at or past the end of the file belong to the last segment
    if( (*high)->end <= absolutePosition ) {
      return *high;
    }

    // binary search; invariant: *high ends after absolutePosition
    while( high - low > 1 ) {
      std::vector<FileSegment*>::iterator middle = low + ((high - low) / 2);

      if( (*middle)->before( absolutePosition ) ) {
        low = middle;
      } else {
        high = middle;
      }
    }

    return (*high)->contains( absolutePosition ) ? *high : *low;
  }

  /// Makes the get pointer of the proper segment match _readPosition.
  /// Leaves _readSegment null when no segment is open.
  void _validateReadPointer() {
    if( _readPointerValid ) {
      return;
    }

    _readSegment = _segmentForPosition( _readPosition, _readSegment );
    if( !_readSegment ) {
      return;
    }

    const library_offset_type toHere = library_offset_type( _readPosition - _readSegment->start );
    _readSegment->stream.seekg( toHere, std::ios::beg );
    _readPointerValid = true;
    _writePointerValid = false;
  }

  /// Makes the put pointer of the proper segment match _writePosition.
  /// Leaves _writeSegment null when no segment is open.
  void _validateWritePointer() {
    if( _writePointerValid ) {
      return;
    }

    _writeSegment = _segmentForPosition( _writePosition, _writeSegment );
    if( !_writeSegment ) {
      return;
    }

    const library_offset_type toHere = library_offset_type( _writePosition - _writeSegment->start );
    _writeSegment->stream.seekp( toHere, std::ios::beg );
    _writePointerValid = true;
    _readPointerValid = false;
  }

public:
  /// Creates a File with no segments open; every member starts in a defined state.
  File() :
    _readSegment( 0 ),
    _writeSegment( 0 ),
    _readPosition( 0 ),
    _writePosition( 0 ),
    _readCount( 0 ),
    _readPointerValid( false ),
    _writePointerValid( false ),
    _mode( 0 ),
    _state( std::fstream::goodbit )
  {
  }

  /// Closes all open segments.
  ~File() {
    close();
  }

  /// Opens the logical file fileName.
  /// @param fileName base name; segments are "fileName$0", "fileName$1", ...
  /// @param mode combination of std::ios open-mode flags; binary is always added.
  ///
  /// With trunc, every existing segment file is removed.  With out, existing
  /// segments are reopened for writing, and a first segment is created when
  /// none exists.  If no segment can be opened, rdstate() reports failbit.
  void open( const std::string& fileName, int mode ) {
    close();

    _readPosition = 0;
    _writePosition = 0;
    _readCount = 0;

    _readPointerValid = false;
    _writePointerValid = false;

    _mode = mode | std::fstream::binary;
    _state = std::fstream::goodbit;
    _fileName = fileName;

    const bool truncating = ( mode & std::fstream::trunc ) != 0;
    const bool writing = ( mode & std::fstream::out ) != 0;
    offset_type total = 0;

    for( int i=0; ; i++ ) {
      const std::string name = segmentName( _fileName, i );
      FileSegment* segment = new FileSegment();

      // probe read-only first: this tells us whether the segment exists
      segment->stream.open( name.c_str(),
                            std::ifstream::in | std::ifstream::binary );

      if( segment->stream.rdstate() & std::fstream::failbit ) {
        // no more segments
        delete segment;
        break;
      }

      if( truncating ) {
        // the old contents are being discarded: remove the segment file
        segment->stream.close();
        delete segment;
        _UNLINK( name.c_str() );
        continue;
      }

      if( writing ) {
        // reopen with write access; _mode (not mode) keeps the binary flag
        segment->stream.close();
        segment->stream.open( name.c_str(),
                              static_cast<std::ios_base::openmode>( _mode ) &
                              (std::fstream::binary | std::fstream::in |
                               std::fstream::out) );

        if( segment->stream.rdstate() & std::fstream::failbit ) {
          delete segment;
          break;
        }
      }

      // the segment's length determines the logical range it covers
      segment->stream.seekg( 0, std::fstream::end );
      offset_type length = segment->stream.tellg();
      if( length < 0 ) {
        length = 0; // tellg() failed; treat the segment as empty
      }

      segment->start = total;
      segment->end = total + length;
      total += length;

      _segments.push_back( segment );
    }

    if( _segments.empty() && writing ) {
      _appendSegment();

      // a first segment that could not be created means the open failed
      if( !_segments.back()->stream.is_open() ) {
        close();
      }
    }

    if( _segments.empty() ) {
      _state |= std::fstream::failbit;
      return;
    }

    _readPosition = 0;
    _writePosition = 0;
    _readSegment = _segments[0];
    _writeSegment = _segments[0];

    if( mode & (std::fstream::ate|std::fstream::app) ) {
      seekg( 0, std::fstream::end );
      seekp( 0, std::fstream::end );
    } else {
      seekg( 0, std::fstream::beg );
      seekp( 0, std::fstream::beg );
    }
  }

  /// Closes and releases every segment.  Safe to call repeatedly.
  void close() {
    for( std::vector<FileSegment*>::iterator iter = _segments.begin();
         iter != _segments.end(); ++iter ) {
      (*iter)->stream.close();
      delete (*iter);
    }

    _segments.clear();

    // the cached segment pointers were just deleted; never leave them dangling
    _readSegment = 0;
    _writeSegment = 0;
    _readPointerValid = false;
    _writePointerValid = false;
  }

  /// Reads up to count bytes at the read position into buffer, crossing
  /// segment boundaries as needed.  gcount() reports the bytes delivered.
  /// A read position beyond the end of the file is moved back to the end.
  void read( void* buffer, offset_type count ) {
    _readCount = 0;
    _validateReadPointer();

    if( !_readSegment || count <= 0 ) {
      return;
    }

    if( _readPosition >= size() ) {
      // nothing to read; settle on end-of-file so rdstate() reports eofbit
      _readPosition = size();
      _readPointerValid = false;
      return;
    }

    char* out = static_cast<char*>( buffer );

    // consume whole segment tails while the request reaches the segment's end
    while( ((count - _readCount + _readPosition) >= _readSegment->end) &&
           _readPosition != size() ) {
      const offset_type wanted = _readSegment->end - _readPosition;
      _readSegment->stream.read( out + _readCount, library_offset_type(wanted) );
      const offset_type got = offset_type( _readSegment->stream.gcount() );

      _readPosition += got;
      _readCount += got;
      _readPointerValid = false;

      if( got != wanted || wanted <= 0 ) {
        // short read (or no progress possible): report what was delivered
        return;
      }

      _validateReadPointer();
    }

    if( _readPosition != size() ) {
      _readSegment->stream.read( out + _readCount, library_offset_type(count - _readCount) );
      const offset_type got = offset_type( _readSegment->stream.gcount() );
      _readPosition += got;
      _readCount += got;
    }
  }

  /// Writes count bytes from buffer at the write position, starting a new
  /// segment whenever the current one reaches FILE_MAXIMUM_SEGMENT_SIZE.
  /// Does nothing when no segment is open or count is not positive.
  void write( const void* buffer, offset_type count ) {
    _validateWritePointer();

    if( !_writeSegment || count <= 0 ) {
      return;
    }

    const char* in = static_cast<const char*>( buffer );
    offset_type bytesWritten = 0;
    offset_type writeAmount = 0;

    while( ((count - bytesWritten) + (_writePosition - _writeSegment->start)) >= FILE_MAXIMUM_SEGMENT_SIZE ) {
      // fill the current segment up to its maximum size
      writeAmount = FILE_MAXIMUM_SEGMENT_SIZE - (_writePosition - _writeSegment->start);

      _writeSegment->stream.write( in + bytesWritten, library_offset_type(writeAmount) );
      _writePosition += writeAmount;
      bytesWritten += writeAmount;

      if( _writePosition > _writeSegment->end ) {
        _writeSegment->end = _writePosition;
      }

      // only grow the file when the segment just filled was the last one;
      // when overwriting an earlier segment the next one already exists
      if( _writeSegment == _segments.back() ) {
        _appendSegment();
      }

      _writePointerValid = false;
      _validateWritePointer();
    }

    writeAmount = count - bytesWritten;
    _writeSegment->stream.write( in + bytesWritten, library_offset_type(writeAmount) );
    _writePosition += writeAmount;
    bytesWritten += writeAmount;

    if( _writePosition > _writeSegment->end ) {
      _writeSegment->end = _writePosition;
    }
  }

  /// Moves the read position; the stream itself is repositioned lazily.
  void seekg( offset_type relativePosition, std::fstream::seekdir direction ) {
    _readPosition = _absolutePosition( relativePosition, _readPosition, direction );
    _readPointerValid = false;
  }

  /// Moves the write position; the stream itself is repositioned lazily.
  void seekp( offset_type relativePosition, std::fstream::seekdir direction ) {
    _writePosition = _absolutePosition( relativePosition, _writePosition, direction );
    _writePointerValid = false;
  }

  /// Current read position in the logical file.
  offset_type tellg() {
    return _readPosition;
  }

  /// Current write position in the logical file.
  offset_type tellp() {
    return _writePosition;
  }

  /// Number of bytes delivered by the most recent read().
  offset_type gcount() {
    return _readCount;
  }

  /// State flags of the file; eofbit is added whenever the read position
  /// equals the size of the file.
  int rdstate() {
    if( size() == _readPosition )
      return _state | std::fstream::eofbit;
    else
      return _state;
  }

  /// Size of the logical file: the end of the last segment, or 0 when no
  /// segment is open.
  offset_type size() const {
    if( _segments.empty() ) {
      return 0;
    }

    return _segments.back()->end;
  }

  /// Closes this file and removes its segment files from disk.
  void unlink() {
    close();

    // without a name there is nothing to remove; never touch a stray "$0"
    if( !_fileName.empty() ) {
      File::unlink( _fileName );
    }

    _fileName = "";
  }

  /// Removes "fileName$0", "fileName$1", ... until a removal fails.
  static void unlink( const std::string& fileName ) {
    for( int i=0; ; i++ ) {
      const std::string segment = segmentName( fileName, i );

      if( _UNLINK( segment.c_str() ) != 0 ) {
        // first segment that could not be removed ends the sequence
        break;
      }
    }
  }

  /// Renames "oldName$i" to "newName$i" for i = 0, 1, ... until a rename fails.
  static void rename( const std::string& oldName, const std::string& newName ) {
    for( int i=0; ; i++ ) {
      const std::string oldSegment = segmentName( oldName, i );
      const std::string newSegment = segmentName( newName, i );

      if( ::rename( oldSegment.c_str(), newSegment.c_str() ) != 0 ) {
        break;
      }
    }
  }
};
00445
00446 #endif // LEMUR_FILE_HPP