// ISRWord.cpp

//
// Created by Jake Close on 3/13/18.
//

#include "ISRWord.h"

#include <unistd.h>

#include <stdexcept>

using namespace std;

// Builds an ISR for `word`: stores it as `term`, fills listOfChunks through
// getChunks( ), and positions the reader on the first posting of chunk 0.
ISRWord::ISRWord ( char *word ) : term( word )
	{
	// first( ) reads currentChunk, so it must be set before that call;
	// getChunks( ) does not depend on it.
	currentChunk = 0;
	getChunks( );
	currentLocation = first( );
	}

// TODO: move into the shared util file.
//
// Scans the whitespace-separated text file `fileName` for a token equal to
// `term` and returns the numeric tokens that follow it. Collection stops at the
// first alphabetic character seen after the match, or at end of file.
//
// Returns an empty vector when the file cannot be opened or mapped, is empty,
// or never contains `term`. Numbers are parsed with stoll( ), so a non-numeric
// token after the match propagates stoll's exception to the caller.
vector< size_t > ISRWord::getSeekContents ( string fileName )
	{
	vector< size_t > contents;

	const int file = open( fileName.c_str( ), O_RDONLY );
	if ( file == -1 )
		{
		return contents;
		}

	const ssize_t fileSize = FileSize( file );
	void *mapping = MAP_FAILED;
	if ( fileSize > 0 )
		{
		mapping = mmap( nullptr, fileSize, PROT_READ, MAP_PRIVATE, file, 0 );
		}
	// A mapping remains valid after its descriptor is closed, so release the
	// descriptor right away instead of leaking one per call.
	close( file );
	if ( mapping == MAP_FAILED )
		{
		return contents;
		}

	// Unmaps on every exit path, including when stoll( ) throws.
	struct MapGuard
		{
		void *address;
		size_t length;

		~MapGuard ( )
			{
			munmap( address, length );
			}
		} guard = { mapping, static_cast< size_t >( fileSize ) };

	const char *const begin = static_cast< const char * >( mapping );
	const char *const end = begin + fileSize;

	string word;
	bool midWord = false;   // a token has been started since the last separator
	bool midFind = false;   // `term` has been matched; now collecting its numbers
	const char *cursor = begin;
	for ( ; cursor < end; ++cursor )
		{
		// The first letter after the match starts the next term's entry.
		// The cast keeps isalpha( ) defined for bytes with the high bit set.
		if ( midFind && isalpha( static_cast< unsigned char >( *cursor ) ) )
			{
			break;
			}
		switch ( *cursor )
			{
			case '\t':
			case '\n':
			case '\r':
			case ' ':
				if ( midFind && word != "" )
					{
					contents.push_back( stoll( word ) );
					word = "";
					}
				else if ( midWord )
					{
					midWord = false;
					if ( word == term )
						{
						midFind = true;
						}
					word = "";
					}
				break;
			default:
				word += *cursor;
				midWord = true;
			}
		}

	// The loop only flushes a number when it meets a separator, so a file that
	// ends without trailing whitespace would lose its last number. Flush it here,
	// but only when the scan reached end of file (not when it stopped on a letter).
	if ( cursor == end && midFind && word != "" )
		{
		contents.push_back( stoll( word ) );
		}
	return contents;
	}

void ISRWord::getChunks ( )
	{
	string path = util::GetCurrentWorkingDir( ) + "/index-test-files/twitter/index-master.txt";
	listOfChunks = getSeekContents( path );
//    int chunkFile = open("index-test-files/twitter/index-master.txt", O_RDONLY);
//    ssize_t chunkFileSize = FileSize(chunkFile);
//    char* chunkMemMap = (char*) mmap(nullptr, chunkFileSize, PROT_READ, MAP_PRIVATE, chunkFile, 0);
//    string word = "";
//    bool midWord = false;
//    bool midChunkFind = false;
//    if(chunkMemMap != MAP_FAILED) {
//        for(char* map = chunkMemMap; map < chunkMemMap + chunkFileSize; map++) {
//            if(midChunkFind && isalpha(*map)) {
//                break;
//            }
//            switch(*map) {
//                case '\t':
//                case '\n':
//                case '\r':
//                case ' ':
//                    if (midChunkFind && word != "") {
//                        listOfChunks.push_back(stoll(word));
//                        word = "";
//                    } else if (midWord) {
//                        midWord = false;
//                        if(word == term) {
//                            midChunkFind = true;
//                        }
//                        word = "";
//                    }
//                    break;
//                default:
//                    word += *map;
//                    midWord = true;
//            }
//        }
//    }
	}

// Positions the ISR on the first posting of `term` in the chunk selected by
// currentChunk and returns that posting's location.
//
//  1. Look up the term's byte offset in the chunk's "-seek.txt" file.
//  2. Memory-map the chunk's ".txt" file and point currentMemMap at that offset.
//  3. Parse the number that ends at the next ' '; currentMemMap is left just
//     past that space, which is where next( ) continues reading.
//
// Calls exit( 0 ) once currentChunk is past the end of listOfChunks. That is the
// existing end-of-stream behaviour and is kept because callers may rely on it.
// Throws std::runtime_error when the seek file has no entry for `term`, when
// the chunk file cannot be opened or mapped, or when the recorded offset lies
// outside the chunk file.
//
// NOTE(review): the mapping created by a previous call is not released here,
// because currentMemMap has been advanced and the mapping's base and length are
// not kept. Fixing that needs extra state in the class.
Location ISRWord::first ( )
	{
	if ( listOfChunks.size( ) <= currentChunk )
		{
		exit( 0 );
		}

	// Both files of a chunk share this prefix; build it once.
	const string chunkBase = util::GetCurrentWorkingDir( ) + "/index-test-files/twitter/index" +
			to_string( listOfChunks[ currentChunk ] );

	const string seekPath = chunkBase + "-seek.txt";
	const vector< size_t > location = getSeekContents( seekPath );
	if ( location.empty( ) )
		{
		throw runtime_error( "ISRWord::first: no seek entry for term in " + seekPath );
		}

	const string chunkPath = chunkBase + ".txt";
	const int chunkFile = open( chunkPath.c_str( ), O_RDONLY );
	if ( chunkFile == -1 )
		{
		throw runtime_error( "ISRWord::first: cannot open " + chunkPath );
		}

	const ssize_t chunkFileSize = FileSize( chunkFile );
	void *mapping = MAP_FAILED;
	if ( chunkFileSize > 0 )
		{
		mapping = mmap( nullptr, chunkFileSize, PROT_READ, MAP_PRIVATE, chunkFile, 0 );
		}
	// The mapping outlives the descriptor, so close it now rather than leak it.
	close( chunkFile );
	if ( mapping == MAP_FAILED )
		{
		throw runtime_error( "ISRWord::first: cannot map " + chunkPath );
		}
	if ( location[ 0 ] >= static_cast< size_t >( chunkFileSize ) )
		{
		munmap( mapping, chunkFileSize );
		throw runtime_error( "ISRWord::first: seek offset is outside " + chunkPath );
		}

	char *const mapBegin = static_cast< char * >( mapping );
	const char *const mapEnd = mapBegin + chunkFileSize;
	currentMemMap = mapBegin + location[ 0 ];

	// Collect the digits of the first location without running off the mapping.
	string firstLoc;
	while ( currentMemMap < mapEnd && *currentMemMap != ' ' )
		{
		firstLoc += *currentMemMap;
		++currentMemMap;
		}
	// Step over the separating space when there is one.
	if ( currentMemMap < mapEnd )
		{
		++currentMemMap;
		}
	return stoll( firstLoc );
	}

// Advances to the term's next posting and returns its absolute location in the
// corpus.
//
//  * When the cursor sits on '\n', the posting list in the current chunk is
//    used up: move to the next chunk and take its first location from first( ).
//  * Otherwise the text up to the next ' ' is a delta: add it to
//    currentLocation and step past the separator.
//
// NOTE(review): the delta scan has no end-of-mapping check; it relies on every
// delta being followed by a space. Confirm against the index writer.
Location ISRWord::next ( )
	{
	if ( *currentMemMap == '\n' )
		{
		++currentChunk;
		currentLocation = first( );
		return currentLocation;
		}

	string deltaText;
	for ( ; *currentMemMap != ' '; ++currentMemMap )
		{
		deltaText.push_back( *currentMemMap );
		}
	currentLocation += stoll( deltaText );
	++currentMemMap;
	return currentLocation;
	}

// Moves the ISR to the first posting whose location is >= target and returns
// that location.
//
// This is a linear scan built on first( ) and next( ). The original body was
// empty, so control fell off the end of a non-void function, which is undefined
// behaviour.
// TODO: use the per-chunk seek lookup table to skip whole chunks, as the
// original design notes describe, instead of walking every posting.
//
// NOTE(review): assumes locations returned by next( ) never decrease, including
// across chunk boundaries. Confirm against the index writer.
// When no posting at or beyond `target` exists, the scan runs off the last
// chunk and first( ) ends the process, the same as next( ) at end of stream.
Location ISRWord::seek ( Location target )
	{
	// A target behind the cursor may be satisfied by a posting that was already
	// passed, so restart from the first chunk.
	if ( target < currentLocation )
		{
		currentChunk = 0;
		currentLocation = first( );
		}

	// next( ) updates currentLocation on every call.
	while ( currentLocation < target )
		{
		next( );
		}
	return currentLocation;
	}