Skip to content
Snippets Groups Projects
Commit 9540c57d authored by jsclose's avatar jsclose
Browse files

Working on fixing the CMake file; also created code outlines for all of the constraint files from lecture.

parent ac7893d4
No related branches found
No related tags found
No related merge requests found
Showing
with 306 additions and 26 deletions
......@@ -3,18 +3,10 @@ project(eecs398_search)
set(CMAKE_CXX_STANDARD 11)
# parser-test executable: built from main.cpp and the producer/consumer queue
# source; the parser and util headers are listed alongside the sources.
add_executable(parser-test
main.cpp
shared/ProducerConsumerQueue.cpp
shared/ProducerConsumerQueue.h
parser/Parser.h
util/Tokenizer.h
util/stringProcessing.h
util/Stemmer.h)
add_executable(crawl
add_executable(crawler-parser-test
main.cpp
shared/ProducerConsumerQueue.h
shared/ThreadClass.h
......@@ -27,12 +19,13 @@ add_executable(crawl
crawler/LocalReader.h
shared/Document.cpp
parser/Parser.cpp
util/stringProcessing.cpp indexStreamReader/ISR.cpp indexStreamReader/ISR.h)
util/stringProcessing.cpp
)
# StringProcessingTest executable: the string-processing implementation plus
# its test driver under util/tests; the headers are listed alongside.
add_executable(StringProcessingTest
util/stringProcessing.h
util/stringProcessing.cpp
util/Stemmer.h
util/tests/stringProcessingTest.cpp)
......@@ -45,9 +38,12 @@ add_executable(StemmerTest
util/tests/stemmerTest.cpp)
# ParserEndToEndTest executable: the parser, Document, util and
# string-processing sources plus the test driver in parser/tests.
add_executable(ParserEndToEndTest
parser/Parser.h
shared/Document.h
parser/Parser.cpp
shared/Document.cpp
shared/url.h
util/util.cpp
util/Tokenizer.h
util/stringProcessing.cpp
parser/tests/parserTest.cpp)
......@@ -58,5 +54,5 @@ add_executable(search-engine search.cpp query/Query.cpp)
# OpenSSL is required; the targets below link against OpenSSL::SSL and pthread.
find_package(OpenSSL REQUIRED)
# NOTE(review): both `crawl` and `crawler-parser-test` are linked here. This
# looks like the old and new line of a target rename shown together -- confirm
# that only a target actually declared by add_executable is referenced.
target_link_libraries(crawl OpenSSL::SSL pthread)
target_link_libraries(crawler-parser-test OpenSSL::SSL pthread)
File added
File added
File moved
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "Post.h"
// Base interface for index stream readers (ISRs). The other reader classes
// in this change (word, end-of-document, AND, OR, phrase, container) derive
// from it.
//
// NOTE(review): the member functions are declared virtual but not pure, so a
// definition of each must exist somewhere or the vtable will fail to link.
// Consider making them pure virtual ( = 0 ) if ISR is meant to be abstract.
class ISR
{
public:
    // Readers are held and passed around as ISR * (e.g. DocumentEnd below),
    // so destruction through a base pointer must reach the derived class.
    virtual ~ISR( ) = default;

    // Reader associated with document ends -- presumably an end-of-document
    // ISR; TODO confirm who sets and owns it. Starts out null.
    ISR *DocumentEnd = nullptr;

    // Advance to the next post -- presumably null when exhausted; TODO confirm.
    virtual Post *Next( );

    // Advance to the first post of the next document -- TODO confirm.
    virtual Post *NextDocument( );

    // Move to the first post at or after `target` -- presumably null if there
    // is none; TODO confirm.
    virtual Post *Seek( Location target );

    // Start / end location of the current match.
    virtual Location GetStartLocation( );
    virtual Location GetEndLocation( );

    // Reader for document boundaries -- presumably returns DocumentEnd;
    // TODO confirm.
    virtual ISR *GetDocumentISR( );
};
......@@ -2,15 +2,4 @@
// Created by Jake Close on 3/13/18.
//
#pragma once
// Placeholder index stream reader type: declares no members of its own.
class ISR {};
#include "ISRAnd.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
// Find occurrences of all child ISRs within a single document.
//
// Derives publicly from ISR: with the default (private) inheritance of a
// `class`, an ISRAnd * could not be converted to ISR * outside the class,
// so it could not be stored in another reader's ISR ** term list.
class ISRAnd : public ISR
{
public:
    // Child readers and their count -- presumably Terms points at an array of
    // NumberOfTerms readers; ownership is not visible here, TODO confirm.
    ISR **Terms = nullptr;
    unsigned NumberOfTerms = 0;

    // Planned algorithm (not implemented yet):
    // 1. Seek all the ISRs to the first occurrence beginning at
    //    the target location.
    // 2. Move the document end ISR to just past the furthest
    //    word, then calculate the document begin location.
    // 3. Seek all the other terms to past the document begin.
    // 4. If any term is past the document end, return to
    //    step 2.
    // 5. If any ISR reaches the end, there is no match.
    Post *Seek( Location target ) override
    {
        // TODO: implement the steps above. Until then report "no match"
        // rather than flowing off the end of a non-void function, which is
        // undefined behaviour.
        (void) target;
        return nullptr;
    }

    // Continue the search one location past the start of the current match.
    Post *Next( ) override
    {
        return Seek( nearestStartLocation + 1 );
    }

private:
    // Indices into Terms -- presumably of the nearest and farthest matching
    // term; TODO confirm once Seek is implemented.
    unsigned nearestTerm = 0, farthestTerm = 0;
    // Start / end of the nearest term. Initialised to 0 so that Next( ) never
    // reads an indeterminate value before the first Seek.
    Location nearestStartLocation = 0, nearestEndLocation = 0;
};
//
// Created by Jake Close on 3/13/18.
//
#include "ISRContainer.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
#include "ISREndDoc.h"
// Find occurrences of contained ISRs in a single document not containing any
// excluded ISRs.
//
// Derives publicly from ISR so a container can be used through an ISR *.
class ISRContainer : public ISR
{
public:
    // Readers that must all occur, and readers that must not occur, in the
    // document. Both are treated as arrays sized by the matching Count member
    // (assumed from the names and from the "all the excluded ISRs" wording in
    // the algorithm below -- TODO confirm ownership).
    ISR **Contained = nullptr;
    ISR **Excluded = nullptr;
    ISREndDoc *EndDoc = nullptr;
    unsigned CountContained = 0;
    unsigned CountExcluded = 0;

    // Planned algorithm (not implemented yet):
    // 1. Seek all the included ISRs to the first occurrence beginning at
    //    the target location.
    // 2. Move the document end ISR to just past the furthest
    //    contained ISR, then calculate the document begin location.
    // 3. Seek all the other contained terms to past the document begin.
    // 4. If any contained term is past the document end, return to
    //    step 2.
    // 5. If any ISR reaches the end, there is no match.
    // 6. Seek all the excluded ISRs to the first occurrence beginning at
    //    the document begin location.
    // 7. If any excluded ISR falls within the document, reset the
    //    target to one past the end of the document and return to
    //    step 1.
    Post *Seek( Location target ) override
    {
        // TODO: implement the steps above. Until then report "no match"
        // rather than flowing off the end of a non-void function.
        (void) target;
        return nullptr;
    }

    // Continue the search one location past the start of the nearest
    // contained reader's current match.
    Post *Next( ) override
    {
        // Nothing to advance if no contained readers are attached or the
        // recorded index is out of range.
        if ( Contained == nullptr || nearestTerm >= CountContained )
            return nullptr;
        return Seek( Contained[ nearestTerm ]->GetStartLocation( ) + 1 );
    }

private:
    // Indices into Contained -- presumably of the nearest and farthest
    // matching reader; TODO confirm once Seek is implemented.
    unsigned nearestTerm = 0, farthestTerm = 0;
    Location nearestStartLocation = 0, nearestEndLocation = 0;
};
//
// Created by Jake Close on 3/13/18.
//
#include "ISREndDoc.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
// Find occurrences of document ends.
//
// Derives publicly from ISR: with the default (private) inheritance of a
// `class`, code holding an ISREndDoc * could not call Seek / Next or pass it
// where an ISR * is expected.
class ISREndDoc : public ISR
{
public:
    // Locations of the current and previous match -- presumably the current
    // and previous document-end posts; TODO confirm.
    Location GetCurrentLocation( );
    Location GetPreviousLocation( );

    // Lengths for the current document -- units are not visible here
    // (presumably token counts); TODO confirm.
    unsigned GetDocumentLength( );
    unsigned GetTitleLength( );
    unsigned GetUrlLength( );
};
//
// Created by Jake Close on 3/13/18.
//
#include "ISROr.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
// Find occurrences of any child ISR.
//
// The base clause previously read `publicISR` (missing space), which names a
// non-existent type; it must be `public ISR`.
class ISROr : public ISR
{
public:
    // Child readers and their count -- presumably Terms points at an array of
    // NumberOfTerms readers; ownership is not visible here, TODO confirm.
    ISR **Terms = nullptr;
    unsigned NumberOfTerms = 0;

    // Intended bodies: return nearestStartLocation / nearestEndLocation.
    Location GetStartLocation( ) override;
    Location GetEndLocation( ) override;

    // Seek all the ISRs to the first occurrence beginning at the target
    // location. Return null if there is no match.
    // The document is the document containing the nearest term.
    Post *Seek( Location target ) override;

    // Do a next on the nearest term, then return the new nearest match.
    Post *Next( ) override;

    // Seek all the ISRs to the first occurrence just past the end of this
    // document. Intended body:
    //     return Seek( DocumentEnd->GetEndLocation( ) + 1 );
    Post *NextDocument( ) override;

private:
    // Index into Terms of the nearest term.
    unsigned nearestTerm = 0;
    // nearestStartLocation and nearestEndLocation are the start and end of
    // the nearestTerm.
    Location nearestStartLocation = 0, nearestEndLocation = 0;
};
//
// Created by Jake Close on 3/13/18.
//
#include "ISRPhrase.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
// Find occurrences of all child ISRs as a phrase
class ISRPhrase : ISR
{
public:
ISR **Terms;
unsigned NumberOfTerms;
Post *Seek( Location target )
{
// 1. Seek all ISRs to the first occurrence beginning at
// the target location.
// 2. Pick the furthest term and attempt to seek all
// the other terms to the first location beginning
// where they should appear relative to the furthest
// term.
// 3. If any term is past the desired location, return
// to step 2.
// 4. If any ISR reaches the end, there is no match.
}
Post *Next( )
{
// Finds overlapping phrase matches. return Seek( nearestStartLocation + 1 );
}
};
//
// Created by Jake Close on 3/13/18.
//
#include "ISRWord.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once
#include "ISR.h"
// Find occurrences of individual words.
//
// Derives publicly from ISR: the composite readers hold their children as
// ISR *, which requires an accessible ISR base.
class ISRWord : public ISR
{
public:
    // Counts for this word -- presumably the number of documents containing
    // it and its total number of occurrences in the index; TODO confirm.
    unsigned GetDocumentCount( );
    unsigned GetNumberOfOccurrences( );

    // Post the reader is currently positioned on -- presumably null before
    // the first Seek / Next; TODO confirm.
    virtual Post *GetCurrentPost( );
};
//
// Created by Jake Close on 3/13/18.
//
#include "Post.h"
//
// Created by Jake Close on 3/13/18.
//
#pragma once

#include <cstddef>

// Position within the index. Location 0 is the null location.
typedef std::size_t Location;
// Offset type -- presumably a byte offset into the index file; TODO confirm.
typedef std::size_t FileOffset;

// Attribute payload of a post: either word attributes or document attributes.
// NOTE(review): WordAttributes and DocumentAttributes are not declared in
// this header; they must be declared before this point.
union Attributes
{
    WordAttributes Word;
    DocumentAttributes Document;
};
// ISR.h includes this header, so ISR can only be forward-declared here; a
// pointer to it is all that Post needs.
class ISR;

// A single posting in the index, exposed through a polymorphic interface.
//
// NOTE(review): the member functions are declared virtual but not pure, so a
// definition of each must exist somewhere or the vtable will fail to link.
class Post
{
public:
    // Posts are returned and handled as Post *, so destruction through a
    // base pointer must reach the derived class.
    virtual ~Post( ) = default;

    // Start / end location of this post.
    virtual Location GetStartLocation( );
    virtual Location GetEndLocation( );

    // Attributes attached to this post.
    virtual Attributes GetAttributes( );

    // Returns the next post (next delta).
    // TODO(Nick/Zane): implement.
    virtual Post *Next( );

    // Reader associated with this post.
    // NOTE(review): the original author questioned why this is needed.
    virtual ISR *GetIsr( );
};
//
// Created by Jake Close on 3/13/18.
//
#include "PostingList.h"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment