Newer
Older
#include <math.h>
#define GOV ".gov"
#define COM ".com"
#define EDU ".edu"
#define ORG ".org"
#define NET ".net"
#define MIL ".mil"
#define INT ".int"
private:
// Parsed pieces of the URL. In the visible constructor code they are built
// from the temporary C-string pointers (temp_*) that index into a mutable
// copy of the input URL.
string CompleteUrl, // built from temp_CompleteUrl, then truncated at the first '?'
Service, // built from temp_Service; presumably the text before the first ':' (compared against "http" and "https")
Host, // text following the optional "//" up to, but not including, the next '/'
Domain, // suffix of the host starting at its last '.', e.g. ".com"; only set for http/https URLs whose host contains a '.'
Path, // printed in the diagnostics; NOTE(review): its assignment is not visible in this excerpt
AnchorText; // built from temp_AnchorText, which is initialised to the empty string
double Score = 0; // starts at 0; adjusted by the domain checks in the constructor and by updateScore()
// Assumes url points to static text but
// does not check.
char *temp_CompleteUrl,
*temp_Service,
*temp_Host,
*temp_Domain,
*temp_Path,
*temp_AnchorText,
*temp_pathBuffer;
// Initialize anchor text to ""
char *null = new char[2];
strcpy( null, string( "" ).c_str( ) );
temp_AnchorText = null;
char *url = new char[input_url.length( ) + 1];
strcpy( url, input_url.c_str( ) );
temp_pathBuffer = new char[strlen( url ) + 1];
char *f, *t;
for ( t = temp_pathBuffer, f = url; ( *t++ = *f++ ); );
const char Colon = ':', Slash = '/', HashTag = '#', Period = '.', QuestionMark = '?';
char *p;
for ( p = temp_pathBuffer; *p && *p != Colon; p++ );
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Mark the end of the Service.
*p++ = 0;
if ( *p == Slash )
p++;
if ( *p == Slash )
p++;
temp_Host = p;
for ( ; *p && *p != Slash; p++ );
if ( *p )
// Mark the end of the Host.
*p++ = 0;
//char * domainBuffer = new char[ 20 ];
//get the domain:
char *i = temp_Host;
temp_Domain = nullptr;
if(i)
{
for ( ; *i; i++ )
{
if ( *i == Period )
temp_Domain = i;
}
}
// Whatever remains is the Path. // need to remove fragments
temp_Path = p;
for ( ; *p && *p != QuestionMark; p++ );
if ( *p )
// Mark the end of the Path at the first '?', dropping the query string.
*p++ = 0;
CompleteUrl = string(temp_CompleteUrl, strlen(temp_CompleteUrl));
//remove question marks
size_t found = CompleteUrl.find("?");
if(found < CompleteUrl.size( ) )
CompleteUrl.erase(found, CompleteUrl.size( ));
Service = string(temp_Service, strlen(temp_Service));
Host = string(temp_Host, strlen(temp_Host));
if(Service == "http" || Service == "https")
{
if( temp_Domain != nullptr )
Domain = string(temp_Domain, strlen(temp_Domain));
//vector<string> noquestionmakr = splitStr(Path, '?', false);
AnchorText = string(temp_AnchorText, strlen(temp_AnchorText));
pathBuffer = temp_pathBuffer;
else
isValid = false;
}
catch (exception e)
{
cerr << "Error constructing a ParsedUrl from string url "<< endl;
cout << "Complete URL: " << CompleteUrl << endl;
cout << "Service: " << Service << endl;
cout << "Host: " << Host << endl;
cout << "Domain: " << Domain << endl;
cout << "Path: " << Path << endl;
cout << "Score: " << Score << endl;
if(lengthOfUrl > 250)
isValid = false;
if ( Domain == ORG )
else if ( Domain == EDU )
Score += .5;
else if ( Domain == GOV )
else if ( Domain == COM )
Score += .5;
else if ( Domain == NET )
else if ( Domain == INT )
else if ( Domain == MIL )
else
Score += 10;
}
}
// Accessor for the Domain member. The value is handed back by value, so the
// caller receives an independent copy. When set, Domain holds the part of the
// host beginning at its last '.', so it looks like ".com" or ".org".
std::string getDomain ( )
{
std::string domainCopy( Domain );
return domainCopy;
}
// Returns a copy of the Service member (the string the constructor compares
// against "http" and "https").
std::string getService ( )
{
return Service;
// Reports the current value of Score. Score starts at 0 and is changed by the
// constructor's domain checks and by updateScore().
double getScore ( )
{
const double currentScore = Score;
return currentScore;
}
// Adds `time` to Score in place. The value is added as-is, so a negative
// argument lowers the score.
// NOTE(review): the parameter name suggests a duration, but its units are not
// visible here — confirm against callers.
void updateScore( double time )
{
Score += time;
pathBuffer = 0;
bool isValid = true;