From af2b73d2911c12b0a520854f64fc7994f2e373fd Mon Sep 17 00:00:00 2001 From: jsclose <jsclose@umich.edu> Date: Mon, 5 Mar 2018 20:43:15 -0500 Subject: [PATCH] implemented score element to url based off of length of url and domain --- CMakeLists.txt | 2 ++ URLTEST | Bin 0 -> 23988 bytes makefile | 44 +++++++++++++++++++++++++++++++++ shared/ProducerConsumerQueue.h | 4 ++- shared/url.h | 40 ++++++++++++++++++++++++++++-- shared/urlTest.cpp | 19 +++++++------- 6 files changed, 97 insertions(+), 12 deletions(-) create mode 100755 URLTEST diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dcc721..0519d88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,8 @@ add_executable(ParserEndToEndTest parser/tests/parserTest.cpp) +add_executable(URLTEST shared/url.h shared/urlTest.cpp) + find_package(OpenSSL REQUIRED) diff --git a/URLTEST b/URLTEST new file mode 100755 index 0000000000000000000000000000000000000000..e4798c6edbdcb8295d737208bf37be67f1373d99 GIT binary patch literal 23988 zcmeHP4R9RAm7cY1EE8a5j!T@MgH5nO=0_{p*vKY6WLpd4Lnex4C62)vtyd%M!b&T4 zSF#K!UL)<5#+wD@3Uf{jDUr(MTuClb668}Qkt4umE99y$2}!uC+~NDnjZ#iU$6bO# z2)gg<?$OSU<b()wS68Jeb-#Yy@4bHg`pui}p3y%4+Rxuus%gFwP0K9OG_4FNaFM2E zGw(-9)837wBN;~h=G#NNLpwgi+TzPsjLwNs&Nw<SjGdufJBvvKe`Rr<rf}${EkP<( z&4!USyVC_)ue^cFF&H$jULk3SFD{T1$+%I&NToaXre><V^1gYUl=n?V;ouHKsgehi ziA%~eqGr6qOrpjsFMYj~cTUk0r`>0+S^Bj`B8J%=F+0*mI=t5_ujvLUuU<_8@gjLM zf@Q%lB8m1?!GK<QLpLgUijR1a{btKcrIWGtR#oqnr&UOK%N-{H?!LUb8b)XPzF2#+ z5o>QrfY2-NilCG?po9?j%5cj!3^Qu9B*Sf{to6z}rsVBXOvG_7ocE$1S7R)OQM-BP zW)~U!RSdCA)hYiKHx#Pq$CT^HHjMVTk=oz3HxV~F(#iA6bHwp}aCq4R{p~6i)MXg0 ziNdVT)~^iw)KA?O8gZ|@_dCJ>4I>`@UHX+Pc~gpqxLY2_yV51E5J&2^-FZi-cAFCe zYLwjqiWoSrr~X7j$`N*zsP+NGDRiZ2H8LiY0uBN_1~>r1>Mfc!1V!wSrZoYlZ*a^_ zw`<w~z`N@;ZP%rm_70R$6xVL#Ov{iWNbf^B-&Ox1P1`z0<7KD|AYFnK>`Wzt@z~yA z^Zs}<a9;04q8{1DHq}0v-0;E|AN|;lFUN2F5^Yq5q+NQkNC-EM%dl1}$9T!dIOe|C z5ADD<j!|32(fbO0qyNbD*H=_kK#AC1p;xt=Ui&xP*2VM<`%`JNtzwH}p;YY}B<iQb ze(Y<t!fDTiYrz5w7Fe*rf&~^Vuwa1&3oKY*!2$~wSg^obu|T74{kwkfTp9K>*60VP zr>AvmxRM}0eD)22=>l7_6+0^3%GMCeWkqE!FEA^+OYr6EWt+sZO#;gcVCAC{6Tr%M zNK61L-%afFX0a^;6h}|`X>701V@^u7o`+uLLZNIxfcy#o`%~{C&8Vo#4YNwOp0N+I zHa|K=G$+{Xr!c??Au&J7f!VPFjeRZ3sxg}pGD7`&e#wFNaLA#Io&%cOo<aGDZfzd` z4j39l-5~H|fX4ub0EYla&N~*cav<3<(MP_NdOZJ5F#=JQ8)g+p@Ez0&jU1hiupvK8 z?Q9#Saq30I6O@r3C5?TT!>Q-9l|ou*9Amjuj%5tGk3-Kf=>HOQJp&vtbQX1If!lyK z;3VKA;MDoYaz>6t6Mf{Tev7_K<XA*iZkSaZ%YXf`XDm8d^25~5exGYBqT&TmV=Sbx zA8|OHimzZ6oq>K%mO07xV<=aR=?72JkM}$alC!9`UVyD%Vj=$`sk$icD4f94F!2CA zEk8<IdYbGl<hg4Oo?9djb@uXHMxOV|c8<f_&iQjj@e^4Vqd3EIZj>6^4_~2a{;O&< zaewxnE7Pr>DLvOSWq*%Ey(i1|JnO%zR+H@q&keG-+>)>0*hkN=>%hqY7><Aej@kpS zJ8cg`pq}f5^R6fQ1)}+No0gF>1o!P61og4$Mu1JNQcSfF6Y973&+vAYk~t=sd4L1v z5irB|dq$=k0XAJHnU_fBjQzm^Ge&XRzCkiSi`yK`<6s^G^Z0Zlz^0#*E5Gh}-kesJ zfx&+Lk5Cqhcda1*IpX<sy^<f!+j$23FHJWBZ2B9?e?;;#zS!R`@CV5MWy!x)^26CX z&w}5cZUorWCixA?&uC>osQ6#;m)%xQ0o$d3MN+^N1WZD})O6z{R2Jdx;7QI0XK42P zJ%EFy+4INaWZfDqXC%x?pyF^YTggf*TP@loaN@8Ez<FJ?NnoisEP(&`^4@>(^}Yt- z!;|nU^kEB-<qhKR6JDGX2)dXfEazl&wh;Xo0dw2n0C{@o#Ae}g4yy?Ixx4_D4Enh_ zVn6hG`Zb>(c}_{eqlCir-Z39d`vDrq$_AtvB*;p=MF{RkNE(15@aXOEZX^pKI0vV- z8WDz(Ec#&MkpK9a-k<t<-=I;?hymxv#ei|#tf5mP@V>K<aTYR)!(MJ195p`%l8KE) z<Rc&-0r?o<7$76*xZe9TbY~<*vbMhj2&WY6=!jC#(G)xwN$6rXEvF8{YLs$5Zst== zPV@*^>@B2GR=GsdOTybK^&Ao&BGMle(+Y_RU_C8$$`d<O1}y~bOf|9ovdn9mzmKna zCQeFTAwQ=>wqANF>qq6rQ`yS^f~T^V0$hD6dkH{=E@<)^%2i|iPwTnEEM_)p{rSIz z>i#}F1!!Z|`cIVh{(7VT@H72~*>)(){Fu+56&>^7S3bxjwH&ZuoOlx4N=T*}n!e6E zM3K#mgk7APsrR2K>wPWg&+_QjAr&SR65Sg8J|ZkTq_sDfd53s>&LIZH5ZQJp%Y0r# zMC_T-dyXNh`Jr!4p$m+MKp02ei^PS+D`t!!DV@=*aI7t=rEOT8_-eu0g|P>`V;`pc zS<`#97(3ezWtm^_u}`=h%B7udZH8})cxK;=R}6UkGmP$ewxy+|!hcmCtf`)lz2^eD zdVcD;r6}4AHZOyxMfbOl`O7a2Ve#4G-}`h6%G*TwZj>J!d&R#zG>OO`nzWCA9HmQt z<|q6(?){Mcihq}04jR2&&vmcBQ|SlbweJUI?-YENYXGn;c^NE<yoTg8<r6XC&LIRI z!8b_0EndC@v-xf)=4&J0#a_PF*?hl%DpW!Bw~>6`$4iYOJ+Gb3_w{1FrR4jJm+wFD zHpVl)&SJh7A=%#R<@@SvzUpGWC&_o6!$&`$JrlF}(BfJ!3pX!hc-{uw>m|n@)RP;r zc@pYRD#zf8z^=v%D=rJrh6ltm_Q#0}G4L+0e;qtGK((zNJRq2s>bV-Y*spcp(|97B zJr#QSVj(9VIs>91I3FGf$LweD3JU$=p~ZP&U+aF!@qV85*k6bN916XRC(0n?O!bFe z#j_2Mku`d*tJ;5}=T)ocaozfHZ7u}RIZ$hzLi@n<_xnSSsP$*h87@9U<JR_*_LHK! zo^g8(3;CWi_9L=p=uqep|A|TaZi(Uv7s}X;Gl*D*>DCnPFAj5#`|IGj4BgTi73&ad zxQ5ZhI$SM+%CM-+i$#sS1X5)T%4fx5$I8~r&_i&jv_HR;Fw1-<jy#6<i+Vm=DhSmg zM=rE1x!}k-o6TJ$QR^6I&*`jcqJn+7p4(yTJ{UzVY-X)*%5|$04`(q1XP=9TZD1OQ zVUBUaa^0m^Pcd*>qyLROmu0?c2-fMH3iz|wYhhyLUb)0su`zs4@U^AWVr6ASS3s`i zO7hZbhlSO|Orz&lppmTjQ$2S86>ng^;b=&fT7ij08H_GhJHzAX&2HloH78a+xvz2V z5cL{RH)Gj<l<JTeW22)6q@%()R)SDX72S5mMRy?D*gmoN6|0+qm&!HI+>Sc>ZP5pO zH7iTQL#PlQdzd9SZT72x6W3Es$7h9?PT-w1tUjLGeiEY{1d)}0oTO!RI{NWxP#hBM z{uASPByc>|BjTRSiKSkypGg06{fCdD2^93DLs=rVf<;UwRx(@_!(uN1)aA0YrF}@a zc6I<|$8*JUDwk&!{77I{Xb3tE>AhI1WY0>o${BMx&%o)>C%Nze<v_0SlQ8O_)%ZBD z!CZYH*ZKIeR}dPkAK4`skLXKgp8|{6Lk6I=m46>{u`m^NE_;AX%VlXA41x8!yz8K& z;LMWK?#HJLumEf95m}as`lXlifTADF_C7QU1?;~RWb)AG;7Q)=d!7|7sssBblKum= z<CeRiroG3JA<0#@UxN`ydD_bNv8QW+`>)DS|5^Lp4eWc?ewSP2+J4-SVy<yBj&Zow z;SmAy^r$*3dkmmI``4`N6@-`@Y~C<@wE++_Jkf_Y^Qa;d3@0S5hUn}OQ6Z!NqfdZJ zf|?bm5%Zsu;1)scf0&<9fJ(AT4_S?efGADi8KUW@1p`W?8}ar@%R^ZL&HwNNl4{&V zl{+j*)F#OiYkUk`x}@4U(dC*DGPwSSQ%eUuJ^g}yi<nt|-<47k%2kSpE-Yz}G~%tC zuijCP5sEy>1D0QnaD{3;qv!J-LL<>ydq`T?l99_AtwgW2{n?=CT!k}Pc}P~O9jbg> zfbw=pSx?|Z^idhgN1H^O1lDL&Vg%c)U)QOFMDRcz`?q{dIB{~6RsIu)DcjyB9w16- z5gO4?w~$r5y|uRY<3Zeyhx7p80N@e8BmUz{d(Zl?RDz?`bL3WDr})00oF5{%nGt%h z9rSJm<!-VDB+hP9kq@fp`Z2&`U_$^wlDkR7cK1#*Pj<MA0Akz9t*dme++nE_76umA zFx<>?zga4sfJ>W38}}NfABPB3gHx{VM^OQ{;57UDbPngVxHm|H4qa|>I@<&y+;`f< zb!^O!(h0;3A7vrmjNHB)jxw<oRFYhXk(BnUsKt{SRM{~rdlR6)kDpWtRdS-D*Y;WB z)Y9Mg3$|PY7Dv+|FksB$juE$KKP%32s|c!@Tcwm!ZHXN#Zj~t^NG&Inm4-Fp4YEzx z>doCM+Z_F2t7DXF$>WOC;-M}^pOZB?9U&usRt~}zq^K#fFt9UhICzrb4ab<dQCil1 z2y&eLOufArZMwC1rA@;OFkV_cuUnl{xz5*b>74REI=0g1KR$+4+RXXj2*x&Dy#6hN zYr{v7PTyWB)jUwC_fA7{&!oR^8LH4aY3EoF(RW~aRTN=*deuE5kBhur<Q*bUiaagy zE|GVO{C<%?DDqE;JR|alM9ve4=~aI&@_vyY5;>of)2kj9`GCm(LgbH%{4tRq5&36C z{<z3LEAl~+e@^6|M_!lzlRQHF=7#B2J%9sI?jt6C3Zq1~vh)*{T3Px&OW$N^oTYcr z{Z6p-5tg1}=~0%x$kK}}4YKqeI{jlT?P2MYEPa}#hgdq!5~EG@S1h%&^lpaOW|pd0 z+Rf4(EHMa1p@24V8%q(^@wH4e!O|L*KEYBsOL>;AWa)91u0Uz}=+814twf0Ud&=R( z0-t+7ME}}vkv@y`OCo=TF|cJB__gl>VasYF_o10Bn}{3+f=!sVhscvaegi13>7%Pl zu4SoC;<CWo;t5t9Mlx97HGTBpK`Pft28xS9+>#%nSNhw7M1BF}E+CEQqvTu2sE=s* z7@F?~BCa0*p#VRoQ+pZ6t+HOzu0#!{T-!i+HL`1*7JPlz1L?SCbiNs!;$neZ7Aro8 z?Awx3w&IJt8d*B4`k8e4Q}v|^UIAFS6)7a?ot$;NDnrq9x+AqQ7;HAX%y<IdELXH8 z?vKUe;fh4EHE6cq*w_$kPDE0{Iy0PX*GLZPef#!RnBC#Fj<{J7Nwft!!s%!*od~w= zZ;7EQnr@5Vdat>^YExG@-f31{gYp_#UUPLrqRreFHIwGH_MK6)HWBG;GuzW{$*qYl zGs(&dl;)DHQ8}%`Z0^)ZqEQE}qTNhu6)}jaXp6<QtqE$EHUo`2>NW<n1~b_ei<l_s ziBuY(HqjQ2wWClE-ANEhB-xCMw&qbFrdxc&O1xa*03Sy31rVk9O=c628YF%f&Z`MY z)Zd_KXRbmb3~1WxfHkW$t?x$po$Mv}qWuQZlco)pXucZkVrsA(1Fvrga);ifEgE~5 z=EL~|eQM^LRNwvebvra)ciGaTCaTNIE?HVS00r?9LtMW5MXS%L=Qx;ly~djnk2pz& z=yO?_Q+zXUCG@8q>Jh65JFu`M?8E+u@L>h(VF%H!U;tw#enho@PQfoLcwE6T1)o#! zMFsy!LE4aVY=ys~;Oh!zU~9Htr1V6w=qgk7?@_Qq!D<D!C|IZ9M-(g)vVbgDV8H?l z7Fe*rf&~^Vuwa1&3oKY*!2$~wSg^o?1^%B}VEG5io8GzQquTNs!`OXiLwc=YRNWLw zbf!bvhm&OkuG)2x^!^TWTO_n5)*RAAOLMrsOUsP-2B+lV0*>QFeS5e~^VAuw-Q5O` zmr{xLa6FdYZ**0H6Q{}I@(o;G!+(|)|BP?!#s7cfpXW0#6{KoHkq!0XWXf!AOvbA_ z@IU8i6hd9lvr_8mYEGGHaiD`{_ZX_GBGGWtNGHRwbP7YMsxt4xf12Yz>oK%YET(0O zhU_I@YbIKLkHqzHv%NJP4b|TfDe8I6O@`6YnKsyeIFdG#M#{Xe(`=8xqIWhJ-kw6C z&<<9w-5XBDA_k7El4iJV+fHL=r4b4>+-%g}(QuQ|re$i}!;7c3y?I%Yos_g5k|OvQ zV<AS)XQ^lEc6+CjuAVHv98=OL|5ty9nZhBSHlQVIq#xZO9cg_$5edi54V@{|XbDHm zGzV2_NC(oAdJ%DTERixWTyuS^nFb>bttF#w$E;c-%^`nrU~a>~02{(_cAQQmF~y<K zdIM7&OLI!vD6-?iiEP6pYsv1nF0i&r4maZ~lI5BHxjkhq{3;|A>7NJpx|VonDk^&) zm<Rv*6b@>W`?pqwb~LOtXpSTEm*SXVa2_J!sZcY!A1dNiF6amzDQcW?xwQy;2pMp{ z=Do4B5ytON@Mkh&2t!8FOh=Q6ebPc>bMvo@#Bn?xx`+Lrkt7Z24Y<qOV(sCyDLmO0 z^+u#SZ0t2#W9<e!G!iApWTC}m&rbc65BFF+(Mp=kB9z<AHfTw-pD1pFNLvTd0SE2a zPxOdF5sQGz%V*$?!XiT}<FU7Pqqe9mik}rRBb{jzzg7}vz+JujPHhbjy-TzK9NR8( z#ZzsYyrZ=?ZILVVMqF_=TCy02-6e}B@r`oHyd%+Li{7S4d32FC#%hC$N)~5sh013( zLuGg5{yPwRo(wN6QqliM;2Qiha_!`3cS`dPtQuSz7%T~#y?9`8eP+>_(m>|ovOt;k z6P(^Rt0^E@v7{JZ3B0Amr+w-_pV-ZIo?%xY<2wYF`95-`3+LO&J`$>H+=G*6B@*5# zh>Q50$lN&PSE_UTi|{!M<vaK;;BNWkr@R0XFXEIJAcBiH<vH&KKCR?A_&3fYZ;*_* z2!wtvobo2|9}ld0Magq;*1Z{h%J69jX_qXO<8$zj0-tMqO|T;`@=zZ)PJNvB9iLJ1 z9GvvjhyUoq%i(w9<agdrJf-*@+~Kd6>@)msocw&3MA}yszk_coEgny|2Y2KPW|w|$ zT*wbd`6J+`KREAO-i=qGbJ?qYu~eAy??mRtDbIQD^D*#Ko`WAak34iQE=Qglr#!wp zW7~I>JO^KUaj|~)d2sS@el*Ygx^W?2y|4N@_^EG=!X19+Wf^gf*Nv0kdGGa{;&*U| zKR_LDIs5^ENu2y0s=r?<eg}8>-Tlqge@5}Y^Bq!u2Y2{4xcVc%gRchtHJD51ccgAn zINw{6ce}#*{*t(%aK6VR&eY6IT}iZG>C4Q-lamT}?%y9P+_@iDUn1!pe@-aencshP za5cW)DBKy}Wq5CTnM?nG!X5qBDBRJ%R^g8Rylwr^*NInrS4!Nuzt=nMs{gRluJDHx z?)djHg**4pGT4Ulocp~_;Bq!;wkClW;(ZHn{HbW7X<ZUWF0Us%__sWGDL7@7q;K%x zM?CoR9=sIK4LASo9=z9sk9hF!dhmbt;IDe{O3a<Rzd8@TN8lNd$jkKLaS#0g4?f_* zzaVjpT3%oB;NS4z-}2zkdhqXg@Cd~6L4FrfGm?pPH_{%Yk0Z4pwIW54Vo3KO-HQ}Q zYC|%RI*{%|N+LBOrI6A{{2zucq<u*Hk@)$^14s`d^&ou$30F-!vIJ5$(*LDC0gY!0 zu2rOK<>+Z~ts@KP5Z(pbtToyBN!42I`~>dRUV-XuT6-2cnD@HaFlMjU4Y8E>?!wvG zo7jK5$Y!=dg`Ian;|s9ozEHxj+kUH}#6Hj^<PX_k|DkL%JL^A`(X*wVC;PmHDmKxD z4%JCup`!Q@uu$*bW*2H_9P7>PujgQ#*;u2kct?#2aa;w+^C<&AZ0XrqtE$458WnS# zLW&)>v$b~i)B<8NO~_sJti#D*V+x0v!F$5@nvr-SWd>`_)V=9MM{w<i>R?AQagP~E z2Te1Q0)Ar(hXavlFom-}vpJ}BCgVHJRJtP4(V@*p&erYd^)`HHmkQ$Jww<AdofXZo zq$A!Xy&|E>7!j<C?M;T0`=O=!%rA)V<Jxe}Q5S1BcP0|?l=#uYSbHSi*=z>!%YnhJ zs-Qe))8@&I(~3kACM<de;G9UEGS3bym9vjQd0gl@&I`@UX3F+3PDRwClKU)qo<mjV zvmU`ScVOWWxOL4182<bwlSka@<lCcX^_)-NxA3T%d69s7FFyA%?e9C^p0Cs45P80= z_{mdoTrI34kGtNc^}sy6EQSL`o@%=k^e&Oy%bI!i!e1~JKj_OFSDK*;N9Y~lW^pcx zS3@{&KHsGB^}_~rH|*_fF&vd=-dyl~1KwOTRP&FoIA)#^Aym(I>eR%|bHo@gUaml0 z7}kiy!|km&gN?`WOVjH7_IDV3erwV~K&V}VgXP<|Zgu^{bKI<)Rde&Kn(A3KRh2@| mg6s<sYm1}z+Z53Xo_5n)h!_Pa|9k%WyBwx45f^rnwf_NDA@%eC literal 0 HcmV?d00001 diff --git a/makefile b/makefile index 363ce8c..d807dec 100644 --- a/makefile +++ b/makefile @@ -110,6 +110,19 @@ depend: $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 .PHONY : depend +#============================================================================= +# Target rules for targets named URLTEST + +# Build rule for target. +URLTEST: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 URLTEST +.PHONY : URLTEST + +# fast build rule for target. +URLTEST/fast: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/build +.PHONY : URLTEST/fast + #============================================================================= # Target rules for targets named ParserEndToEndTest @@ -432,6 +445,33 @@ shared/ProducerConsumerQueue.cpp.s: $(MAKE) -f CMakeFiles/search.dir/build.make CMakeFiles/search.dir/shared/ProducerConsumerQueue.cpp.s .PHONY : shared/ProducerConsumerQueue.cpp.s +shared/urlTest.o: shared/urlTest.cpp.o + +.PHONY : shared/urlTest.o + +# target to build an object file +shared/urlTest.cpp.o: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.o +.PHONY : shared/urlTest.cpp.o + +shared/urlTest.i: shared/urlTest.cpp.i + +.PHONY : shared/urlTest.i + +# target to preprocess a source file +shared/urlTest.cpp.i: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.i +.PHONY : shared/urlTest.cpp.i + +shared/urlTest.s: shared/urlTest.cpp.s + +.PHONY : shared/urlTest.s + +# target to generate assembly for a file +shared/urlTest.cpp.s: + $(MAKE) -f CMakeFiles/URLTEST.dir/build.make CMakeFiles/URLTEST.dir/shared/urlTest.cpp.s +.PHONY : shared/urlTest.cpp.s + util/tests/stemmerTest.o: util/tests/stemmerTest.cpp.o .PHONY : util/tests/stemmerTest.o @@ -547,6 +587,7 @@ help: @echo "... clean" @echo "... depend" @echo "... edit_cache" + @echo "... URLTEST" @echo "... ParserEndToEndTest" @echo "... rebuild_cache" @echo "... StemmerTest" @@ -579,6 +620,9 @@ help: @echo "... shared/ProducerConsumerQueue.o" @echo "... shared/ProducerConsumerQueue.i" @echo "... shared/ProducerConsumerQueue.s" + @echo "... shared/urlTest.o" + @echo "... shared/urlTest.i" + @echo "... shared/urlTest.s" @echo "... util/tests/stemmerTest.o" @echo "... util/tests/stemmerTest.i" @echo "... util/tests/stemmerTest.s" diff --git a/shared/ProducerConsumerQueue.h b/shared/ProducerConsumerQueue.h index d1ad2dd..2647d1c 100644 --- a/shared/ProducerConsumerQueue.h +++ b/shared/ProducerConsumerQueue.h @@ -20,7 +20,9 @@ private: public: ProducerConsumerQueue() {} - void Push(T obj); + + + void Push(T obj); T Pop(); size_t Size(); diff --git a/shared/url.h b/shared/url.h index 95c42e5..44fc016 100644 --- a/shared/url.h +++ b/shared/url.h @@ -7,10 +7,18 @@ #include <string> #include <iostream> #include "../util/util.h" +#include <math.h> //#include "../crawler/SocketReader.h" using namespace std; +#define GOV ".gov" +#define COM ".com" +#define EDU ".edu" +#define ORG ".org" +#define NET ".net" +#define MIL ".mil" +#define INT ".int" @@ -25,6 +33,7 @@ public: *Host, *Domain, *Path; + double Score; ParsedUrl( string input_url ) { @@ -69,7 +78,12 @@ public: //char * domainBuffer = new char[ 20 ]; //get the domain: - for(int i = strlen(Host); Host[i] != Period; i--){ + char *i = Host; + for(; *i; i++){ + + if(*i == Period) + Domain = i; + } @@ -90,6 +104,8 @@ public: } else Host = Path = p; + + setScore(); } void printUrl() @@ -97,12 +113,32 @@ public: cout << "Complete URL: " << CompleteUrl << endl; cout << "Service: " << Service << endl; cout << "Host: " << Host << endl; + cout << "Domain: " << Domain << endl; cout << "Path: " << Path << endl; + cout << "Score: " << Score << endl; } - + void setScore(){ + double lengthOfUrl = strlen(CompleteUrl); + Score += 4 * 1/ log( lengthOfUrl ); + + if ( strcmp ( Domain , ORG ) ) + Score += 5; + else if ( strcmp ( Domain , EDU ) ) + Score += 4; + else if ( strcmp ( Domain , GOV ) ) + Score += 3; + else if ( strcmp ( Domain , COM ) ) + Score += 2; + else if ( strcmp ( Domain , NET ) ) + Score += 1; + else if ( strcmp ( Domain , INT ) ) + Score += 1; + else if ( strcmp ( Domain , MIL ) ) + Score += .5; + } ~ParsedUrl( ) { diff --git a/shared/urlTest.cpp b/shared/urlTest.cpp index 7fda4f8..feab256 100644 --- a/shared/urlTest.cpp +++ b/shared/urlTest.cpp @@ -22,20 +22,21 @@ int main(int argc, const char * argv[]) ParsedUrl fragmentTest = ParsedUrl("http://www.example.com/path/to/myfile.html?key1=value1&key2=value2#SomewhereInTheDocument"); - //fragmentTest.printUrl(); + fragmentTest.printUrl(); //assert( strcmp(fragmentTest.Service, "http")); //assert( strcmp(fragmentTest.Host, "example.com")); + ParsedUrl gov = ParsedUrl("http://www.goverment.gov/path/to/myfile.html"); + gov.printUrl(); + //ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds"); + //relativeURLTest.printUrl(); - ParsedUrl relativeURLTest = ParsedUrl("/wiki/List_of_sheep_breeds"); - relativeURLTest.printUrl(); + //ParsedUrl pointToFragment = ParsedUrl("#topOfPage"); - ParsedUrl pointToFragment = ParsedUrl("#topOfPage"); - - ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n" - "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\""); - mailToTest.printUrl(); - std::cout << "URL TEST PASSED" << std::endl; + //ParsedUrl mailToTest = ParsedUrl("mailto:someone@example.com?cc=someoneelse@example.com&bcc=andsomeoneelse@example.com\n" + // "&subject=Summer%20Party&body=You%20are%20invited%20to%20a%20big%20summer%20party!\""); + //mailToTest.printUrl(); + //std::cout << "URL TEST PASSED" << std::endl; } \ No newline at end of file -- GitLab