From 7979052b89bd83ba430111f5b7559222c5b97a1f Mon Sep 17 00:00:00 2001 From: Gitesh Sharma <58145476+Glitchedgitz@Users.Noreply.Github.Com> Date: Tue, 9 Apr 2024 02:23:49 +0530 Subject: [PATCH 1/3] `test_file` -> `test` --- {test_file => test}/file1.txt | 0 {test_file => test}/file2.txt | 0 {test_file => test}/test.txt | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {test_file => test}/file1.txt (100%) rename {test_file => test}/file2.txt (100%) rename {test_file => test}/test.txt (100%) diff --git a/test_file/file1.txt b/test/file1.txt similarity index 100% rename from test_file/file1.txt rename to test/file1.txt diff --git a/test_file/file2.txt b/test/file2.txt similarity index 100% rename from test_file/file2.txt rename to test/file2.txt diff --git a/test_file/test.txt b/test/test.txt similarity index 100% rename from test_file/test.txt rename to test/test.txt From 9bb29b0b5dbce3dcb28cb6e8752be1cec3dab534 Mon Sep 17 00:00:00 2001 From: Gitesh Sharma <58145476+Glitchedgitz@Users.Noreply.Github.Com> Date: Tue, 9 Apr 2024 02:50:00 +0530 Subject: [PATCH 2/3] Fixed error: `invalid control character in url` --- test/urls.txt | Bin 0 -> 22388 bytes v2/pkg/methods/urls.go | 17 +++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 test/urls.txt diff --git a/test/urls.txt b/test/urls.txt new file mode 100644 index 0000000000000000000000000000000000000000..0adc9913dc7c32c9b8bb224b4c5433ba0ecb3a3a GIT binary patch literal 22388 zcmd6vS5qX}a)9eu5&9E~SYD*Y+f_`Aa03i>a69lq;B5nghgkh}N#A#px>emYOjErPs;#yY$7wAV)VHS1-? z+iaY+Z+!Z>>gVZ3c1Qp9>^mcS=ygupVm1oodyH7ghJmo3z1(Eo^q9&XS?4r6q{mY> z%hP%`!IKGgI6&K6wo0E(X5C72Ze~N-WVS%-Ff07QTo3fQ0QO;eZv)3X{jPZOz_StB zCVBG6Y&*=;4J4}H?2+h#w9 z;AJoS#O~tL!5Z^GF`msZ*C)Og!1od`cJgHX8QxW9p8bn4b zmRNO}xu$u#LceifTFAc8YdMWr<;s>$dMvWWBs1g?OmII31QU$)NiUH1g6-}E=}1;% z>@Om{%-wm)eNWVewHN>b9M%m&{gfn{1alMfCu)vALPUoL@?o6(_!gnv% z?tGa$+vn2^E6iWxTnG8(651-w6`yEOU-D5_+(6UY z@bNz5ZuoxSyOTM`z;GY9?&j`}@2c{&@hru(^xezM(&Qjlp6C#J+GF*bXNl{{Zz74l zvGQW#;U{=)BJFXL-(7IH$!_K54f-v!vpIgh@Vk-zZuory6HoN3Gp?K8DR{3twT~eY z7kPHXv(4;;{Xg=%%Dmlao&}ybk2sab*U%gDX}$g=X-v!@ zLE}nU+nn&=^SAP*DpuO%sjBmj1LGAtSY~(U;BgzesXAA?u>Q00zxu2ft>zj22LBFL z#(?rw+cD_3I)^uMR99tqa!H{^@thwy=|(E?kFzu8*sCO7CqOBoS zd$rB?PP*EKYx>e%KhKoaLH!-f7gDNNdm-_tR<*iJ&G4FrC{ti~0qxlHjaVzs@gA>W zlpP4U8ra|c50KOa4r=h%anjIL;VZnBYi2rxYt;px6Wv{K5?bqr;J3OSKPqXEkhe-d z^@%>08s(T1ENhgidwD4_w8#1rs-^)jBu4n3)LGYsI;Fx{jZPolaJM(6xh#{M*R^*w#YKM?!xqfP0rm%aA0=N)#eFE|25#FNjoTWYi@ zsrCDIX$||NpDQU%Iqv>muG>m3JK`lJw8qW~ae`~sfA~P`w5Cn>(s*7~B~R;!&w8@u zjg}bcF@jP9-+y2eOyHkPBUcZhgCnqh4o*jbW`e#`v>tIDv-B0hD{!~Z-2gIEfB6DC ze3Z`^KKeK#wz#T6XEVu*nP%)Vvm2?I#Dh5GlHVGS zugrAMU!A_jtsG^9X@Xg1f$W;Ubz1v?tB-Y0SaFwcqcg``Pt)3u?7F6PgXf3Pg|V!0 z=G$QY9(KCR?>@cvnOlGK4p@g6s}I!$EajMhoT&w_9tbI`uQG)6_6&{Kh2>)8sm6M% z?AYIkDvfikH)J0vr!(!|SWk_%Yd*KML?8X;?_B5cuG}xG0iW{NXZ7dxyw=`01UJR? zPU*Uyu9e?X(Ix$aL3>J1Y@b#2XpIP|b&Nz5pRf|np{*Ji*-q=*ua8k`8NHJn%GiAl zYX>u{nT=(B;#9V|(}L+_-CL+3^m%96$1W{re2i}%W&g!q@{ti;t+$l?ug5N`@}fRo z?iIiE`1DEiQ0CJ<)WDwC7~$ zkG;UhKH^$CS})X-)sm5N`xD&>(c6CcufUqmyaL3+=81kz6naLB7OnU)dMg)&)gmQ* z{F=(fJ)}iY`6pJB&O>8)a$Z+mb%j~a)1RDFVAOM%V~4e8o9Pi;m!BIcC`NidVc7{w z#7o@2j=_>)?r; z8(vfqz4^pmenXn}^DXQ-KK>*7cjWAC;(5Kjo6Hs?K#?)&V6J=UQ@chzCS5G1ohWZ_ zx$A&_rId(!*k7x^Og+t5>tJLNH|apqZ?S^BETuU^#2r7;&w1oY<&iK+j~&1_4rk{P ze6u)bNk(hgl}{(4g?l_xTKZ&wzu>G+TBUDsbJ);nM1_ZzUB zLNharP&cn9EX}Zk`*c5wMyj!!GW8n8_taap8lexK_w?j_t*##W%WFni*Vv``1mSY` z+OxqUo%GBxk!MqscJA4og*Nnh68Mx!aiWols4?d}vKRpU@IUp$ALtN_+io z<<&CnMO1kjin1k6ZYyCiB!xC$u^X?|##Onu;8*#vET@VmkxNoP8!47AdjE#?!rM_7 zce7^I{mpV-mx=S$QpKJ^`x^f?ZYXaXX>%i}hPQXDctvQJmqmI^=Qh!IJOj)d-p(GP817M~M`)d*GqEApv;~Z777Fr^p zGhZUSkx7xuLcip@41LmgTIwG$poZ8^@+kHyt*aS4=de$dyk}S{vHxNAZI;9=OkZ0V zsJg=?_7fUGc(iIGDp=rCU0U4LkX&k@8+wOjBNZzXj1CO)Y3IDELR;9fg(UA{<#GIm zVLX-ZKvGAS&9RGKC~E>3ukorD*})nZ?*-pV5wkR3p|BpWfNnVD;~z*qc{HRdsj z^9Wt`Gv5X{I;OWdrfsn`N-WB z{rh>FG4n#`)( z$B451^4z@|S&kZcelo(dw{rROJkN9aYVFr@`9=V>qkB0wZCbC?SK0=B+>fMBWbyNB z^Y_B%NNWJh8^ts01k8M@g_Hu?$oADoH45n|M9qYpDZUp|t6qoNe2dc%iY4aT;(L*M zR~Tb23*f~{h^ibo1$8OCw6CsMV+yIIWIKf3?zu11OWc*cFr~Lyyyj7(aLENEL0uNhh+yb)U0DLHy zdg#;73Nt`60Or>jb;{EnpxlHSo|yj#n3P0DkIYdT8yaSvN8tF9B+@se&nCN5QXVp9 z30ZK>oZo0u^4xQ-N|Xym-ZM%`S>w!iXtg3ouV9CM_UvL#PuZxK7I$+#S&P?CUZ0cO zw0Yf(@4~lRyt&s!ZBT zK2S=?)rAx(S|j$NR#Rf=6&6yYk5NIN&Lf|Hj1&oYtl!Bif|PRV)sP}uM9LJUiaCg~ zB+0J}DN^R}_H$;JoTk=_RRUJJspsYNF8++m8Kcvm(`LBjnLREz^INE)&Ql|3;mPVl z>Md5!dbZZ~H|$-lZ~|?pac02E3h+u9Lui;2=;DmKE%?hgy;XU;;L6DDHe>pLsTY{L z8EGAfHDZ6HeVqWY`EU0*$Sfmphu-@d6!Mjk<^ZP{IRgeRd489A?1GCQ%(zOAv!v4} z88ynPC-k#+?mK^*;6YxyOzRKOK1ur!biI%`e})yu>9Ghe?J?ST=pK8J|JIo4ju!R4 z{^%;u9Mb-cQ&~>X#;I7_(|ih_fkk^QVm)Thq+a!kKISnaJnc_lG^%m*q4lKoKO@o* z5ee;ay=y67?Pe59kKF8*F+^<{qm1SsV}#Xx^ITiIqmJ6*G_-==idE{{h94kQhtD&~ zyH+S#OQEi;B^>JUTXSL5+tW4Ep~kc#L%tcJw^qOq?R)eX22$%8b6mI=Ed_Vvr-PNv zn>>Jxxc0Y|#Z$GWv9No@%{sfAg!1&Y8!_cw==b;9N@7YI;H~OLwSQPlaf;%gkZo2e zzGV^lJhzCvZe$m z&d3@-BeLSV`8+mx7SiXGD9i( z$gL?eUZV*jy`HiJsnJ(o*V^Pso<)7Nxg|3`fmQw7$ZlPymf(wPPDC!bY?!a=j#|@z zc}lrcts@P@y|zclWh4B-snNqaRyp{%G8K%ohQ7=tGdEhWkvFI%&1=p~dN4{mj2@8( z--^~X&Qujgk*mqGi+MHu7%NY4hKqd9afY4P9A-JS@wJGXE#r@0eGF>T()z1bUP-u? z$HN;`G8P;Vk<+lgjRb3XSC5;9wIQ{iq50*4L*{!upBa{M#M6ysA8ek% zJLx69^WNo?Xjj@=>)n{sHr7&Qrt+9-vPZ8jYLCK_EYJ9!{p=)u)_chF^vYGQl|R;- zA2jUnJ+S?18(Q*5m5Zr`iSY329h~{+xueL%|61p?5!@G$Zy*j7(u@G2GaK_3vU~rq?xR*kTmBp@l zsp`t=_44~JU3i(sgI!PRvkt%&T2~UjWtVj_`aI#SRfI=t!ggoYhty+}YK(p4yW57^ ztu>N!jPvbP-ev0(_GGmlclkczH*7uQ6hf0jVa<~qS_cMYLsd+-hT#U=_8`F!5ijq> zTfCcKZD&DejWL$0a46ZXs64X@V4Ag*?w-1Ds9JZm{rpU-+Izm9N!L=v`E$!6J@v$^ zQ(kL)n0>ndo}31~Q!5))P->iWm7mN5_a6^|)V2!*Qb%x|F(qS<`MSz+aVWhi6{?;@ zYYZ48F`5$Iw7=e)r^(lkC)J}Ut=BEfJEhL+@=gossPe9Z{(NV+kTm7!URUxp^!%#g zbq6Rr=c7+7;LJo$Ihz zd_Vh7MhVFVbaF@R(j4__I*~_K^}Y@?v#dW29Ce=he={bT{R->GTK_xbQ!o9d>1PJp zxO{jPr}SI}nn@tlW0f0b>2Z&~dSbs{fy&xX;lE0`tZU}wM@*1;0uz&p&^ZZr= zs9BZxR&%Li!`geRcBXaiJEF7Zvih~% z=DF{3YyRFr)N^Ua7WpAuag)yf!i>dlogJuPL#`>oeJf~cHTVyN3f0xJ9U>wQ3d+CQ7mNoITj-{ss)^Lebo)z)q;f_Jle)|mK?mj54T znv=7(J*vj5L2-oqktSY_HVT?kX+)^w4WHb~E{~}0#c9!4Luj^9kF^!0TLBqS!S*3) z{~XHuh*4&j&E>pz_j~L>??G=g{?~*uUOf``S_sxYy*=~WQHu9VnjFJoG2#KOqY71K zcE-q!#2)>hJmGhT?DDyLw|?0Fg%xiilx3d%^@75B@*N!G72y{|RZ;nTeud7o>hnyc zFlo!=u(h_Fs$e9jS>Mrk-^ak-3UAh0No{%~L0x*2R#{m+(EQBoUuY7kGfv18i>iWq z#%Z}Vdb`03ddu3G;irZD=ZU+jeyi`yd6;v-d+^Ui)WM+G<53dwc9Bw>u)TV|-Pa{xeJD z->63|Gu8gWudF!uw zt6y(9yyo1y7B=k&-|Dv>S8rD=SzlLv%U^t11@5~Lf90tuxh|0}o=68!`F_!LI=c?| zNl6uzAHtezGHZX0Tps_UU+y;*Rb_>YQ#)QVxA|=4xpeW*wxi#gq(X4!$MHsv<-c;@ RYL;dxn@e^z$j`01{{tT~dS(Cs literal 0 HcmV?d00001 diff --git a/v2/pkg/methods/urls.go b/v2/pkg/methods/urls.go index 410a788..afedb74 100644 --- a/v2/pkg/methods/urls.go +++ b/v2/pkg/methods/urls.go @@ -5,6 +5,7 @@ import ( "net" "net/url" "path/filepath" + "regexp" "strings" "golang.org/x/net/publicsuffix" @@ -113,9 +114,10 @@ func (m *Methods) UrlAllDir(u *url.URL, array *[]string) { func (m *Methods) AnalyzeURLs(urls []string, fn func(*url.URL, *[]string), array *[]string) { for _, s := range urls { - u, err := url.Parse(s) + sanitizedURL := sanitizeURL(s) + u, err := url.Parse(sanitizedURL) if err != nil { - log.Println("Err: AnalyseURLs in url " + s) + log.Println("Err: AnalyseURLs in url ", err) continue } @@ -123,6 +125,17 @@ func (m *Methods) AnalyzeURLs(urls []string, fn func(*url.URL, *[]string), array } } +// Function to sanitize the URL string +func sanitizeURL(s string) string { + // Regular expression to match any characters outside the valid ASCII range + controlCharsRegex := regexp.MustCompile(`[^ -~]`) + + // Replace any characters outside the valid ASCII range with an empty string + sanitizedURL := controlCharsRegex.ReplaceAllString(s, "") + + return sanitizedURL +} + func (m *Methods) init() { log.SetFlags(0) } From 9749596327d11d2747e8a369ab772752a6ab1857 Mon Sep 17 00:00:00 2001 From: Gitesh Sharma <58145476+Glitchedgitz@Users.Noreply.Github.Com> Date: Tue, 9 Apr 2024 02:53:29 +0530 Subject: [PATCH 3/3] Handle if schema not present in URL --- v2/pkg/methods/urls.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/v2/pkg/methods/urls.go b/v2/pkg/methods/urls.go index afedb74..fc6befb 100644 --- a/v2/pkg/methods/urls.go +++ b/v2/pkg/methods/urls.go @@ -114,6 +114,9 @@ func (m *Methods) UrlAllDir(u *url.URL, array *[]string) { func (m *Methods) AnalyzeURLs(urls []string, fn func(*url.URL, *[]string), array *[]string) { for _, s := range urls { + if !strings.HasPrefix(s, "http://") && !strings.HasPrefix(s, "https://") { + s = "http://" + s + } sanitizedURL := sanitizeURL(s) u, err := url.Parse(sanitizedURL) if err != nil {