Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged changes to my fork. #11

Merged
merged 4 commits into from
Oct 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions pyanchor/link_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import requests
from bs4 import BeautifulSoup

import re

class LinkResults:
def __init__(self, url: str):
Expand Down Expand Up @@ -42,11 +42,23 @@ def check_link_for_http_scheme(self, href: str) -> str:

if href.startswith(self.base_url):
return href

elif href.startswith("/"):
href = self.base_url + href.lstrip("/")
return href
else: # This catches any href set to '#'
return None # TODO: Deal with ./ or ../ relative links.

elif href.startswith("./"):
return self.base_url + re.sub("./", "", href) #using re.sub to remove all instances of ./ in href

elif href.startswith("../"):
return self.base_url + re.sub("../", "", href) #using re.sub to remove all instances of ./ in href

elif href.startswith("#"):
if "#" not in list(self.base_url):
return self.base_url + re.sub("#", "", href) #checks if # exists in base_url
else:
return None #returns none if # already exists in self.base_url


def find_all_atags(self, url: str):
"""Find all anchor tags on a given URL.
Expand Down
2 changes: 2 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def test_exception_on_invalid_url_http_scheme(self):
def test_successful_result_prints(self):
    """Check that a verbose run prints a 200 line for each reachable page.

    Invokes the CLI app against the local test server root with
    ``--verbose`` and asserts that every expected status line appears
    in the captured output.
    """
    results = runner.invoke(app, ["http://127.0.0.1:5000/", "--verbose"])
    printed = results.output

    # One entry per page the index links to via an absolute-path or
    # relative href.
    expected_lines = (
        "[ 200 ] - http://127.0.0.1:5000/about",
        "[ 200 ] - http://127.0.0.1:5000/rel",
        "[ 200 ] - http://127.0.0.1:5000/rel2",
    )
    for line in expected_lines:
        assert line in printed

def test_failing_result_prints(self):
results = runner.invoke(app, ["http://127.0.0.1:5000/"])
Expand Down
9 changes: 8 additions & 1 deletion tests/test_webapp/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,18 @@ def about_link_2():
def contact():
    """Render ``success.html`` with ``name`` set to ``"Contact"``."""
    page = render_template("success.html", name="Contact")
    return page

@app.route("/rel")
def relative_link():
    """Render ``success.html`` with ``name`` set to ``"Rel"`` for ``/rel``."""
    page = render_template("success.html", name="Rel")
    return page

@app.route("/rel2")
def relative_link2():
    """Render ``success.html`` with ``name`` set to ``"Rel2"`` for ``/rel2``."""
    page = render_template("success.html", name="Rel2")
    return page

@app.route("/#")
def login():
    """Respond on ``/#`` with ``error.html`` and HTTP status 401."""
    # The same code is shown in the template and sent as the response status.
    status = 401
    body = render_template("error.html", status_code=status)
    return body, status


@app.route("/500")
def five_hundred():
    """Respond on ``/500`` with ``error.html`` and HTTP status 500."""
    # The same code is shown in the template and sent as the response status.
    status = 500
    body = render_template("error.html", status_code=status)
    return body, status
Expand All @@ -47,3 +53,4 @@ def sitemap():

# Start the Flask test app only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    app.run()

3 changes: 2 additions & 1 deletion tests/test_webapp/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
<a href="/about">About Page</a>
<a href="http://127.0.0.1:5000/contact">Contact Page</a>
<a href="#">Hash Link</a>
<a href="./">Rel Link</a>
<a href="./rel">Rel Link</a>
<a href="../rel2"> Second Rel Link</a>

<!-- These should fail -->
<a href="/dhjaow">Failing Link</a>
Expand Down