Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions LinkChecker/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__pycache__/
*.pyc
venv/
.env
*.csv
146 changes: 146 additions & 0 deletions LinkChecker/linkchecker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import csv
from datetime import datetime


def get_all_links(url):
    """
    Fetch *url* and return the set of unique absolute HTTP/HTTPS links
    found in its ``<a href="...">`` tags.

    Returns an empty set on any network or HTTP failure (the error is
    printed, not raised), so callers can treat "no links" uniformly.
    """
    print(f"Fetching page: {url}")
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, timeout=10, headers=headers)
        # Fail fast on 4xx/5xx instead of silently scraping an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching page: {e}")
        return set()

    soup = BeautifulSoup(response.text, "html.parser")

    # Resolve every href against the page URL, keeping only real web links.
    candidates = (urljoin(url, tag["href"]) for tag in soup.find_all("a", href=True))
    links = {link for link in candidates if link.startswith(("http://", "https://"))}

    print(f"Found {len(links)} links.")
    return links


def check_link(url):
    """
    Check a single URL and return its HTTP status code (int), or a short
    error string ("Connection Error", "Timeout", "Error: ...") on failure.

    A HEAD request is tried first for efficiency; if the server rejects it
    (405 Method Not Allowed / 403 Forbidden), falls back to GET.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # HEAD gets the status without transferring the response body.
        response = requests.head(url, timeout=10, allow_redirects=True, headers=headers)

        if response.status_code in (405, 403):
            # Some servers reject HEAD; retry with GET. stream=True means the
            # body is never downloaded — only the status line is needed.
            response = requests.get(url, timeout=10, headers=headers, stream=True)
            response.close()  # release the connection without reading the body

        return response.status_code
    except requests.exceptions.ConnectionError:
        # Server unreachable (DNS failure, refused connection, ...).
        return "Connection Error"
    except requests.exceptions.Timeout:
        return "Timeout"
    except Exception as e:
        return f"Error: {e}"


def get_status_label(status):
    """Translate an HTTP status code (or an error string) into a display label."""
    # Non-integer statuses are error strings from check_link; show them as-is.
    if not isinstance(status, int):
        return str(status)

    # Ordered rules — the first matching predicate wins.
    rules = (
        (lambda code: code < 300, " ✔ OK"),
        (lambda code: code < 400, "⚠️Redirect"),
        (lambda code: code == 404, "❌Not Found"),
        (lambda code: code == 403, "🔒Forbidden"),
        (lambda code: code >= 500, "Server Error"),
    )
    for matches, label in rules:
        if matches(status):
            return label

    # Remaining codes (e.g. other 4xx) are shown as the bare number.
    return str(status)


def export_to_csv(results, filename):
    """
    Write *results* — an iterable of ``(url, status)`` pairs where status is
    an int HTTP code or an error string — to *filename* as CSV with a
    ``URL, Status, Result`` header.  Result is "Working" for int codes
    below 400 and "Broken" otherwise.
    """
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["URL", "Status", "Result"])
        for url, status in results:
            label = "Working" if isinstance(status, int) and status < 400 else "Broken"
            writer.writerow([url, status, label])
    # Bug fix: the original printed the literal text "(unknown)" instead of
    # interpolating the actual output path.
    print(f"Results saved to: {filename}")


def check_all_links(website_url):
    """
    Crawl *website_url*, check the status of every link found on it,
    print a per-link report plus a summary of working/broken counts,
    and optionally export the full result set to a timestamped CSV.
    """
    links = get_all_links(website_url)

    if not links:
        print("No links found.")
        return

    broken = []
    working = []
    all_results = []

    print(f"{'STATUS':<15} URL")
    print("-" * 60)

    # Check each discovered link and bucket it as working or broken.
    # (The original looped with enumerate() but never used the index.)
    for link in links:
        status = check_link(link)
        label = get_status_label(status)
        all_results.append((link, status))

        print(f"{label:<15} {link}")

        # Non-integer statuses (error strings) and codes >= 400 count as broken.
        if isinstance(status, int) and status < 400:
            working.append((link, status))
        else:
            broken.append((link, status))

    # Summary
    print("\nSUMMARY")
    print("-" * 20)
    print(f"Total Links: {len(links)}")
    print(f"Working: {len(working)}")
    print(f"Broken: {len(broken)}")

    # Broken links detail
    if broken:
        print("\nBROKEN LINKS:")
        for url, status in broken:
            print(f"[{status}] {url}")

    # Offer to export the full result set (working and broken) to CSV.
    save = input("\nSave results to CSV? (y/n): ").strip().lower()
    if save == "y":
        filename = f"link_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        export_to_csv(all_results, filename)


# --- Script Entry Point ---
if __name__ == "__main__":
    print("--- Link Checker Tool ---")
    target = input("Enter website URL: ").strip()
    # Prepend a scheme when the user typed a bare host like "example.com".
    if not target.startswith("http"):
        target = "https://" + target
    check_all_links(target)
76 changes: 76 additions & 0 deletions LinkChecker/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Link Checker

A Python script that scans a website and detects broken links.

The tool extracts all links from a webpage, checks their HTTP status code, and reports whether they are working or broken. Results can also be exported to a CSV file.

## Features

- Extracts all links from a webpage
- Detects broken and working links
- Shows HTTP status codes
- Displays a summary report
- Optional CSV export of results

## Requirements

- Python 3.8+

Install dependencies:

```bash
pip install -r requirements.txt
```

## Usage

Run the script:

```bash
python linkchecker.py
```

Enter a website URL when prompted:

```
Enter website URL: https://example.com
```

## Example Output

```
--- Link Checker Tool ---

Fetching page: https://example.com
Found 25 links.

STATUS URL
------------------------------------------------------------
✔ OK https://example.com/about
⚠️ Redirect https://example.com/docs
❌ Not Found https://example.com/old-page

SUMMARY
--------------------
Total Links: 25
Working: 23
Broken: 2
```

## CSV Export

After scanning, you can save results to a CSV file:

```
Save results to CSV? (y/n):
```

A report like this will be generated:

```
link_report_20260307_143210.csv
```

## License

MIT License
2 changes: 2 additions & 0 deletions LinkChecker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
requests
beautifulsoup4
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ More information on contributing and the general code of conduct for discussion
| JSON to YAML converter | [JSON to YAML converter](https://github.com/DhanushNehru/Python-Scripts/tree/main/JSON%20to%20YAML) | Converts JSON file to YAML files. A sample JSON is included for testing. |
| Keylogger | [Keylogger](https://github.com/DhanushNehru/Python-Scripts/tree/main/Keylogger) | Keylogger that can track your keystrokes, clipboard text, take screenshots at regular intervals, and records audio. |
| Keyword - Retweeting | [Keyword - Retweeting](https://github.com/DhanushNehru/Python-Scripts/tree/main/Keyword%20Retweet%20Twitter%20Bot) | Find the latest tweets containing given keywords and then retweet them. |
| Link Checker | [Link Checker](https://github.com/JaivPatel07/Python-Scripts/tree/main/LinkChecker) | A Python script that scans a website and detects broken links. |
| LinkedIn Bot | [LinkedIn Bot](https://github.com/DhanushNehru/Python-Scripts/tree/main/LinkedIn%20Bot) | Automates the process of searching for public profiles on LinkedIn and exporting the data to an Excel sheet. |
| Longitude & Latitude to conical coverter | [Longitude Latitude conical converter](master/Longitude%20Latitude%20conical%20converter) | Converts Longitude and Latitude to Lambert conformal conic projection. |
| Mail Sender | [Mail Sender](https://github.com/DhanushNehru/Python-Scripts/tree/main/Mail%20Sender) | Sends an email. |
Expand Down