From f23639984700abe950a49cdeab51b343ba416fc4 Mon Sep 17 00:00:00 2001 From: Jaiv Patel Date: Sat, 7 Mar 2026 11:30:22 +0530 Subject: [PATCH 1/3] Add Link Checker script with README and requirements --- LinkChecker/.gitignore | 5 ++ LinkChecker/linkchecker.py | 117 +++++++++++++++++++++++++++++++++++ LinkChecker/readme.md | 76 +++++++++++++++++++++++ LinkChecker/requirements.txt | 2 + 4 files changed, 200 insertions(+) create mode 100644 LinkChecker/.gitignore create mode 100644 LinkChecker/linkchecker.py create mode 100644 LinkChecker/readme.md create mode 100644 LinkChecker/requirements.txt diff --git a/LinkChecker/.gitignore b/LinkChecker/.gitignore new file mode 100644 index 00000000..c63db526 --- /dev/null +++ b/LinkChecker/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +*.pyc +venv/ +.env +*.csv \ No newline at end of file diff --git a/LinkChecker/linkchecker.py b/LinkChecker/linkchecker.py new file mode 100644 index 00000000..76977bea --- /dev/null +++ b/LinkChecker/linkchecker.py @@ -0,0 +1,117 @@ +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin +import csv +from datetime import datetime + +def get_all_links(url): + print(f"Fetching page: {url}") + try: + headers = {"User-Agent": "Mozilla/5.0"} + response = requests.get(url, timeout=10, headers=headers) + soup = BeautifulSoup(response.text, "html.parser") + links = set() + for tag in soup.find_all("a", href=True): + full_url = urljoin(url, tag["href"]) + if full_url.startswith("http"): + links.add(full_url) + print(f"Found {len(links)} links.") + return links + except Exception as e: + print(f"Error fetching page: {e}") + return set() + + +def check_link(url): + try: + headers = {"User-Agent": "Mozilla/5.0"} + response = requests.head(url, timeout=10, allow_redirects=True, headers=headers) + # Some servers don't support HEAD, fallback to GET + if response.status_code in [405, 403]: + response = requests.get(url, timeout=10, headers=headers) + return response.status_code + except 
requests.exceptions.ConnectionError:
+        return "Connection Error"
+    except requests.exceptions.Timeout:
+        return "Timeout"
+    except Exception as e:
+        return f"Error: {e}"
+
+
+def get_status_label(status):
+    if isinstance(status, int):
+        if status < 300:
+            return " ✔ OK"
+        elif status < 400:
+            return "⚠️Redirect"
+        elif status == 404:
+            return "❌Not Found"
+        elif status == 403:
+            return "🔒Forbidden"
+        elif status >= 500:
+            return "Server Error"
+    return str(status)
+
+
+def export_to_csv(results, filename):
+    with open(filename, "w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        writer.writerow(["URL", "Status", "Result"])
+        for url, status in results:
+            label = "Working" if isinstance(status, int) and status < 400 else "Broken"
+            writer.writerow([url, status, label])
+    print(f"Results saved to: {filename}")
+
+
+def check_all_links(website_url):
+    links = get_all_links(website_url)
+
+    if not links:
+        print("No links found.")
+        return
+
+    broken = []
+    working = []
+    all_results = []
+
+    print(f"{'STATUS':<15} URL")
+    print("-" * 60)
+
+    for i, link in enumerate(links, 1):
+        status = check_link(link)
+        label = get_status_label(status)
+        all_results.append((link, status))
+
+        print(f"{label:<15} {link}")
+
+        if isinstance(status, int) and status < 400:
+            working.append((link, status))
+        else:
+            broken.append((link, status))
+
+    # Summary
+    print("\nSUMMARY")
+    print("-" * 20)
+    print(f"Total Links: {len(links)}")
+    print(f"Working: {len(working)}")
+    print(f"Broken: {len(broken)}")
+
+    # Broken links detail
+    if broken:
+        print("\nBROKEN LINKS:")
+        for url, status in broken:
+            print(f"[{status}] {url}")
+
+    # Ask to export
+    save = input("\nSave results to CSV? 
(y/n): ").strip().lower()
+    if save == "y":
+        filename = f"link_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
+        export_to_csv(all_results, filename)
+
+
+if __name__ == "__main__":
+    print("--- Link Checker Tool ---")
+    website = input("Enter website URL: ").strip()
+    if not website.startswith("http"):
+        website = "https://" + website
+    check_all_links(website)
\ No newline at end of file
diff --git a/LinkChecker/readme.md b/LinkChecker/readme.md
new file mode 100644
index 00000000..807e2e1a
--- /dev/null
+++ b/LinkChecker/readme.md
@@ -0,0 +1,76 @@
+# Link Checker
+
+A Python script that scans a website and detects broken links.
+
+The tool extracts all links from a webpage, checks their HTTP status code, and reports whether they are working or broken. Results can also be exported to a CSV file.
+
+## Features
+
+- Extracts all links from a webpage
+- Detects broken and working links
+- Shows HTTP status codes
+- Displays a summary report
+- Optional CSV export of results
+
+## Requirements
+
+- Python 3.8+
+
+Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+Run the script:
+
+```bash
+python linkchecker.py
+```
+
+Enter a website URL when prompted:
+
+```
+Enter website URL: https://example.com
+```
+
+## Example Output
+
+```
+--- Link Checker Tool ---
+
+Fetching page: https://example.com
+Found 25 links.
+
+STATUS          URL
+------------------------------------------------------------
+✔ OK          https://example.com/about
+⚠️ Redirect    https://example.com/docs
+❌ Not Found   https://example.com/old-page
+
+SUMMARY
+--------------------
+Total Links: 25
+Working: 23
+Broken: 2
+```
+
+## CSV Export
+
+After scanning, you can save results to a CSV file:
+
+```
+Save results to CSV? 
(y/n): +``` + +A report like this will be generated: + +``` +link_report_20260307_143210.csv +``` + +## License + +MIT License \ No newline at end of file diff --git a/LinkChecker/requirements.txt b/LinkChecker/requirements.txt new file mode 100644 index 00000000..a98ae430 --- /dev/null +++ b/LinkChecker/requirements.txt @@ -0,0 +1,2 @@ +requests +beautifulsoup4 \ No newline at end of file From 8a8365b8932b9e8b6b6c0b64e0bf2be56e0ac5c0 Mon Sep 17 00:00:00 2001 From: Jaiv Patel Date: Sat, 7 Mar 2026 11:38:42 +0530 Subject: [PATCH 2/3] Enhance link checker: Improve link extraction, add user-agent headers, and refine error handling --- LinkChecker/linkchecker.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/LinkChecker/linkchecker.py b/LinkChecker/linkchecker.py index 76977bea..fa748beb 100644 --- a/LinkChecker/linkchecker.py +++ b/LinkChecker/linkchecker.py @@ -4,17 +4,25 @@ import csv from datetime import datetime + def get_all_links(url): + """ + Fetches a URL and extracts all unique HTTP/HTTPS links. + """ print(f"Fetching page: {url}") try: headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(url, timeout=10, headers=headers) soup = BeautifulSoup(response.text, "html.parser") links = set() + + # Find all 'a' tags with an 'href' attribute for tag in soup.find_all("a", href=True): full_url = urljoin(url, tag["href"]) + # Ensure we only check web links if full_url.startswith("http"): links.add(full_url) + print(f"Found {len(links)} links.") return links except Exception as e: @@ -23,14 +31,22 @@ def get_all_links(url): def check_link(url): + """ + Checks a single URL's status. It first tries a HEAD request for efficiency, + then falls back to a GET request if needed. 
+ """ try: headers = {"User-Agent": "Mozilla/5.0"} + # Use a HEAD request to get status without downloading the whole page response = requests.head(url, timeout=10, allow_redirects=True, headers=headers) - # Some servers don't support HEAD, fallback to GET + + # If HEAD is not allowed (405) or forbidden (403), try a GET request if response.status_code in [405, 403]: response = requests.get(url, timeout=10, headers=headers) + return response.status_code except requests.exceptions.ConnectionError: + # Handle cases where the server is not reachable return "Connection Error" except requests.exceptions.Timeout: return "Timeout" @@ -39,6 +55,9 @@ def check_link(url): def get_status_label(status): + """ + Converts an HTTP status code or error string into a user-friendly label. + """ if isinstance(status, int): if status < 300: return " ✔ OK" @@ -54,6 +73,9 @@ def get_status_label(status): def export_to_csv(results, filename): + """ + Exports the list of checked links and their statuses to a CSV file. + """ with open(filename, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) writer.writerow(["URL", "Status", "Result"]) @@ -64,6 +86,9 @@ def export_to_csv(results, filename): def check_all_links(website_url): + """ + Main function to orchestrate the link checking process for a given website. 
+ """ links = get_all_links(website_url) if not links: @@ -77,6 +102,7 @@ def check_all_links(website_url): print(f"{'STATUS':<15} URL") print("-" * 60) + # Iterate through all found links and check their status for i, link in enumerate(links, 1): status = check_link(link) label = get_status_label(status) @@ -84,6 +110,7 @@ def check_all_links(website_url): print(f"{label:<15} {link}") + # Categorize links as working or broken if isinstance(status, int) and status < 400: working.append((link, status)) else: @@ -109,9 +136,11 @@ def check_all_links(website_url): export_to_csv(all_results, filename) +# --- Script Entry Point --- if __name__ == "__main__": print("--- Link Checker Tool ---") website = input("Enter website URL: ").strip() + # Ensure the URL has a scheme (http or https) if not website.startswith("http"): website = "https://" + website check_all_links(website) \ No newline at end of file From dba2f7bf5ca64b952ec6893978bd9ed2d8f07ddc Mon Sep 17 00:00:00 2001 From: Jaiv Patel Date: Sat, 7 Mar 2026 12:04:02 +0530 Subject: [PATCH 3/3] Add Link Checker entry to README with description and link --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5333ecbf..a46aaa50 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,7 @@ More information on contributing and the general code of conduct for discussion | JSON to YAML converter | [JSON to YAML converter](https://github.com/DhanushNehru/Python-Scripts/tree/main/JSON%20to%20YAML) | Converts JSON file to YAML files. A sample JSON is included for testing. | | Keylogger | [Keylogger](https://github.com/DhanushNehru/Python-Scripts/tree/main/Keylogger) | Keylogger that can track your keystrokes, clipboard text, take screenshots at regular intervals, and records audio. | | Keyword - Retweeting | [Keyword - Retweeting](https://github.com/DhanushNehru/Python-Scripts/tree/main/Keyword%20Retweet%20Twitter%20Bot) | Find the latest tweets containing given keywords and then retweet them. 
|
+| Link Checker | [Link Checker](https://github.com/DhanushNehru/Python-Scripts/tree/main/LinkChecker) | A Python script that scans a website and detects broken links. |
 | LinkedIn Bot | [LinkedIn Bot](https://github.com/DhanushNehru/Python-Scripts/tree/main/LinkedIn%20Bot) | Automates the process of searching for public profiles on LinkedIn and exporting the data to an Excel sheet. |
 | Longitude & Latitude to conical coverter | [Longitude Latitude conical converter](master/Longitude%20Latitude%20conical%20converter) | Converts Longitude and Latitude to Lambert conformal conic projection. |
 | Mail Sender | [Mail Sender](https://github.com/DhanushNehru/Python-Scripts/tree/main/Mail%20Sender) | Sends an email. |