Binary file added .DS_Store
Binary file not shown.
35 changes: 35 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,35 @@
# Changelog

## [3.1.0] – 2025-07-17

### Added
* **User-Agent API demo** – `test_headers.py` showcasing every helper on the new `Headers` class.
* **Browser selection CLI switch** – `-b / --browser` option in `darkdump.py` to pick a UA family (Chrome, Firefox, IE / Edge, Opera, Safari, Mobile).

### Changed
#### `headers/agents.py`
* Refactored into a fully-featured **`Headers`** manager:
* Categorised >200 UA strings by browser family.
* Fixed one malformed UA and removed duplicates.
* New helpers:
* `get_random_agent()` – any UA.
* `get_random_by_browser(browser_type)` – browser-specific UA.
* `get_random_by_os(os_type)` – OS-specific UA.
* `get_modern_agent()` – modern (2022+) UA.
* Added doc-strings, type hints and runtime validation.
* Maintains legacy `user_agents` list for backward compatibility.

#### `darkdump.py`
* Integrates the new `Headers` API:
* Picks UA through the helper methods.
* Accepts `-b / --browser` argument.
* Debug mode now prints the selected UA.
* Minor docs & argument-parsing improvements.

### Documentation
* **README.md** updated:
* Describes the new UA system and CLI switch.
* Adds example commands and usage notes.

### Notes
* Version bumped to **3.1.0** to reflect backward-compatible feature upgrade.
35 changes: 35 additions & 0 deletions README.md
@@ -7,6 +7,7 @@ Darkdump is an OSINT interface for carrying out deep web investigations written in
1) ``git clone https://github.com/josh0xA/darkdump``<br/>
2) ``cd darkdump``<br/>
3) ``python3 -m pip install -r requirements.txt``<br/>
• Make sure `PySocks` is installed (it is now listed in `requirements.txt`).
4) ``python3 darkdump.py --help``<br/>

### Tor Configuration
@@ -32,10 +33,18 @@ Replace `[YourHashedPasswordHere]` with a hashed password which can be generated
Linux: `sudo systemctl start tor.service`<br/>
MacOS: `brew services start tor`<br/>

> **Port note:**
> • **Tor Browser** exposes a SOCKS5 proxy on **`localhost:9150`** (this is the default that Darkdump now uses).
> • The system Tor daemon typically listens on **`localhost:9050`**.
> • If you are running the daemon on 9050 instead of Tor Browser, either edit `Configuration.__socks5init__` in `darkdump.py` or start Tor Browser so the 9150 proxy is available.

Darkdump will test the connection automatically and print an error if the proxy cannot be reached.
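To verify the proxy yourself before launching a scrape, a check roughly equivalent to Darkdump's built-in test looks like this (a minimal sketch, assuming Tor Browser's default `9150` port; substitute `9050` for the system daemon):

```python
import requests  # requires PySocks for socks5h:// support

# SOCKS5 proxy exposed by Tor Browser (use port 9050 for the system daemon)
proxies = {
    "http": "socks5h://localhost:9150",
    "https": "socks5h://localhost:9150",
}

try:
    # api.ipify.org echoes the IP the request arrived from, so a
    # successful call confirms traffic is actually leaving via Tor
    response = requests.get("http://api.ipify.org", proxies=proxies, timeout=10)
    print(f"Tor is reachable; current exit IP: {response.text}")
except requests.RequestException as exc:
    print(f"Tor proxy unreachable: {exc}")
```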

### Example Queries:
`python3 darkdump.py -q "hacking" -a 10 --scrape --proxy` - search for 10 links and scrape each site <br/>
`python3 darkdump.py -q "free movies" -a 25` - don't scrape, just return 25 links for that query (does not require tor) <br/>
`python3 darkdump.py -q "marketplaces" -a 15 --scrape --proxy -i` - search for 10 links and scrape each site as well as find and store images.
`python3 darkdump.py -q "cryptography" -a 5 --scrape --proxy -b firefox` – scrape 5 results while forcing a Firefox-specific User-Agent.

## Menu
```
@@ -61,8 +70,34 @@ options:
-p, --proxy use tor proxy for scraping
-i, --images scrape images and visual content from the site
-s, --scrape scrape the actual site for content and look for keywords
-b {chrome,firefox,ie,edge,opera,safari,mobile}, --browser {chrome,firefox,ie,edge,opera,safari,mobile}
specify the browser family to use when randomly
choosing a User-Agent header (default is a completely
random choice)

```

## Enhanced User-Agent Handling 🚀
Darkdump ships with an overhauled **`headers/agents.py`** module that now:

* Organises more than 200 User-Agent strings by browser family (Chrome, Firefox, IE / Edge, Opera, Safari, Mobile).
* Provides convenience helpers:
* `Headers.get_random_agent()` – any UA
* `Headers.get_random_by_browser('chrome')` – browser-specific UA
* `Headers.get_random_by_os('windows')` – OS-specific UA
* `Headers.get_modern_agent()` – modern (2022+) UA
* Fixes malformed strings and removes duplicates.

Use the new **`-b / --browser`** CLI switch (see Menu above) to restrict the UA that Darkdump advertises during requests.
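
As a quick illustration, here is a minimal sketch of the helper API (assuming the `headers.agents` import path implied by the module layout; each call picks a User-Agent at random, so output will vary):

```python
import random

from headers.agents import Headers

# Fully random User-Agent (what Darkdump uses when -b is omitted)
print(Headers.get_random_agent())

# Restrict the pool to one browser family, as the -b/--browser switch does
print(Headers.get_random_by_browser("firefox"))

# OS-specific and modern (2022+) selections
print(Headers.get_random_by_os("windows"))
print(Headers.get_modern_agent())

# The legacy flat list remains available for backward compatibility
print(random.choice(Headers.user_agents))
```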

For a hands-on tour of the new API, run the helper script:

```bash
python test_headers.py
```

It prints examples for every helper method and shows category counts.

## Visual
<p align="center">
<img src="imgs/darkdump_example.png">
2 changes: 1 addition & 1 deletion banner/banner.py
@@ -28,7 +28,7 @@ class Banner(object):
def LoadDarkdumpBanner(self):
try:
from termcolor import cprint, colored
banner = '''
banner = r'''
_ _ _ __
__| | __ _ _ __| | ____| |_ _ _ __ ___ _ __ / /
/ _` |/ _` | '__| |/ / _` | | | | '_ ` _ \| '_ \ / /
84 changes: 73 additions & 11 deletions darkdump.py
@@ -89,7 +89,8 @@ class Configuration:
descriptions = []
urls = []

__socks5init__ = "socks5h://localhost:9050"
# Default Tor Browser SOCKS port is 9150 (9050 is used by the system daemon)
__socks5init__ = "socks5h://localhost:9150"
__darkdump_api__ = "https://ahmia.fi/search/?q="

class Platform(object):
@@ -120,15 +121,18 @@ def clean_screen(self):
else: os.system('cls')
else: pass

def check_tor_connection(self, proxy_config):
def check_tor_connection(self, proxy_config, *, debug: bool = False):
test_url = 'http://api.ipify.org'
try:
response = requests.get(test_url, proxies=proxy_config, timeout=10)
print(f"{Colors.BOLD + Colors.G}Tor service is active. {Colors.END}")
print(f"{Colors.BOLD + Colors.P}Current IP Address via Tor: {Colors.END}{response.text}")
return True # Connection was successful
except:
print(f"{Colors.BOLD + Colors.R} Tor is inactive or not configured properly. Cannot scrape. {Colors.END}")
except Exception as exc:
print(f"{Colors.BOLD + Colors.R}Tor is inactive or not configured properly. Cannot scrape.{Colors.END}")
if debug:
# Provide the underlying reason when debug flag is set
print(f"{Colors.BOLD + Colors.R}[DEBUG] Tor connectivity error: {exc}{Colors.END}")
return False

class Darkdump(object):
@@ -220,9 +224,50 @@ def extract_document_links(self, soup):
return links


def crawl(self, query, amount, use_proxy=False, scrape_sites=False, scrape_images=False, debug_mode=False):
headers = {'User-Agent': random.choice(Headers.user_agents)}
proxy_config = {'http': 'socks5h://localhost:9050', 'https': 'socks5h://localhost:9050'} if use_proxy else {}
def crawl(
self,
query,
amount,
use_proxy: bool = False,
scrape_sites: bool = False,
scrape_images: bool = False,
debug_mode: bool = False,
browser_type: str | None = None,
):
"""
Crawl Ahmia results and optionally scrape target onion sites.

Parameters
----------
browser_type : str | None
If provided, restrict the randomly-selected User-Agent header to the
specified browser family (chrome, firefox, ie, edge, opera, safari,
mobile). Falls back to a completely random User-Agent when omitted.
"""
# Determine an appropriate user-agent string
if browser_type:
try:
user_agent = Headers.get_random_by_browser(browser_type)
except ValueError:
# Fallback to a fully random UA if an invalid browser_type slips
# through (should be prevented by argparse choices).
user_agent = random.choice(Headers.user_agents)
else:
user_agent = random.choice(Headers.user_agents)

# ------------------------------------------------------------------ #
# Debug helper – show chosen User-Agent when debug mode is enabled
# ------------------------------------------------------------------ #
if debug_mode:
browser_lbl = browser_type if browser_type else "random"
print(f"{Colors.BOLD}{Colors.C}[DEBUG] Using User-Agent ({browser_lbl}): "
f"{Colors.END}{user_agent}")

headers = {"User-Agent": user_agent}
proxy_config = (
{'http': Configuration.__socks5init__, 'https': Configuration.__socks5init__}
if use_proxy else {}
)

# Fetching the initial search page
try:
@@ -236,10 +281,12 @@ def crawl(self, query, amount, use_proxy=False, scrape_sites=False, scrape_image

seen_urls = set() # This set will store URLs to avoid duplicates

if scrape_sites:
if Platform(True).check_tor_connection(proxy_config) == False: return
if scrape_sites:
# Forward the debug flag so we reveal connection errors when requested
if not Platform(True).check_tor_connection(proxy_config, debug=debug_mode):
return

for idx, result in enumerate(second_results[:min(amount + 1, len(second_results))], start=1):
for idx, result in enumerate(second_results[:min(amount, len(second_results))]):
site_url = result.find('cite').text
if "http://" not in site_url and "https://" not in site_url:
site_url = "http://" + site_url
@@ -317,6 +364,13 @@ def darkdump_main():
parser.add_argument("-i", "--images", help="scrape images and visual content from the site", action="store_true")
parser.add_argument("-s", "--scrape", help="scrape the actual site for content and look for keywords", action="store_true")
parser.add_argument("-d", "--debug", help="enable debug output", action="store_true")
parser.add_argument(
"-b", "--browser",
help=("specify browser type for the User-Agent header "
"(chrome, firefox, ie, edge, opera, safari, mobile)"),
choices=['chrome', 'firefox', 'ie', 'edge', 'opera', 'safari', 'mobile'],
type=str
)

args = parser.parse_args()

@@ -338,7 +392,15 @@ def darkdump_main():

if args.query:
print(f"Searching For: {args.query} and showing {args.amount} results...\nIndexing is viable, skipping dead onions.\n")
Darkdump().crawl(args.query, args.amount, use_proxy=args.proxy, scrape_sites=args.scrape, scrape_images=args.images, debug_mode=args.debug)
Darkdump().crawl(
args.query,
args.amount,
use_proxy=args.proxy,
scrape_sites=args.scrape,
scrape_images=args.images,
debug_mode=args.debug,
browser_type=args.browser
)
else:
print("[~] Note: No query arguments were passed. Please supply a query to search.")

Binary file added headers/__pycache__/agents.cpython-313.pyc
Binary file not shown.