
--------------------------------------------------------------------------------
PREPARE TARGET LIST
--------------------------------------------------------------------------------

Add the target domains to the file domains.txt, one per line:

testphp.vulnweb.com
demo.testfire.net
juice-shop.herokuapp.com

--------------------------------------------------------------------------------
QUERY WAYBACK MACHINE
--------------------------------------------------------------------------------

# Single domain — quick test (overwrites any existing wayback_raw.txt)
waybackurls example.com > wayback_raw.txt

# Multiple domains from domains.txt — with rate limiting.
# Results are appended to wayback_raw.txt.
#  - `|| [ -n "$domain" ]` keeps the last entry when domains.txt has no
#    trailing newline (read returns non-zero there but still fills $domain).
#  - Blank lines and lines starting with '#' are skipped, so an empty line
#    never turns into `waybackurls ""`.
#  - read with the default IFS trims whitespace around each entry.
while read -r domain || [ -n "$domain" ]; do
    case "$domain" in
        '' | '#'*) continue ;;
    esac
    echo "[*] Fetching: $domain"
    # stdin is pointed at /dev/null so the command cannot consume the
    # remaining lines of domains.txt that feed this loop.
    waybackurls "$domain" < /dev/null >> wayback_raw.txt
    sleep 0.5   # be polite to the archive APIs
done < domains.txt

echo "[✓] Total lines collected: $(wc -l < wayback_raw.txt)"

--------------------------------------------------------------------------------
CLEAN INITIAL LIST
--------------------------------------------------------------------------------

# Keep only http(s) URLs, strip "#fragment" suffixes and a single trailing
# slash, then de-duplicate.
# sort -u must run AFTER the sed normalisation: "http://a/x#one" and
# "http://a/x/" both collapse to "http://a/x", so de-duplicating first would
# leave repeated lines in wayback_clean.txt and inflate the count below.
grep -E '^https?://' wayback_raw.txt \
  | sed -e 's/#.*$//' -e 's:/$::' \
  | sort -u \
  > wayback_clean.txt

echo "[✓] Unique clean URLs: $(wc -l < wayback_clean.txt)"

--------------------------------------------------------------------------------
LOOK FOR POTENTIALLY SENSITIVE DATA
--------------------------------------------------------------------------------

# URLs whose path contains a segment that starts with a well-known sensitive
# name (login pages, admin panels, backups, VCS directories, ...).
# Matching is case-insensitive.
pattern='/(login|admin|wp-login|wp-admin|register|api|wp-json|xmlrpc|uploads|backup|\.env|phpmyadmin|config|\.git|\.svn|dashboard|console)'
grep -iE "$pattern" wayback_clean.txt | sort -u > sensitive_paths.txt

# Also catch credential-like keywords. The match is case-insensitive and
# applies anywhere in the URL (path or query string), so expect some false
# positives such as "/author/". Output is de-duplicated like sensitive_paths.txt.
grep -iE '(api[_-]?key|token|secret|password|passwd|auth|credential)' \
  wayback_clean.txt | sort -u > potential_secrets.txt

echo "[✓] Sensitive paths  : $(wc -l < sensitive_paths.txt)"
echo "[✓] Potential secrets: $(wc -l < potential_secrets.txt)"

--------------------------------------------------------------------------------
LOOK FOR URLS WITH PARAMETERS (POTENTIAL VULNERABILITIES)
--------------------------------------------------------------------------------

# Keep every URL that contains a literal '?', i.e. has a query string.
# -F treats the pattern as a fixed string; '\?' in a basic regex is not
# specified by POSIX and is the "optional" operator in GNU grep, so it is
# avoided here.
grep -F '?' wayback_clean.txt | sort -u > urls_with_params.txt

echo "[✓] URLs with parameters: $(wc -l < urls_with_params.txt)"


--------------------------------------------------------------------------------
SEARCH FOR URLS THAT MAY BE SUSCEPTIBLE TO SPECIFIC VULNERABILITY CLASSES
--------------------------------------------------------------------------------

# Run each gf pattern over the cleaned URL list and store the hits in
# <pattern>_candidates.txt:
#   🔴 xss      — parameters that are likely reflected in the page output
#   🟠 sqli     — parameters likely passed to a database query
#   🟡 ssrf     — parameters containing URLs (redirect targets, remote resources)
#   🔵 redirect — open redirect: redirect / next / url parameters
for vuln_class in xss sqli ssrf redirect; do
  gf "$vuln_class" < wayback_clean.txt > "${vuln_class}_candidates.txt"
done

# Summary: one line per class, label padded to a fixed width of 11 columns.
printf '%-11s%s candidates\n' "XSS:"      "$(wc -l < xss_candidates.txt)"
printf '%-11s%s candidates\n' "SQLi:"     "$(wc -l < sqli_candidates.txt)"
printf '%-11s%s candidates\n' "SSRF:"     "$(wc -l < ssrf_candidates.txt)"
printf '%-11s%s candidates\n' "Redirect:" "$(wc -l < redirect_candidates.txt)"


# Probe sensitive paths.
# One thread at one request per second keeps the load on the targets minimal.
# Each output line holds the URL followed by its status code and page title.
httpx -l sensitive_paths.txt \
  -threads 1 -rate-limit 1 \
  -silent -status-code -title \
  -o live_sensitive.txt

# Probe parameterised URLs (same throttling and output format)
httpx -l urls_with_params.txt \
  -threads 1 -rate-limit 1 \
  -silent -status-code -title \
  -o live_params.txt

echo "[✓] Live sensitive paths : $(wc -l < live_sensitive.txt)"
echo "[✓] Live parameterised   : $(wc -l < live_params.txt)"


# Scan the live sensitive URLs with nuclei.
# live_sensitive.txt was written by httpx with -status-code and -title, so
# each line carries extra "[...]" columns after the URL. nuclei's -l expects
# one bare target per line, so keep only the first whitespace-separated
# field (a no-op on lines that are already bare URLs) and de-duplicate.
awk '{print $1}' live_sensitive.txt | sort -u > nuclei_targets.txt

# NOTE(review): recent nuclei-templates releases appear to keep these
# directories under http/ (e.g. http/exposures/) — confirm the paths against
# the installed templates.
nuclei -l nuclei_targets.txt \
  -t exposures/ \
  -t misconfiguration/ \
  -t takeovers/ \
  -severity low,medium,high,critical \
  -o nuclei_results.txt