Attack surface analysis script

Published by Dionata Suzin (last updated 11/10/2024)


Download web_audit_new.py




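A minimal way to run the script, assuming Python 3 and that the listing below is saved as web_audit_new.py (the download name above): install the two third-party dependencies, set the domain variable at the top of the script to the target domain, and run it. The report is written to report_<domain>.html in the current directory.

pip install requests beautifulsoup4
python3 web_audit_new.py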

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import re
import json

# Target domain, defined directly in the script
domain = "exemplo.com.br"

# Crawl the target site and collect every exposed same-domain URL
def full_web_crawl(base_url):
    visited_urls = set()
    pending_urls = set([base_url])

    while pending_urls:
        url = pending_urls.pop()
        if url not in visited_urls:
            visited_urls.add(url)
            try:
                response = requests.get(url, timeout=10)
                if response.status_code == 200:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    # Collect every link on the current page
                    for link in soup.find_all('a', href=True):
                        absolute_url = urljoin(base_url, link['href'])
                        parsed_url = urlparse(absolute_url)

                        # Keep only URLs on the base domain and drop fragment anchors
                        if parsed_url.netloc == urlparse(base_url).netloc:
                            clean_url = absolute_url.split('#')[0]
                            if clean_url not in visited_urls:
                                pending_urls.add(clean_url)

            except requests.RequestException as e:
                print(f"Erro ao acessar {url}: {e}")
    
    return visited_urls

# Look up exposed subdomains through crt.sh certificate transparency logs
def find_subdomains(domain):
    subdomains = set()
    try:
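        # %25 is the URL-encoded '%' wildcard, so the query matches every certificate name under the domain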
        crtsh_url = f"https://crt.sh/?q=%25.{domain}&output=json"
        response = requests.get(crtsh_url, timeout=30)
        if response.status_code == 200:
            data = response.json()
            for entry in data:
                name_value = entry.get("name_value")
                if name_value:
                    subdomains.update(name_value.split("\n"))
    except requests.RequestException as e:
        print(f"Erro ao buscar subdomínios para {domain}: {e}")
    
    return subdomains

# Search for exposed URLs with Google dorks
def google_dork_search(domain):
    dorks = [
        f"site:{domain} intitle:index of",
        f"site:{domain} ext:php",
        f"site:{domain} ext:sql",
        f"site:{domain} ext:log",
        f"site:{domain} inurl:admin",
        f"site:{domain} inurl:login",
        f"site:{domain} inurl:backup",
        f"site:{domain} ext:bak",
        f"site:{domain} ext:old",
        f"site:{domain} ext:swp",
        f"site:{domain} inurl:config",
        f"site:{domain} ext:xml",
        f"site:{domain} ext:json",
        f"site:{domain} inurl:dbadmin",
        f"site:{domain} inurl:dev",
        f"site:{domain} inurl:test",
        f"site:{domain} inurl:staging"
    ]
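    # Note: Google rate-limits and often CAPTCHAs automated searches, so these queries may return nothing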
    found_urls = []
    for dork in dorks:
        try:
            print(f"Pesquisando por: {dork}")
            url = f"https://www.google.com/search?q={dork}"
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            
            for link in soup.find_all('a'):
                href = link.get('href')
                if href and "url?q=" in href:
                    url_found = href.split("url?q=")[1].split("&")[0]
                    found_urls.append(url_found)
        except requests.exceptions.RequestException as e:
            print(f"Erro ao acessar {dork}: {e}")
    
    return found_urls

# Fetch archived URLs from the Wayback Machine CDX API
def wayback_machine_search(domain):
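    # CDX query: fl=original returns only the captured URL; collapse=urlkey removes duplicate captures of the same URL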
    url = f"https://web.archive.org/cdx/search/cdx?url=*.{domain}/*&output=json&fl=original&collapse=urlkey"
    try:
        print(f"Buscando URLs arquivadas via Wayback Machine para {domain}")
        response = requests.get(url)
        response.raise_for_status()
        # The first row of the JSON response is the CDX header, so skip it
        urls = [entry[0] for entry in response.json()[1:]]
        return urls
    except requests.exceptions.RequestException as e:
        print(f"Erro ao acessar Wayback Machine: {e}")
        return []

# Probe the site for common sensitive paths, including ones that are not indexed
def web_crawl(domain):
    sensitive_paths = [
        'admin', 'login', 'backup', 'config', 'private', 'phpmyadmin',
        'webmail', 'wp-admin', 'dashboard', 'dbadmin', 'test', 'staging',
        'dev', 'sql', 'server-status', 'cgi-bin', 'shell', 'cmd',
        'config.php', 'passwd', 'password', 'database', 'db', 'logs',
        'temp', 'wp-login.php', 'wp-content', 'uploads', 'downloads',
        'sensitive-data', 'customer-data', 'user-data', 'backup.zip',
        'backup.tar', 'admin.php', 'access', 'htaccess', 'backup.sql',
        'api', 'debug', 'adminer', 'adminpanel',
        'v1', 'v2', 'old', 'outdated', 'backup_old', 'src', 'source',
        'error', 'log', 'hidden', 'local', 'files', 'data'
    ]
    found_paths = []
    base_url = f"https://{domain}/"

    for path in sensitive_paths:
        url = f"https://{domain}/{path}"
        try:
            response = requests.get(url, allow_redirects=True, timeout=10)
            if response.status_code == 200:
                # Record the path only if the request was not redirected back to the site's homepage
                if response.url.rstrip('/') != base_url.rstrip('/'):
                    found_paths.append(url)
            else:
                print(f"URL {url} retornou status {response.status_code}")
        except requests.exceptions.RequestException as e:
            print(f"Erro ao acessar {url}: {e}")
    
    return found_paths


# Generate an HTML report with all URLs that were found
def generate_html_report(domain, crawled_urls, subdomains, google_urls, wayback_urls, sensitive_urls):
    html_content = f"""
    <html>
    <head>
        <title>Relatório de URLs Expostas - {domain}</title>
        <style>
            body {{ font-family: Arial, sans-serif; background-color: #f4f4f4; padding: 20px; }}
            table {{ width: 100%; border-collapse: collapse; margin-bottom: 20px; }}
            th, td {{ border: 1px solid #dddddd; padding: 8px; text-align: left; }}
            th {{ background-color: #1E90FF; color: white; }}
            td {{ background-color: #f2f2f2; }}
            h1 {{ color: #1E90FF; }}
            h2 {{ color: #4682B4; }}
            tr:nth-child(even) {{ background-color: #E0FFFF; }}
        </style>
    </head>
    <body>
        <h1>Relatório de URLs Expostas para {domain}</h1>
        
        <h2>URLs Encontradas via Web Crawling:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if crawled_urls:
        for url in crawled_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>Nenhuma URL encontrada via crawling.</td></tr>"
    
    html_content += """
        </table>
        <h2>Subdomínios Encontrados:</h2>
        <table>
            <tr><th>Subdomínio</th></tr>
    """
    if subdomains:
        for subdomain in subdomains:
            html_content += f"<tr><td>{subdomain}</td></tr>"
    else:
        html_content += "<tr><td>Nenhum subdomínio encontrado.</td></tr>"

    html_content += """
        </table>
        <h2>Google Dorks:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if google_urls:
        for url in google_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>Nenhuma URL encontrada via Google Dorks.</td></tr>"
    
    html_content += """
        </table>
        <h2>Wayback Machine URLs:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if wayback_urls:
        for url in wayback_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>Nenhuma URL encontrada via Wayback Machine.</td></tr>"

    html_content += """
        </table>
        <h2>URLs Sensíveis Encontradas:</h2>
        <table>
            <tr><th>URL</th></tr>
    """
    if sensitive_urls:
        for url in sensitive_urls:
            html_content += f"<tr><td>{url}</td></tr>"
    else:
        html_content += "<tr><td>Nenhuma URL sensível encontrada.</td></tr>"
    
    html_content += """
        </table>
    </body>
    </html>
    """
    with open(f"relatorio_{domain}.html", "w") as report_file:
        report_file.write(html_content)
    print(f"Relatório HTML gerado: relatorio_{domain}.html")


if __name__ == "__main__":
    # Discover exposed URLs and subdomains
    crawled_urls = full_web_crawl(f"https://{domain}")
    subdomains = find_subdomains(domain)
    google_urls = google_dork_search(domain)
    wayback_urls = wayback_machine_search(domain)
    sensitive_urls = web_crawl(domain)

    # Generate the report
    generate_html_report(domain, crawled_urls, subdomains, google_urls, wayback_urls, sensitive_urls)
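
The crawler in full_web_crawl follows every same-domain link it finds with no upper bound, so on a large site it can run for a very long time. Below is a minimal sketch of a bounded variant; the max_pages parameter is an assumption of this sketch, not part of the original script, and it reuses the imports already at the top of the listing.

def bounded_web_crawl(base_url, max_pages=200):
    # Same idea as full_web_crawl, but stops after visiting max_pages pages
    visited_urls = set()
    pending_urls = {base_url}
    base_netloc = urlparse(base_url).netloc

    while pending_urls and len(visited_urls) < max_pages:
        url = pending_urls.pop()
        if url in visited_urls:
            continue
        visited_urls.add(url)
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as e:
            print(f"Error accessing {url}: {e}")
            continue
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'html.parser')
        for link in soup.find_all('a', href=True):
            # Resolve relative links, drop fragment anchors, and stay on the same host
            absolute_url = urljoin(url, link['href']).split('#')[0]
            if urlparse(absolute_url).netloc == base_netloc and absolute_url not in visited_urls:
                pending_urls.add(absolute_url)

    return visited_urls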



Comments
[1] Comment posted by maurixnovatrento on 11/10/2024 - 12:53h

Excellent script.
______________________________________________________________________
Subscribe to my channel: https://www.youtube.com/@LinuxDicasPro
The channel's GitHub repository: https://github.com/LinuxDicasPro
Telegram group: https://t.me/LinuxDicasPro
My personal GitHub: https://github.com/mxnt10

