Source code for expliot_finder.scraper.core.sites_finder

"""Search pages with CVE or ready exploits for captured 'service_version'.

Information detected by this module will be saved and returned in the following
form:

.. code-block:: python

    # Returns a list of URLs that lead to CVEs or exploits that fully or
    # partially match the captured service version.
    [
        'https://www.exploit-db.com/exploits/21314',
        'https://www.cvedetails.com/vulnerability-list/vendor_id-120/product_id-317/SSH-Ssh2.html',
        ...
    ]
"""

# Public API: the only name exported by a star-import of this module.
__all__ = ("GoogleSitesFinder",)

from urllib import parse

from requests_html import AsyncHTMLSession, HTMLResponse


class GoogleSitesFinder:
    """Finder of ready exploits and CVEs on the web for a captured service.

    A Google search URL is built from one captured service version, the
    query is sent, every absolute link is extracted from the response and
    the links are then filtered by URL prefix so that only pages from the
    requested site are returned.

    One instance handles exactly one service version; when several versions
    have been captured, create one instance per version.

    Attributes:
        search_query: Google search URL for the service version. Assigning a
            service version stores ``base_query + quote_plus(version)``;
            reading the attribute returns that URL with ``" exploit"``
            appended.

    .. automethod:: __send_search_query
    .. automethod:: __extract_urls
    """

    __slots__ = ("_search_query",)

    def __init__(self, service_version: str) -> None:
        """Init GoogleSitesFinder class.

        Args:
            service_version: Captured service version that will be used as
                the search query for finding ready exploits and CVEs.
        """
        # Goes through the ``search_query`` setter, which builds the URL.
        self.search_query = service_version

    def __repr__(self) -> str:
        """Return the class name together with the stored slot values.

        Returns:
            A string such as ``GoogleSitesFinder({'_search_query': '...'})``.
        """
        # The class declares __slots__ and therefore has no instance
        # __dict__, so ``vars(self)`` would raise TypeError. Collect the
        # slot values explicitly instead (skipping slots not yet assigned).
        state = {
            name: getattr(self, name)
            for name in self.__slots__
            if hasattr(self, name)
        }
        return f"{self.__class__.__name__}({state!r})"

    @property
    def search_query(self) -> str:
        """Get the Google search URL.

        Returns:
            The stored search URL with ``" exploit"`` appended.
        """
        # NOTE: the suffix is appended after URL-encoding, so the returned
        # string contains a literal space. Kept as-is so callers receive the
        # same value as before.
        return self._search_query + " exploit"

    @search_query.setter
    def search_query(self, service_version: str) -> None:
        """Set 'search_query' by combining the base query and the version.

        Base query value: 'https://www.google.co.uk/search?q='

        Args:
            service_version: Captured version of the service; it is
                URL-encoded with ``quote_plus`` before being appended.
        """
        base_query: str = "https://www.google.co.uk/search?q="
        self._search_query = base_query + parse.quote_plus(service_version)

    @staticmethod
    async def __send_search_query(search_query: str) -> "HTMLResponse":
        """Send 'search_query' with a GET request on an async session.

        Args:
            search_query: Search URL used to find ready exploits or CVEs.

        Returns:
            The HTML response object returned for the GET request.
        """
        session = AsyncHTMLSession()
        try:
            return await session.get(search_query)
        finally:
            # A new session is created for every query; close it so its
            # connection pool is not leaked each time.
            await session.close()

    @staticmethod
    def __extract_urls(response: "HTMLResponse") -> list[str]:
        """Extract all absolute URLs from the HTML response.

        Args:
            response: HTML response returned for the search query.

        Returns:
            The response's absolute links as a list (order is not
            guaranteed because it is built directly from
            ``response.html.absolute_links``).
        """
        return list(response.html.absolute_links)

    @staticmethod
    def filter_extracted_urls(site: str, urls: list[str]) -> list[str]:
        """Filter extracted URLs down to those belonging to 'site'.

        Args:
            site: URL prefix to keep, e.g. 'https://www.exploit-db.com' or
                'https://www.cvedetails.com'.
            urls: Links extracted from the HTML response content.

        Returns:
            The URLs from 'urls' that start with 'site', in their original
            order.
        """
        return [url for url in urls if url.startswith(site)]

    async def search_for_pages(self, site: str) -> list[str]:
        """Search Google and return the result links that belong to 'site'.

        Args:
            site: Which site the scraper should look for, e.g.
                'https://www.exploit-db.com' or 'https://www.cvedetails.com'.

        Returns:
            Links from the search response that start with 'site'.
        """
        response: HTMLResponse = await self.__send_search_query(self.search_query)
        return self.filter_extracted_urls(site, urls=self.__extract_urls(response))