Source code for expliot_finder.scraper.core.cve_scrapper
"""CVE(Common Vulnerabilities and Exposures) scrapper.
Relying on the version of the service that was captured after the target was
scanned by the 'vulnerability_scanner' module, this module tries to find the
most relevant CVEs on the web by using a scraping technique. If this module
finds CVEs, the chance of finding a matching exploit increases. Information
detected by this module is saved and returned in the following form:

.. code-block:: python

    # Returns the URL of the CVE most suitable for the captured version of
    # the service
    'https://www.cvedetails.com/cve/CVE-2002-1646/'
"""
# Public API of this module: only the finder class is exported by a
# star-import.
__all__ = ("SuitableCVEFinder",)
import re
from typing import Optional
import aiohttp
from bs4 import BeautifulSoup
class SuitableCVEFinder:
    """Pick the most relevant CVE from a 'https://www.cvedetails.com' table page.

    The methods of this class are meant to run after the 'sites_finder'
    module has found a page whose HTML table lists several CVEs for the
    detected service. Their purpose is to extract, from that table, the CVE
    that best matches the service version captured when the target was
    scanned by the 'vulnerability_scanner' module.

    Attributes:
        service_version:
            Detected version of the service for which the CVE is searched.
        cve_table_url:
            URL of the page with an HTML table containing partially matching
            CVEs for the detected service. The scraper only pulls out the
            most suitable CVE.
    """

    __slots__ = (
        "cve_table_url",
        "service_version",
    )

    def __init__(self, cve_table_url: str, service_version: str) -> None:
        """Init SuitableCVEFinder class.

        Args:
            cve_table_url:
                URL of a page with an HTML table listing a few CVE documents.
            service_version:
                Single detected service version.
        """
        self.cve_table_url: str = cve_table_url
        self.service_version: str = service_version

    def __repr__(self) -> str:
        """Return the class name followed by a mapping of its attributes.

        Returns:
            A string such as
            ``SuitableCVEFinder({'cve_table_url': ..., 'service_version': ...})``.
        """
        # ``vars(self)`` cannot be used here: the class declares ``__slots__``,
        # so instances have no ``__dict__`` and ``vars()`` raises TypeError.
        # Slots that were never assigned (e.g. ``__init__`` was bypassed) are
        # left out instead of raising AttributeError.
        attributes = {
            name: getattr(self, name)
            for name in self.__slots__
            if hasattr(self, name)
        }
        return f"{self.__class__.__name__}({attributes!r})"

    async def get_page_content(self) -> bytes:
        """Create an async client session and perform a GET request.

        The request targets 'self.cve_table_url', the page with the CVEs
        stored in an HTML table. The HTTP status is not checked, so the body
        of an error response is returned as well.

        Returns:
            Raw body of the response.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(self.cve_table_url) as response:
                return await response.read()

    @staticmethod
    def _find_cve_link(summary_cell):
        """Walk from a summary cell to the ``<a>`` tag of its CVE entry.

        The hops are: previous element, previous element, its parent, the
        first ``<td>`` inside it, the next ``<td>``, and the ``<a>`` inside
        that one. The sequence depends entirely on the cvedetails.com table
        markup (presumably the CVE link sits in the second cell of the row
        preceding the summary row; verify against the live page).

        Args:
            summary_cell:
                A ``<td class="cvesummarylong">`` element of the parsed page.

        Returns:
            The ``<a>`` element, or None as soon as any hop finds nothing.
        """
        hops = (
            lambda node: node.find_previous(),
            lambda node: node.find_previous(),
            lambda node: node.find_parent(),
            lambda node: node.find("td"),
            lambda node: node.find_next("td"),
            lambda node: node.find("a"),
        )
        current = summary_cell
        for hop in hops:
            current = hop(current)
            if current is None:
                return None
        return current

    @staticmethod
    async def scrape_cve_table_page(
        page_content: bytes, parsed_service_ver: list[str]
    ) -> Optional[str]:
        """Scrape the HTML table to find the most suitable CVE for the service.

        'page_content' holds a page with an HTML table filled with CVEs that
        only partially match the service: the table was found by the
        'sites_finder' module, which searched by service name rather than by
        exact service version. The table therefore lists CVEs for different
        versions of the captured service. This scraper returns the first
        entry whose summary text contains one of the version tokens.

        The page must come from the domain 'https://www.cvedetails.com'.

        Args:
            page_content:
                UTF-8 encoded content of a 'https://www.cvedetails.com' page
                with the CVE table.
            parsed_service_ver:
                List of number strings taken from the service version,
                without any letters or words. Empty strings are ignored.

        Returns:
            URL of the most suitable CVE for the captured service version,
            or None when no usable row matches.

        Raises:
            UnicodeDecodeError: If 'page_content' is not valid UTF-8.
        """
        # An empty token is a substring of every summary and would make the
        # very first row "match", so such tokens are dropped up front.
        version_tokens = [token for token in parsed_service_ver if token]
        if not version_tokens:
            return None

        soup = BeautifulSoup(page_content.decode("UTF-8"), "html.parser")
        for summary_cell in soup.find_all("td", class_="cvesummarylong"):
            summary_text = summary_cell.text
            if not any(token in summary_text for token in version_tokens):
                continue
            # A row whose surrounding markup is not as expected is skipped
            # rather than aborting the whole scan with an AttributeError.
            cve_a_tag = SuitableCVEFinder._find_cve_link(summary_cell)
            href = cve_a_tag.get("href") if cve_a_tag is not None else None
            if href:
                return f"https://www.cvedetails.com{href}"
        return None

    async def find_suitable_cve(self) -> Optional[list[str]]:
        """Run the sequence of steps that scrapes the CVE table page.

        The steps are executed in the following order:
            - Extract the numbers from the version of the service; they are
              used to find the most suitable CVE.
            - Asynchronously download the page holding the HTML table with
              links to CVEs.
            - Scrape the downloaded page with the extracted numbers in order
              to find the most suitable CVE for the captured service.

        Returns:
            A single-element list with the URL of the most suitable CVE for
            the captured service, or None when nothing suitable was found.
        """
        # NOTE(review): 'extracted_service_ver_in_nums' is not defined in the
        # visible part of this class; confirm it is provided elsewhere,
        # otherwise this call raises AttributeError.
        parsed_service_ver: list[str] = self.extracted_service_ver_in_nums()
        page_content: bytes = await self.get_page_content()
        suitable_cve = await self.scrape_cve_table_page(
            page_content, parsed_service_ver
        )
        return [suitable_cve] if suitable_cve else None