# Source code for usp.web_client.abstract_client
"""Abstract web client class."""
import abc
from http import HTTPStatus
from typing import Optional
RETRYABLE_HTTP_STATUS_CODES = {
    # Codes that have a name in the standard library, stored as plain integers.
    status.value
    for status in (
        # Some servers answer "400 Bad Request" at first and then work on a retry, for unknown reasons
        HTTPStatus.BAD_REQUEST,
        # The request timed out, so simply asking again is reasonable
        HTTPStatus.REQUEST_TIMEOUT,
        # We were rate limited; waiting a bit and retrying makes sense
        HTTPStatus.TOO_MANY_REQUESTS,
        # The server may be fine on the next attempt
        HTTPStatus.INTERNAL_SERVER_ERROR,
        # The upstream may be back on a retry
        HTTPStatus.BAD_GATEWAY,
        # The service may be available again on a retry
        HTTPStatus.SERVICE_UNAVAILABLE,
        # The upstream may be back on a retry
        HTTPStatus.GATEWAY_TIMEOUT,
    )
} | {
    # Unofficial codes without an HTTPStatus member, listed numerically.
    499,  # (nginx) Client Closed Request
    509,  # (Apache Web Server/cPanel) Bandwidth Limit Exceeded
    520,  # (Cloudflare) Unknown Error
    521,  # (Cloudflare) Web Server Is Down
    522,  # (Cloudflare) Connection Timed Out
    523,  # (Cloudflare) Origin Is Unreachable
    524,  # (Cloudflare) A Timeout Occurred
    525,  # (Cloudflare) SSL Handshake Failed
    526,  # (Cloudflare) Invalid SSL Certificate
    527,  # (Cloudflare) Railgun Error
    530,  # (Cloudflare) Origin DNS Error
    598,  # Network read timeout error
}
"""HTTP status codes on which a request should be retried."""
class AbstractWebClientResponse(metaclass=abc.ABCMeta):
    """
    Abstract response.

    Base type of the objects returned by :meth:`AbstractWebClient.get`. It declares no
    attributes or methods of its own.
    """
class AbstractWebClientSuccessResponse(AbstractWebClientResponse, metaclass=abc.ABCMeta):
    """
    Successful response.

    Concrete subclasses must implement every method below; each one is abstract.
    """

    @abc.abstractmethod
    def status_code(self) -> int:
        """
        Return HTTP status code of the response.

        :return: HTTP status code of the response, e.g. 200.
        """
        raise NotImplementedError("Abstract method.")

    @abc.abstractmethod
    def status_message(self) -> str:
        """
        Return HTTP status message of the response.

        :return: HTTP status message of the response, e.g. "OK".
        """
        raise NotImplementedError("Abstract method.")

    @abc.abstractmethod
    def raw_data(self) -> bytes:
        """
        Return encoded raw data of the response.

        :return: Encoded raw data of the response.
        """
        raise NotImplementedError("Abstract method.")
class WebClientErrorResponse(AbstractWebClientResponse, metaclass=abc.ABCMeta):
    """
    Error response.

    Carries a human-readable message and a flag telling the caller whether the
    request should be retried.
    """

    # NOTE: the base class in this file declares no ``__slots__``, so instances
    # still get a ``__dict__``; these slots only add descriptors for the two fields.
    __slots__ = [
        '_message',
        '_retryable',
    ]

    def __init__(self, message: str, retryable: bool):
        """
        Constructor.

        :param message: Message describing what went wrong.
        :param retryable: True if the request should be retried.
        """
        super().__init__()
        self._message = message
        self._retryable = retryable

    def message(self) -> str:
        """
        Return message describing what went wrong.

        :return: Message describing what went wrong.
        """
        return self._message

    def retryable(self) -> bool:
        """
        Return True if request should be retried.

        :return: True if request should be retried.
        """
        return self._retryable
class AbstractWebClient(object, metaclass=abc.ABCMeta):
    """
    Abstract web client to be used by the sitemap fetcher.

    Concrete clients must implement both methods below; each one is abstract.
    """

    @abc.abstractmethod
    def set_max_response_data_length(self, max_response_data_length: int) -> None:
        """
        Set the maximum number of bytes that the web client will fetch.

        :param max_response_data_length: Maximum number of bytes that the web client will fetch.
        """
        raise NotImplementedError("Abstract method.")

    @abc.abstractmethod
    def get(self, url: str) -> AbstractWebClientResponse:
        """
        Fetch a URL and return a response.

        Implementations shouldn't raise exceptions on connection errors (including timeouts);
        instead, such errors should be reported via the returned response object.

        :param url: URL to fetch.
        :return: Response object.
        """
        raise NotImplementedError("Abstract method.")