from argparse import ArgumentParser
import asyncio
import codecs
import concurrent.futures
import hashlib
import inspect
import functools
import logging
import mimetypes
import os
import pycurl
import re
import cchardet
import sentry_sdk
import signal
import sys
from tornado.curl_httpclient import CurlAsyncHTTPClient, CurlError
import tornado.simple_httpclient
import tornado.httpclient
import tornado.ioloop
import tornado.web
import traceback
import web_monitoring_diff
from .. import basic_diffs, html_render_diff, html_links_diff
from ..exceptions import UndiffableContentError, UndecodableContentError
from ..utils import shutdown_executor_in_loop, Signal
logger = logging.getLogger(__name__)
# Track errors with Sentry.io. It will automatically detect the `SENTRY_DSN`
# environment variable. If not set, all its methods will operate conveniently
# as no-ops.
sentry_sdk.init(ignore_errors=[KeyboardInterrupt])
# Tornado logs any non-success response at ERROR level, which Sentry captures
# by default. We don't really want those logs.
sentry_sdk.integrations.logging.ignore_logger('tornado.access')
DIFFER_PARALLELISM = int(os.environ.get('DIFFER_PARALLELISM', 10))
RESTART_BROKEN_DIFFER = os.environ.get('RESTART_BROKEN_DIFFER', 'False').strip().lower() == 'true'
# Map tokens in the REST API to functions in modules.
# The modules do not have to be part of the web_monitoring_diff package.
DIFF_ROUTES = {
"length": basic_diffs.compare_length,
"identical_bytes": basic_diffs.identical_bytes,
"side_by_side_text": basic_diffs.side_by_side_text,
"links": html_links_diff.links_diff_html,
"links_json": html_links_diff.links_diff_json,
# applying diff-match-patch (dmp) to strings (no tokenization)
"html_text_dmp": basic_diffs.html_text_diff,
"html_source_dmp": basic_diffs.html_source_diff,
"html_token": html_render_diff.html_diff_render,
}
# Optional, experimental diffs.
try:
from ..experimental import htmltreediff
DIFF_ROUTES["html_tree"] = htmltreediff.diff
except ModuleNotFoundError:
...
try:
from ..experimental import htmldiffer
DIFF_ROUTES["html_perma_cc"] = htmldiffer.diff
except ModuleNotFoundError:
...
# Matches a <meta> tag in HTML used to specify the character encoding:
# <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
# <meta charset="utf-8" />
META_TAG_PATTERN = re.compile(
b'<meta[^>]+charset\\s*=\\s*[\'"]?([^>]*?)[ /;\'">]',
re.IGNORECASE)
# Matches an XML prolog that specifies character encoding:
# <?xml version="1.0" encoding="ISO-8859-1"?>
XML_PROLOG_PATTERN = re.compile(
b'<?xml\\s[^>]*encoding=[\'"]([^\'"]+)[\'"].*\?>',
re.IGNORECASE)
MAX_BODY_SIZE = None
try:
MAX_BODY_SIZE = int(os.environ.get('DIFFER_MAX_BODY_SIZE', 0))
if MAX_BODY_SIZE < 0:
print('DIFFER_MAX_BODY_SIZE must be >= 0', file=sys.stderr)
sys.exit(1)
except ValueError:
print('DIFFER_MAX_BODY_SIZE must be an integer', file=sys.stderr)
sys.exit(1)
class LimitedCurlAsyncHTTPClient(CurlAsyncHTTPClient):
"""
A customized version of Tornado's CurlAsyncHTTPClient that adds support for
maximum response body sizes. The API is the same as that for Tornado's
SimpleAsyncHTTPClient: set ``max_body_size`` to an integer representing the
maximum number of bytes in a response body.
"""
def initialize(self, max_clients=10, defaults=None, max_body_size=None):
self.max_body_size = max_body_size
defaults = defaults or {}
defaults['prepare_curl_callback'] = self.prepare_curl
super().initialize(max_clients=max_clients, defaults=defaults)
def prepare_curl(self, curl):
if self.max_body_size:
# NOTE: cURL's docs suggest this doesn't work if the server doesn't
# send a Content-Length header, but it seems to do just fine in
# tests. ¯\_(ツ)_/¯
curl.setopt(pycurl.MAXFILESIZE, self.max_body_size)
HTTP_CLIENT = LimitedCurlAsyncHTTPClient
if os.getenv('USE_SIMPLE_HTTP_CLIENT'):
HTTP_CLIENT = None
tornado.httpclient.AsyncHTTPClient.configure(HTTP_CLIENT,
max_body_size=MAX_BODY_SIZE)
def get_http_client():
return tornado.httpclient.AsyncHTTPClient()
class PublicError(tornado.web.HTTPError):
"""
Customized version of Tornado's HTTP error designed for reporting publicly
visible error messages. Please always raise this instead of calling
`send_error()` directly, since it lets you attach a user-visible
explanation of what went wrong.
Parameters
----------
status_code : int, default: 500
Status code for the response.
public_message : str, optional
Textual description of the error. This will be publicly visible in
production mode, unlike `log_message`.
log_message : str, optional
Error message written to logs and to error tracking service. Will be
included in the HTTP response only in debug mode. Same as the
`log_message` parameter to `tornado.web.HTTPError`, but with no
interpolation.
extra : dict, optional
Dict of additional keys and values to include in the error response.
"""
def __init__(self, status_code=500, public_message=None, log_message=None,
extra=None, **kwargs):
self.extra = extra or {}
if public_message is not None:
if 'error' not in self.extra:
self.extra['error'] = public_message
if log_message is None:
log_message = public_message
super().__init__(status_code, log_message, **kwargs)
class MockRequest:
"An HTTPRequest-like object for local file:/// requests."
def __init__(self, url):
self.url = url
class MockResponse:
"An HTTPResponse-like object for local file:/// requests."
def __init__(self, url, body, headers=None):
self.request = MockRequest(url)
self.body = body
self.headers = headers
self.error = None
if self.headers is None:
self.headers = {}
if 'Content-Type' not in self.headers:
self.headers.update(self._get_content_type_headers_from_url(url))
@staticmethod
def _get_content_type_headers_from_url(url):
# If the extension is not recognized, assume text/html
headers = {'Content-Type': 'text/html'}
content_type, content_encoding = mimetypes.guess_type(url)
if content_type is not None:
headers['Content-Type'] = content_type
if content_encoding is not None:
headers['Content-Encoding'] = content_encoding
return headers
DEBUG_MODE = os.environ.get('DIFFING_SERVER_DEBUG', 'False').strip().lower() == 'true'
VALIDATE_TARGET_CERTIFICATES = \
os.environ.get('VALIDATE_TARGET_CERTIFICATES', 'False').strip().lower() == 'true'
access_control_allow_origin_header = \
os.environ.get('ACCESS_CONTROL_ALLOW_ORIGIN_HEADER')
def initialize_diff_worker():
signal.signal(signal.SIGINT, signal.SIG_IGN)
class DiffServer(tornado.web.Application):
terminating = False
server = None
def listen(self, port, address='', **kwargs):
self.server = super().listen(port, address, **kwargs)
return self.server
async def shutdown(self, immediate=False):
"""
Shut down the server as gracefully as possible. If `immediate` is True,
the server will kill any in-progress diff processes immediately.
Otherwise, diffs are allowed to try and finish.
"""
self.terminating = True
if self.server:
self.server.stop()
await self.shutdown_differs(immediate)
if self.server:
await self.server.close_all_connections()
async def shutdown_differs(self, immediate=False):
"""Stop all child processes used for running diffs."""
differs = self.settings.get('diff_executor')
if differs:
if immediate:
# NOTE: this might be fragile since we are grabbing a private
# attribute. One alternative is to use psutil to find all child
# pids and indiscriminately kill them, but that has its own
# issues.
for child in differs._processes.values():
child.kill()
else:
await shutdown_executor_in_loop(differs)
async def quit(self, immediate=False, code=0):
await self.shutdown(immediate=immediate)
tornado.ioloop.IOLoop.current().stop()
if code:
sys.exit(code)
def handle_signal(self, signal_type, frame):
"""Handle a signal by shutting down the application and IO loop."""
loop = tornado.ioloop.IOLoop.current()
async def shutdown_and_stop():
try:
immediate = self.terminating
method = 'immediately' if immediate else 'gracefully'
print(f'Shutting down server {method}...')
await self.shutdown(immediate=immediate)
loop.stop()
print('Shutdown complete.')
except Exception:
logger.exception('Failed to gracefully stop server!')
sys.exit(1)
loop.add_callback_from_signal(shutdown_and_stop)
class BaseHandler(tornado.web.RequestHandler):
def set_default_headers(self):
if access_control_allow_origin_header is not None:
if 'allowed_origins' not in self.settings:
self.settings['allowed_origins'] = \
set([origin.strip() for origin
in access_control_allow_origin_header.split(',')])
req_origin = self.request.headers.get('Origin')
if req_origin:
allowed = self.settings.get('allowed_origins')
if allowed and (req_origin in allowed or '*' in allowed):
self.set_header('Access-Control-Allow-Origin', req_origin)
self.set_header('Access-Control-Allow-Credentials', 'true')
self.set_header('Access-Control-Allow-Headers', 'x-requested-with')
self.set_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
def options(self):
# no body
self.set_status(204)
self.finish()
class DiffHandler(BaseHandler):
# subclass must define `differs` attribute
# If query parameters repeat, take last one.
# Decode clean query parameters into unicode strings and cache the results.
@functools.lru_cache()
def decode_query_params(self):
query_params = {k: v[-1].decode() for k, v in
self.request.arguments.items()}
return query_params
# Compute our own ETag header values.
def compute_etag(self):
# We're not actually hashing content for this, since that is expensive.
validation_bytes = str(
web_monitoring_diff.__version__
+ self.request.path
+ str(self.decode_query_params())
).encode('utf-8')
# Uses the "weak validation" directive since we don't guarantee that future
# responses for the same diff will be byte-for-byte identical.
etag = f'W/"{web_monitoring_diff.utils.hash_content(validation_bytes)}"'
return etag
async def get(self, differ):
# Skip a whole bunch of work if possible.
self.set_etag_header()
if self.check_etag_header():
self.set_status(304)
self.finish()
return
# Find the diffing function registered with the name given by `differ`.
try:
func = self.differs[differ]
except KeyError:
raise PublicError(404, f'Unknown diffing method: `{differ}`. '
f'You can get a list of '
f'supported differs from '
f'the `/` endpoint.')
query_params = self.decode_query_params()
# The logic here is a bit tortured in order to allow one or both URLs
# to be local files, while still optimizing the common case of two
# remote URLs that we want to fetch in parallel.
try:
urls = {param: query_params.pop(param) for param in ('a', 'b')}
except KeyError:
raise PublicError(400,
'Malformed request. You must provide a URL '
'as the value for both `a` and `b` query '
'parameters.')
# TODO: Add caching of fetched URIs.
requests = [self.fetch_diffable_content(url,
query_params.pop(f'{param}_hash', None),
query_params)
for param, url in urls.items()]
content = await asyncio.gather(*requests)
# Pass the bytes and any remaining args to the diffing function.
res = await self.diff(func, content[0], content[1], query_params)
res['version'] = web_monitoring_diff.__version__
# Echo the client's request unless the differ func has specified
# somethine else.
res.setdefault('type', differ)
self.write(res)
async def fetch_diffable_content(self, url, expected_hash, query_params):
"""
Fetch and validate a content to diff from a given URL.
"""
response = None
# For testing convenience, support file:// URLs in development.
if url.startswith('file://'):
if os.environ.get('WEB_MONITORING_APP_ENV') == 'production':
raise PublicError(403, 'Local files cannot be used in '
'production environment.')
with open(url[7:], 'rb') as f:
body = f.read()
response = MockResponse(url, body)
# Only support HTTP(S) URLs.
elif not url.startswith('http://') and not url.startswith('https://'):
raise PublicError(400,
f'URL must use HTTP or HTTPS protocol: "{url}"',
'Invalid URL for upstream content',
extra={'url': url})
else:
# Include request headers defined by the query param
# `pass_headers=HEADER_NAMES` in the upstream request. This is
# useful for passing data like cookie headers. HEADER_NAMES is a
# comma-separated list of HTTP header names.
headers = {}
header_keys = query_params.get('pass_headers')
if header_keys:
for header_key in header_keys.split(','):
header_key = header_key.strip()
header_value = self.request.headers.get(header_key)
if header_value:
headers[header_key] = header_value
try:
client = get_http_client()
response = await client.fetch(url, headers=headers,
validate_cert=VALIDATE_TARGET_CERTIFICATES)
except ValueError as error:
raise PublicError(400, str(error))
# Only raised by the simple client and not by the cURL client.
except OSError as error:
raise PublicError(502,
f'Could not fetch "{url}": {error}',
'Could not fetch upstream content',
extra={'url': url, 'cause': str(error)})
# --- SIMPLE CLIENT ERRORS ----------------------------------------
except tornado.simple_httpclient.HTTPTimeoutError:
raise PublicError(504,
f'Timed out while fetching "{url}"',
'Could not fetch upstream content',
extra={'url': url})
except tornado.simple_httpclient.HTTPStreamClosedError:
# Unfortunately we get pretty ambiguous info if the connection
# was closed because we exceeded the max size. :(
message = f'The connection was closed while fetching "{url}"'
if client.max_body_size:
message += (f' -- this may have been caused by a large '
f'response (the maximum diffable response is '
f'{client.max_body_size} bytes)')
raise PublicError(502,
message,
'Connection closed while fetching upstream',
extra={'url': url,
'max_size': client.max_body_size})
# --- CURL CLIENT ERRORS ------------------------------------------
except CurlError as error:
# Documentation for cURL error codes:
# https://curl.haxx.se/libcurl/c/libcurl-errors.html
# PyCurl has constants named `E_*` vs. libcurl's `CURLE_*`
if error.errno == pycurl.E_URL_MALFORMAT:
raise PublicError(400,
str(error),
'Invalid URL for cURL',
extra={'url': url})
# TODO: raise a nicer error from LimitedCurlAsyncHTTPClient
elif error.errno == pycurl.E_FILESIZE_EXCEEDED:
raise PublicError(502,
f'Upstream response too big for "{url}"'
f'(max: {client.max_body_size} bytes)',
'Upstream content too big',
extra={'url': url,
'max_size': client.max_body_size})
elif (error.errno == pycurl.E_COULDNT_RESOLVE_PROXY
or error.errno == pycurl.E_COULDNT_CONNECT
or error.errno == 8 # E_WEIRD_SERVER_REPLY
or error.errno == pycurl.E_REMOTE_ACCESS_DENIED
or error.errno == pycurl.E_HTTP2):
raise PublicError(502,
f'Could not fetch "{url}": {error}',
'Could not fetch upstream content',
extra={'url': url, 'cause': str(error)})
elif error.errno == pycurl.E_OPERATION_TIMEDOUT:
raise PublicError(504,
f'Timed out while fetching "{url}"',
'Could not fetch upstream content',
extra={'url': url})
else:
raise PublicError(502,
f'Unknown error fetching "{url}"',
f'Unknown error fetching upstream content: {error}',
extra={'url': url})
# --- COMMON ERRORS SUPPORTED BY ALL CLIENTS ----------------------
except tornado.httpclient.HTTPError as error:
# If the response is actually coming from a web archive,
# allow error codes. The Memento-Datetime header indicates
# the response is an archived one, and not an actual failure
# to respond with the desired content.
if error.response is not None and \
error.response.headers.get('Memento-Datetime') is not None:
response = error.response
else:
code = error.response and error.response.code
raise PublicError(502,
(f'Received a {code or "?"} '
f'status while fetching "{url}": '
f'{error}'),
log_message='Could not fetch upstream content',
extra={'type': 'UPSTREAM_ERROR',
'url': url,
'upstream_code': code})
if response and expected_hash:
actual_hash = hashlib.sha256(response.body).hexdigest()
if actual_hash != expected_hash:
raise PublicError(502,
(f'Fetched content at "{url}" does not '
f'match hash "{expected_hash}".'),
log_message='Could not fetch upstream content',
extra={'type': 'HASH_MISMATCH',
'url': url,
'expected_hash': expected_hash,
'actual_hash': actual_hash})
return response
# TODO: we should split out all the management of the executor and diffing
# (so this, get_diff_executor, caller, etc.) into a separate object owned
# by the server so we don't need weird bits checking the server's
# `terminating` state and so that all the parts are grouped together.
async def diff(self, func, a, b, params, tries=2):
"""
Actually do a diff between two pieces of content, optionally retrying
if the process pool that executes the diff breaks.
"""
executor = self.get_diff_executor()
loop = asyncio.get_running_loop()
for attempt in range(tries):
try:
return await loop.run_in_executor(
executor, functools.partial(caller, func, a, b, **params))
except concurrent.futures.process.BrokenProcessPool:
if attempt + 1 < tries:
# There could be many diffs happening in parallel, so
# before trying to reset the process pool, make sure other
# parallel diffs haven't already done it. If it's already
# been reset, then we can just go and use the new one.
old_executor, executor = executor, self.get_diff_executor()
if executor == old_executor:
executor = self.get_diff_executor(reset=True)
else:
# If we shouldn't allow the server to keep rebuilding the
# differ pool for new requests, schedule a shutdown.
# (*Schuduled* so that current requests have a chance to
# complete with an error.)
if not RESTART_BROKEN_DIFFER:
logger.error('Process pool for diffing has failed too '
'many times; quitting server...')
tornado.ioloop.IOLoop.current().add_callback(
self.application.quit,
code=10)
raise
# NOTE: this doesn't do anything async, but if we change it to do so, we
# need to add a lock (either asyncio.Lock or tornado.locks.Lock).
def get_diff_executor(self, reset=False):
if self.application.terminating:
raise RuntimeError('Diff executor is being shut down.')
executor = self.settings.get('diff_executor')
if reset or not executor:
if executor:
try:
# NOTE: we don't need await this; we just want to make sure
# the old executor gets cleaned up.
shutdown_executor_in_loop(executor)
except Exception:
pass
executor = concurrent.futures.ProcessPoolExecutor(
DIFFER_PARALLELISM,
initializer=initialize_diff_worker)
self.settings['diff_executor'] = executor
return executor
def write_error(self, status_code, **kwargs):
response = {'code': status_code, 'error': self._reason}
# Handle errors that are allowed to be public
# TODO: this error filtering should probably be in `send_error()`
actual_error = 'exc_info' in kwargs and kwargs['exc_info'][1] or None
if isinstance(actual_error, (UndiffableContentError, UndecodableContentError)):
response['code'] = 422
response['error'] = str(actual_error)
if 'extra' in kwargs:
response.update(kwargs['extra'])
if isinstance(actual_error, PublicError):
response.update(actual_error.extra)
# Instances of PublicError and tornado.web.HTTPError won't get tracked
# by Sentry by default, but we do want to track unexpected, server-side
# issues. (Usually a non-HTTPError will have been raised in this case,
# but PublicError can be used for special status codes.)
if isinstance(actual_error, tornado.web.HTTPError) and response['code'] >= 500:
with sentry_sdk.push_scope() as scope:
# TODO: this breadcrumb should happen at the start of the
# request handler, but we need to test and make sure crumbs are
# properly attached to *this* HTTP request and don't bleed over
# to others, since Sentry's special support for Tornado has
# been dropped.
scope.clear_breadcrumbs()
headers = dict(self.request.headers)
if 'Authorization' in headers:
headers['Authorization'] = '[removed]'
sentry_sdk.add_breadcrumb(category='request', data={
'url': self.request.full_url(),
'method': self.request.method,
'headers': headers,
})
sentry_sdk.add_breadcrumb(category='response', data=response)
scope.level = 'info'
sentry_sdk.capture_exception(actual_error)
# Fill in full info if configured to do so
if self.settings.get('serve_traceback') and 'exc_info' in kwargs:
response['error'] = str(kwargs['exc_info'][1])
stack_lines = traceback.format_exception(*kwargs['exc_info'])
response['stack'] = ''.join(stack_lines)
if response['code'] != status_code:
self.set_status(response['code'])
self.finish(response)
def _extract_encoding(headers, content):
encoding = None
content_type = headers.get('Content-Type', '').lower()
if 'charset=' in content_type:
encoding = content_type.split('charset=')[-1]
if not encoding:
meta_tag_match = META_TAG_PATTERN.search(content, endpos=2048)
if meta_tag_match:
encoding = meta_tag_match.group(1).decode('ascii', errors='ignore')
if not encoding:
prolog_match = XML_PROLOG_PATTERN.search(content, endpos=2048)
if prolog_match:
encoding = prolog_match.group(1).decode('ascii', errors='ignore')
if encoding:
encoding = encoding.strip()
if not encoding and content:
# try to identify encoding using cchardet. Use up to 18kb of the
# content for detection. Its not necessary to use the full content
# as it could be huge. Also, if you use too little, detection is not
# accurate.
detected = cchardet.detect(content[:18432])
if detected:
detected_encoding = detected.get('encoding')
if detected_encoding:
encoding = detected_encoding.lower()
# Handle common mistakes and errors in encoding names
if encoding == 'iso-8559-1':
encoding = 'iso-8859-1'
# Windows-1252 is so commonly mislabeled, WHATWG recommends assuming it's a
# mistake: https://encoding.spec.whatwg.org/#names-and-labels
if encoding == 'iso-8859-1' and 'html' in content_type:
encoding = 'windows-1252'
# Check if the selected encoding is known. If not, fallback to default.
try:
codecs.lookup(encoding)
except (LookupError, ValueError, TypeError):
encoding = 'utf-8'
return encoding
def _decode_body(response, name, raise_if_binary=True):
encoding = _extract_encoding(response.headers, response.body)
text = response.body.decode(encoding, errors='replace')
text_length = len(text)
if text_length == 0:
return text
# Replace null terminators; some differs (especially those written in C)
# don't handle them well in the middle of a string.
text = text.replace('\u0000', '\ufffd')
# If a significantly large portion of the document was totally undecodable,
# it's likely this wasn't text at all, but binary data.
if raise_if_binary and text.count('\ufffd') / text_length > 0.25:
raise UndecodableContentError(f'The response body of `{name}` could not be decoded as {encoding}.')
return text
def caller(func, a, b, **query_params):
"""
A translation layer between HTTPResponses and differ functions.
Parameters
----------
func : callable
a 'differ' function
a : tornado.httpclient.HTTPResponse
b : tornado.httpclient.HTTPResponse
**query_params
additional parameters parsed from the REST diffing request
The function `func` may expect required and/or optional arguments. Its
signature serves as a dependency injection scheme, specifying what it
needs from the HTTPResponses. The following argument names have special
meaning:
* a_url, b_url: URL of HTTP request
* a_body, b_body: Raw HTTP reponse body (bytes)
* a_text, b_text: Decoded text of HTTP response body (str)
* a_headers, b_headers: Dict of HTTP headers
Any other argument names in the signature will take their values from the
REST query parameters.
"""
# Supplement the query_parameters from the REST call with special items
# extracted from `a` and `b`.
query_params.setdefault('a_url', a.request.url)
query_params.setdefault('b_url', b.request.url)
query_params.setdefault('a_body', a.body)
query_params.setdefault('b_body', b.body)
query_params.setdefault('a_headers', a.headers)
query_params.setdefault('b_headers', b.headers)
# The differ's signature is a dependency injection scheme.
sig = inspect.signature(func)
raise_if_binary = not query_params.get('ignore_decoding_errors', False)
if 'a_text' in sig.parameters:
query_params.setdefault(
'a_text',
_decode_body(a, 'a', raise_if_binary=raise_if_binary))
if 'b_text' in sig.parameters:
query_params.setdefault(
'b_text',
_decode_body(b, 'b', raise_if_binary=raise_if_binary))
kwargs = dict()
for name, param in sig.parameters.items():
try:
kwargs[name] = query_params[name]
except KeyError:
if param.default is inspect._empty:
# This is a required argument.
raise KeyError("{} requires a parameter {} which was not "
"provided in the query"
"".format(func.__name__, name))
return func(**kwargs)
class IndexHandler(BaseHandler):
async def get(self):
# TODO Show swagger API or Markdown instead.
info = {'diff_types': list(DIFF_ROUTES),
'version': web_monitoring_diff.__version__}
self.write(info)
class HealthCheckHandler(BaseHandler):
async def get(self):
# TODO Include more information about health here.
# The 200 repsonse code with an empty object is just a liveness check.
self.write({})
[docs]
def make_app():
"""
Create and return a Tornado application object that serves diffs.
"""
class BoundDiffHandler(DiffHandler):
differs = DIFF_ROUTES
return DiffServer([
(r"/healthcheck", HealthCheckHandler),
(r"/([A-Za-z0-9_]+)", BoundDiffHandler),
(r"/", IndexHandler),
], debug=DEBUG_MODE, compress_response=True,
diff_executor=None)
def start_app(port):
"""
Create and start the diff server on a given port.
This is a blocking call -- it starts an event loop for the server and does
not return until the server has shut down. For more control, use
:func:`create_app`.
Parameters
----------
port : int
The port to listen on.
"""
app = make_app()
print(f'Starting server on port {port}')
app.listen(port)
with Signal((signal.SIGINT, signal.SIGTERM), app.handle_signal):
tornado.ioloop.IOLoop.current().start()
[docs]
def cli():
"""
Start the diff server from the CLI. This will parse the current process's
arguments, start an event loop, and begin serving.
"""
parser = ArgumentParser(description='Start a diffing server.')
parser.add_argument('--version', action='store_true',
help='Show version information')
parser.add_argument('--port', type=int, default=8888,
help='Port to listen on')
arguments = parser.parse_args()
if arguments.version:
print(web_monitoring_diff.__version__)
return
start_app(arguments.port)
if __name__ == '__main__':
cli()