From 922600f526945a2b86cb30247db1ec89ba29cc28 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 28 Apr 2020 11:45:53 +0300 Subject: [PATCH 01/61] Tornado 6 support --- core/script/cli/ssh.py | 11 +++++++---- core/script/cli/telnet.py | 34 +++++++++++++++++++++++++++------- requirements/docker.txt | 2 +- requirements/node.txt | 2 +- tests/web/base.py | 2 +- 5 files changed, 37 insertions(+), 14 deletions(-) diff --git a/core/script/cli/ssh.py b/core/script/cli/ssh.py index 9ca56546a3..27de9cd3e1 100644 --- a/core/script/cli/ssh.py +++ b/core/script/cli/ssh.py @@ -19,6 +19,7 @@ import tornado.gen from ssh2.session import Session, LIBSSH2_HOSTKEY_HASH_SHA1 from ssh2.exceptions import SSH2Error from ssh2.error_codes import LIBSSH2_ERROR_EAGAIN +from typing import Optional, Union # NOC modules from noc.config import config @@ -112,10 +113,10 @@ class SSHIOStream(IOStream): self.logger.info("SSH Error: %s", e) raise CLISSHProtocolError("SSH Error: %s" % e) - def read_from_fd(self): + def read_from_fd(self, buf: Union[bytearray, memoryview]) -> Optional[int]: try: metrics["ssh_reads"] += 1 - code, data = self.channel.read(self.read_chunk_size) + code, data = self.channel.read(len(buf)) if code == 0: if self.channel.eof(): self.logger.info("SSH session reset") @@ -123,8 +124,10 @@ class SSHIOStream(IOStream): metrics["ssh_reads_blocked"] += 1 return None elif code > 0: - metrics["ssh_read_bytes"] += len(data) - return data + n = len(data) + metrics["ssh_read_bytes"] += n + buf[:n] = data + return n elif code == LIBSSH2_ERROR_EAGAIN: metrics["ssh_reads_blocked"] += 1 return None # Blocking call diff --git a/core/script/cli/telnet.py b/core/script/cli/telnet.py index 8cb9a5a360..4b4cf1cc35 100644 --- a/core/script/cli/telnet.py +++ b/core/script/cli/telnet.py @@ -12,7 +12,7 @@ import codecs # Third-party modules from tornado.iostream import IOStream import tornado.gen -from typing import List, Optional +from typing import List, Optional, Union # NOC modules from noc.core.perf import metrics @@ -173,6 +173,14 @@ class TelnetParser(object): self.out_iac_seq = [] return b"".join(r) + def refeed(self, chunk: bytes) -> None: + """ + Return unprocessed chunk to start of buffer + :param chunk: + :return: + """ + self.out_iac_seq.insert(0, chunk) + def send_iac(self, cmd: int, opt: int) -> None: """ Send IAC response @@ -257,14 +265,26 @@ class TelnetIOStream(IOStream): self.logger.debug("Sending %r on connect", self.cli.profile.telnet_send_on_connect) yield self.write(self.cli.profile.telnet_send_on_connect) - def read_from_fd(self): + def read_from_fd(self, buf: Union[bytearray, memoryview]) -> Optional[int]: metrics["telnet_reads"] += 1 - chunk = super(TelnetIOStream, self).read_from_fd() - if chunk: - metrics["telnet_read_bytes"] += len(chunk) - elif chunk is None: + buf_len = len(buf) + n = super(TelnetIOStream, self).read_from_fd(buf) + if n: + metrics["telnet_read_bytes"] += n + parsed = self.parser.feed(buf) + n = len(parsed) + if n > buf_len: + buf[:buf_len] = parsed[:buf_len] + self.parser.refeed(parsed[buf_len:]) + # WARNING: May hang forever, if it is the last reply from the box + # and no new packets to be received for this interaction + return buf_len + else: + buf[:n] = parsed + return n + else: metrics["telnet_reads_blocked"] += 1 - return self.parser.feed(chunk) + return n def write(self, data, callback=None): data = self.parser.escape(data) diff --git a/requirements/docker.txt b/requirements/docker.txt index 6a5987679c..760d646b2d 100644 --- a/requirements/docker.txt +++ b/requirements/docker.txt @@ -42,7 +42,7 @@ argparse>=1.4.0 demjson==2.2.4 crontab==0.22.0 fs==2.4.11 -tornado==4.5.3 +tornado==6.0.4 tornadis==0.8.1 typing==3.7.4.1 hiredis==0.2.0 diff --git a/requirements/node.txt b/requirements/node.txt index 7d0dc51410..3a87abdb3b 100644 --- a/requirements/node.txt +++ b/requirements/node.txt @@ -24,7 +24,7 @@ mistune==0.5 mongoengine==0.19.1 networkx==2.4 numpy==1.18.3 -tornado==4.5.3 +tornado==6.0.4 typing==3.7.4.1 requests==2.20.0 supervisor==4.1.0 diff --git a/tests/web/base.py b/tests/web/base.py index 42d2e6e948..807995bbf6 100644 --- a/tests/web/base.py +++ b/tests/web/base.py @@ -31,7 +31,7 @@ class APIHandler(object): APIHandler.io_loop.make_current() sock, port = tornado.testing.bind_unused_port() app = tornado.web.Application(handlers) - self.server = tornado.httpserver.HTTPServer(app, io_loop=self.io_loop) + self.server = tornado.httpserver.HTTPServer(app) self.server.add_socket(sock) self.base_url = "http://127.0.0.1:%d" % port self.server.start() -- GitLab From 5d9814335d3d692b3fe39b8dffdae2cd4d62ee3e Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 28 Apr 2020 16:27:55 +0300 Subject: [PATCH 02/61] Use pynsq 0.9.0b1 --- requirements/docker.txt | 2 +- requirements/node.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/docker.txt b/requirements/docker.txt index 760d646b2d..d040f2d0e9 100644 --- a/requirements/docker.txt +++ b/requirements/docker.txt @@ -15,7 +15,7 @@ blinker==1.3 futures>=3.0.5 Jinja2==2.8 jsonschema==2.4.0 -pynsq==0.8.3 +git+https://github.com/ploxiln/pynsq.git@a9f9a75eaec62904f06d497397d1b019cdb9581a pyproj==1.9.4 python-creole==1.3.2 pytz>=2014.4 diff --git a/requirements/node.txt b/requirements/node.txt index 3a87abdb3b..ed5da07b8f 100644 --- a/requirements/node.txt +++ b/requirements/node.txt @@ -12,7 +12,7 @@ blinker==1.3 futures>=3.0.5 Jinja2==2.8 jsonschema==2.4.0 -pynsq==0.8.3 +git+https://github.com/ploxiln/pynsq.git@a9f9a75eaec62904f06d497397d1b019cdb9581a pyproj==1.9.4 python-creole==1.3.2 pytz>=2014.4 -- GitLab From 158526e6a23c9b4715786b5e7e9f70aba72b7fb7 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Wed, 29 Apr 2020 13:01:03 +0300 Subject: [PATCH 03/61] Fix run_sync --- core/ioloop/util.py | 27 +++++++++++---------------- settings.py | 2 ++ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 63b3b9624e..67f9cc9ee2 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -7,6 +7,7 @@ # Python modules import sys +import asyncio # Third-party modules from typing import Callable, TypeVar, List, Tuple, Any @@ -29,10 +30,9 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: :return: Callable result """ - @tornado.gen.coroutine - def wrapper(): + async def wrapper(): try: - r = yield cb() + r = await cb() result.append(r) except Exception: error.append(sys.exc_info()) @@ -40,19 +40,14 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: result: List[T] = [] error: List[Tuple[Any, Any, Any]] = [] - # Get current instance or None - prev_io_loop = IOLoop.current(instance=False) - # Instantiate new IOLoop - ioloop = IOLoop() - ioloop.make_current() - try: - ioloop.run_sync(wrapper) - finally: - ioloop.close(all_fds=close_all) - if prev_io_loop: - prev_io_loop.make_current() - else: - IOLoop.clear_current() + prev_loop = asyncio._get_running_loop() + new_loop = asyncio.new_event_loop() + if prev_loop: + # Reset running loop + asyncio._set_running_loop(None) + new_loop.run_until_complete(wrapper()) + asyncio._set_running_loop(prev_loop) + # @todo: close_all if error: reraise(*error[0]) return result[0] diff --git a/settings.py b/settings.py index 39eed5c61b..6d78230d75 100644 --- a/settings.py +++ b/settings.py @@ -8,10 +8,12 @@ # Python modules import logging import re +import os # NOC modules from noc.config import config +os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "1" DEBUG = False TEMPLATE_DEBUG = DEBUG ADMINS = [] -- GitLab From 5d33b7d8630e2a1e23dfe26ba971fbbd5e1c9d12 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Wed, 29 Apr 2020 13:03:33 +0300 Subject: [PATCH 04/61] Fix --- core/ioloop/util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 67f9cc9ee2..7f4c79110b 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -11,8 +11,6 @@ import asyncio # Third-party modules from typing import Callable, TypeVar, List, Tuple, Any -from tornado.ioloop import IOLoop -import tornado.gen # NOC modules from noc.core.comp import reraise -- GitLab From 7b4acb1ac09d330b9a4760181a8892a7cf34bb03 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Thu, 30 Apr 2020 12:34:12 +0300 Subject: [PATCH 05/61] Set up asyncio policy --- commands/ping.py | 10 +++------ core/ioloop/util.py | 44 ++++++++++++++++++++++++++++++++++++++++ core/script/cli/base.py | 4 ++-- core/script/mml/base.py | 8 +++----- core/script/rtsp/base.py | 8 +++----- core/service/base.py | 7 ++----- core/service/loader.py | 2 ++ core/service/stub.py | 6 +++--- 8 files changed, 62 insertions(+), 27 deletions(-) diff --git a/commands/ping.py b/commands/ping.py index c5315e2a7b..3f516dfc2f 100755 --- a/commands/ping.py +++ b/commands/ping.py @@ -1,7 +1,7 @@ # ---------------------------------------------------------------------- # Pretty command # ---------------------------------------------------------------------- -# Copyright (C) 2007-2019 The NOC Project +# Copyright (C) 2007-2020 The NOC Project # See LICENSE for details # ---------------------------------------------------------------------- @@ -17,7 +17,7 @@ import tornado.queues from noc.core.management.base import BaseCommand from noc.core.validators import is_ipv4 from noc.core.ioloop.ping import Ping -from noc.config import config +from noc.core.ioloop.util import setup_asyncio class Command(BaseCommand): @@ -46,11 +46,7 @@ class Command(BaseCommand): except OSError as e: self.die("Cannot read file %s: %s\n" % (fn, e)) # Ping - if config.features.use_uvlib: - from tornaduv import UVLoop - - self.stderr.write("Using libuv\n") - tornado.ioloop.IOLoop.configure(UVLoop) + setup_asyncio() self.ping = Ping() self.jobs = jobs self.queue = tornado.queues.Queue(self.jobs) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 7f4c79110b..95a05ff065 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -8,13 +8,16 @@ # Python modules import sys import asyncio +import logging # Third-party modules from typing import Callable, TypeVar, List, Tuple, Any # NOC modules +from noc.config import config from noc.core.comp import reraise +logger = logging.getLogger(__name__) T = TypeVar("T") @@ -45,7 +48,48 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: asyncio._set_running_loop(None) new_loop.run_until_complete(wrapper()) asyncio._set_running_loop(prev_loop) + if prev_loop: + asyncio._set_running_loop(prev_loop) + else: + asyncio._set_running_loop(None) + asyncio.get_event_loop_policy()._local._set_called = False # @todo: close_all if error: reraise(*error[0]) return result[0] + + +_setup_completed = False + + +def setup_asyncio() -> None: + """ + Initial setup of asyncio + + :return: + """ + global _setup_completed + + if _setup_completed: + return + logger.info("Setting up asyncio event loop policy") + if config.features.use_uvlib: + try: + import uvloop + + logger.info("Setting up libuv event loop") + uvloop.install() + except ImportError: + logger.info("libuv is not installed, using default event loop") + asyncio.set_event_loop_policy(NOCEventLoopPolicy()) + _setup_completed = True + + +class NOCEventLoopPolicy(asyncio.DefaultEventLoopPolicy): + def get_event_loop(self) -> asyncio.AbstractEventLoop: + try: + return super().get_event_loop() + except RuntimeError: + loop = self.new_event_loop() + self.set_event_loop(loop) + return loop diff --git a/core/script/cli/base.py b/core/script/cli/base.py index f01547ae50..b1cd7fe4c6 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -145,7 +145,7 @@ class CLI(object): # Cannot call call_later directly due to # thread-safety problems # See tornado issue #1773 - IOLoop.instance().add_callback(self._set_close_timeout, session_timeout) + IOLoop.current().add_callback(self._set_close_timeout, session_timeout) def _set_close_timeout(self, session_timeout): """ @@ -154,7 +154,7 @@ class CLI(object): :return: """ with self.close_timeout_lock: - self.close_timeout = IOLoop.instance().call_later(session_timeout, self.maybe_close) + self.close_timeout = IOLoop.current().call_later(session_timeout, self.maybe_close) def create_iostream(self): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) diff --git a/core/script/mml/base.py b/core/script/mml/base.py index 9c9558e24b..89a4fe82de 100644 --- a/core/script/mml/base.py +++ b/core/script/mml/base.py @@ -88,7 +88,7 @@ class MMLBase(object): # Cannot call call_later directly due to # thread-safety problems # See tornado issue #1773 - tornado.ioloop.IOLoop.instance().add_callback(self._set_close_timeout, session_timeout) + tornado.ioloop.IOLoop.current().add_callback(self._set_close_timeout, session_timeout) def _set_close_timeout(self, session_timeout): """ @@ -96,9 +96,7 @@ class MMLBase(object): :param session_timeout: :return: """ - self.close_timeout = tornado.ioloop.IOLoop.instance().call_later( - session_timeout, self.close - ) + self.close_timeout = tornado.ioloop.IOLoop.current().call_later(session_timeout, self.close) def create_iostream(self): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -128,7 +126,7 @@ class MMLBase(object): def set_script(self, script): self.script = script if self.close_timeout: - tornado.ioloop.IOLoop.instance().remove_timeout(self.close_timeout) + tornado.ioloop.IOLoop.current().remove_timeout(self.close_timeout) self.close_timeout = None @tornado.gen.coroutine diff --git a/core/script/rtsp/base.py b/core/script/rtsp/base.py index 86a7d6b2e2..aefeef1505 100644 --- a/core/script/rtsp/base.py +++ b/core/script/rtsp/base.py @@ -94,7 +94,7 @@ class RTSPBase(object): # Cannot call call_later directly due to # thread-safety problems # See tornado issue #1773 - tornado.ioloop.IOLoop.instance().add_callback(self._set_close_timeout, session_timeout) + tornado.ioloop.IOLoop.current().add_callback(self._set_close_timeout, session_timeout) def _set_close_timeout(self, session_timeout): """ @@ -102,9 +102,7 @@ class RTSPBase(object): :param session_timeout: :return: """ - self.close_timeout = tornado.ioloop.IOLoop.instance().call_later( - session_timeout, self.close - ) + self.close_timeout = tornado.ioloop.IOLoop.current().call_later(session_timeout, self.close) def create_iostream(self): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -134,7 +132,7 @@ class RTSPBase(object): def set_script(self, script): self.script = script if self.close_timeout: - tornado.ioloop.IOLoop.instance().remove_timeout(self.close_timeout) + tornado.ioloop.IOLoop.current().remove_timeout(self.close_timeout) self.close_timeout = None def get_uri(self, port=None): diff --git a/core/service/base.py b/core/service/base.py index 3a392d1ea5..7dfb55c68c 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -45,6 +45,7 @@ from noc.core.nsq.topic import TopicQueue from noc.core.nsq.pub import mpub from noc.core.nsq.error import NSQPubError from noc.core.clickhouse.shard import ShardingFunction +from noc.core.ioloop.util import setup_asyncio from .api import API, APIRequestHandler from .doc import DocRequestHandler from .mon import MonRequestHandler @@ -321,11 +322,7 @@ class Service(object): else: self.logger.warning("Running service %s", self.name) try: - if config.features.use_uvlib: - from tornaduv import UVLoop - - self.logger.warning("Using libuv") - IOLoop.configure(UVLoop) + setup_asyncio() self.ioloop = IOLoop.current() # Initialize DCS self.dcs = get_dcs(cmd_options["dcs"]) diff --git a/core/service/loader.py b/core/service/loader.py index 31795663c7..26070ede57 100644 --- a/core/service/loader.py +++ b/core/service/loader.py @@ -18,8 +18,10 @@ def get_service(): global _service if not _service: + from noc.core.ioloop.util import setup_asyncio from .stub import ServiceStub + setup_asyncio() _service = ServiceStub() _service.start() return _service diff --git a/core/service/stub.py b/core/service/stub.py index e15726befd..c6ca6dca17 100644 --- a/core/service/stub.py +++ b/core/service/stub.py @@ -11,12 +11,12 @@ import threading from collections import defaultdict # Third-party modules -import tornado.ioloop +from tornado.ioloop import IOLoop # NOC modules from noc.core.dcs.loader import get_dcs, DEFAULT_DCS -from .rpc import RPCProxy from noc.config import config +from .rpc import RPCProxy class ServiceStub(object): @@ -36,7 +36,7 @@ class ServiceStub(object): self.is_ready.wait() def _start(self): - self.ioloop = tornado.ioloop.IOLoop.instance() + self.ioloop = IOLoop.current() # Initialize DCS self.dcs = get_dcs(DEFAULT_DCS) # Activate service -- GitLab From 98a7ad4783719bb7c84e3671e50fbfe35205da67 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Thu, 30 Apr 2020 16:09:04 +0300 Subject: [PATCH 06/61] Fix UDP timeout handling --- core/ioloop/ping.py | 2 +- core/ioloop/udp.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/ioloop/ping.py b/core/ioloop/ping.py index 23d4cbcdd5..8be6d1dfab 100644 --- a/core/ioloop/ping.py +++ b/core/ioloop/ping.py @@ -154,7 +154,7 @@ class PingSocket(object): """ Check future is not timed out """ - if future.running(): + if not future.done(): if future.sid in self.sessions: logger.debug("[%s] Timed out", future.sid[0]) del self.sessions[future.sid] diff --git a/core/ioloop/udp.py b/core/ioloop/udp.py index 0be5a984e3..c18311f535 100644 --- a/core/ioloop/udp.py +++ b/core/ioloop/udp.py @@ -42,7 +42,7 @@ class UDPSocket(object): if tos: self.socket.setsockopt(socket.IPPROTO_IP, socket.IP_TOS, tos) self.fd = self.socket.fileno() - self.socket.setblocking(0) + self.socket.setblocking(False) self.future = None self.timeout = None self.events = None @@ -130,7 +130,7 @@ class UDPSocket(object): self.sendto(data, address) def on_timeout(self): - if self.future and self.future.running(): + if self.future and not self.future.done(): self.timeout_task = None try: raise socket.timeout() -- GitLab From a4d97bc0b98ec2b5d8e0fddd2bc76de007cc9093 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Thu, 30 Apr 2020 16:57:29 +0300 Subject: [PATCH 07/61] Fix telnet --- core/script/cli/telnet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/script/cli/telnet.py b/core/script/cli/telnet.py index 4b4cf1cc35..e39c4f0b87 100644 --- a/core/script/cli/telnet.py +++ b/core/script/cli/telnet.py @@ -13,6 +13,7 @@ import codecs from tornado.iostream import IOStream import tornado.gen from typing import List, Optional, Union +from tornado.concurrent import Future # NOC modules from noc.core.perf import metrics @@ -286,11 +287,11 @@ class TelnetIOStream(IOStream): metrics["telnet_reads_blocked"] += 1 return n - def write(self, data, callback=None): + def write(self, data: Union[bytes, memoryview]) -> "Future[None]": data = self.parser.escape(data) metrics["telnet_writes"] += 1 metrics["telnet_write_bytes"] += len(data) - return super(TelnetIOStream, self).write(data, callback=callback) + return super().write(data) class TelnetCLI(CLI): -- GitLab From 3eb3cfd8eef505056a8f7e6a923dd25483c1e01a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Fri, 1 May 2020 12:22:27 +0300 Subject: [PATCH 08/61] Fix loop socket closing --- core/ioloop/util.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 95a05ff065..f091e32460 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -46,13 +46,16 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: if prev_loop: # Reset running loop asyncio._set_running_loop(None) - new_loop.run_until_complete(wrapper()) - asyncio._set_running_loop(prev_loop) - if prev_loop: + try: + new_loop.run_until_complete(wrapper()) + finally: + new_loop.close() asyncio._set_running_loop(prev_loop) - else: - asyncio._set_running_loop(None) - asyncio.get_event_loop_policy()._local._set_called = False + if prev_loop: + asyncio._set_running_loop(prev_loop) + else: + asyncio._set_running_loop(None) + asyncio.get_event_loop_policy().reset_called() # @todo: close_all if error: reraise(*error[0]) @@ -93,3 +96,10 @@ class NOCEventLoopPolicy(asyncio.DefaultEventLoopPolicy): loop = self.new_event_loop() self.set_event_loop(loop) return loop + + def reset_called(self) -> None: + """ + Reset called status + :return: + """ + self._local._set_called = False -- GitLab From 18e139ab85579f11dd72eb306424f5c0c1ee3c5a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Fri, 1 May 2020 12:39:00 +0300 Subject: [PATCH 09/61] http client: Fix connection refused handling --- core/http/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/http/client.py b/core/http/client.py index 9d0e6a32c4..8532247ac5 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -178,6 +178,10 @@ def fetch( future=stream.connect(connect_address, server_hostname=u.netloc), ) except tornado.iostream.StreamClosedError: + # May be not relevant on Tornado6 anymore + metrics["httpclient_timeouts"] += 1 + return ERR_TIMEOUT, {}, "Connection refused" + except ConnectionRefusedError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection refused" except tornado.gen.TimeoutError: -- GitLab From 365cf04ea63727749323a291c37e9dab070e2ff4 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Fri, 1 May 2020 15:00:38 +0300 Subject: [PATCH 10/61] Fix --- core/ioloop/util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index f091e32460..710ee7e83e 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -30,6 +30,7 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: :param close_all: Close all file descriptors :return: Callable result """ + global _setup_completed async def wrapper(): try: @@ -38,6 +39,9 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: except Exception: error.append(sys.exc_info()) + if not _setup_completed: + setup_asyncio() + result: List[T] = [] error: List[Tuple[Any, Any, Any]] = [] -- GitLab From 8d64cf89442b711339eff8f668f2971a6adcfce5 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 09:58:06 +0300 Subject: [PATCH 11/61] Fix synchronous resolver --- core/dcs/base.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index 26aa9ee872..d85b1c4315 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -9,8 +9,7 @@ import logging import random import signal -import sys -from threading import Lock, Event +from threading import Lock import datetime import os from urllib.parse import urlparse @@ -23,7 +22,7 @@ import tornado.locks # NOC modules from noc.config import config from noc.core.perf import metrics -from noc.core.comp import reraise +from noc.core.ioloop.util import run_sync from .error import ResolutionError @@ -157,26 +156,12 @@ class DCSBase(object): @tornado.gen.coroutine def _resolve(): - try: - r = yield self.resolve( - name, hint=hint, wait=wait, timeout=timeout, full_result=full_result - ) - result.append(r) - except tornado.gen.Return as e: - result.append(e.value) - except Exception: - error.append(sys.exc_info()) - event.set() + r = yield self.resolve( + name, hint=hint, wait=wait, timeout=timeout, full_result=full_result + ) + return r - event = Event() - result = [] - error = [] - IOLoop.current().add_callback(_resolve) - event.wait() - if error: - reraise(*error[0]) - else: - return result[0] + return run_sync(_resolve) @tornado.gen.coroutine def resolve_near( -- GitLab From 987e87cb19dd65dd6acc7fd0eaf396fe155374c2 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 10:00:07 +0300 Subject: [PATCH 12/61] async/await resolver --- core/dcs/base.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index d85b1c4315..d8fc60c833 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -154,9 +154,8 @@ class DCSBase(object): :return: """ - @tornado.gen.coroutine - def _resolve(): - r = yield self.resolve( + async def _resolve(): + r = await self.resolve( name, hint=hint, wait=wait, timeout=timeout, full_result=full_result ) return r -- GitLab From f1ad4a4f943a5e906c70e7ed23ed711622b5479a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 10:25:36 +0300 Subject: [PATCH 13/61] Fix PeriodicCallback calls --- services/chwriter/service.py | 4 ++-- services/classifier/service.py | 2 +- services/syslogcollector/service.py | 2 +- services/trapcollector/service.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/services/chwriter/service.py b/services/chwriter/service.py index 92f5012b0b..87b84d53fb 100755 --- a/services/chwriter/service.py +++ b/services/chwriter/service.py @@ -46,10 +46,10 @@ class CHWriterService(Service): @tornado.gen.coroutine def on_activate(self): - report_callback = tornado.ioloop.PeriodicCallback(self.report, 10000, self.ioloop) + report_callback = tornado.ioloop.PeriodicCallback(self.report, 10000) report_callback.start() check_callback = tornado.ioloop.PeriodicCallback( - self.check_channels, config.chwriter.batch_delay_ms, self.ioloop + self.check_channels, config.chwriter.batch_delay_ms ) check_callback.start() yield self.subscribe( diff --git a/services/classifier/service.py b/services/classifier/service.py index 701b4f685e..982adb8edd 100755 --- a/services/classifier/service.py +++ b/services/classifier/service.py @@ -131,7 +131,7 @@ class ClassifierService(Service): self.load_link_action() self.load_handlers() yield self.subscribe("events.%s" % config.pool, "fmwriter", self.on_event) - report_callback = tornado.ioloop.PeriodicCallback(self.report, 1000, self.ioloop) + report_callback = tornado.ioloop.PeriodicCallback(self.report, 1000) report_callback.start() def load_triggers(self): diff --git a/services/syslogcollector/service.py b/services/syslogcollector/service.py index cc7dfd60e1..8d5bd82ad4 100755 --- a/services/syslogcollector/service.py +++ b/services/syslogcollector/service.py @@ -57,7 +57,7 @@ class SyslogCollectorService(Service): # Report invalid sources every 60 seconds self.logger.info("Stating invalid sources reporting task") self.report_invalid_callback = tornado.ioloop.PeriodicCallback( - self.report_invalid_sources, 60000, self.ioloop + self.report_invalid_sources, 60000 ) self.report_invalid_callback.start() # Start tracking changes diff --git a/services/trapcollector/service.py b/services/trapcollector/service.py index aaed1769cc..febc6a2e05 100755 --- a/services/trapcollector/service.py +++ b/services/trapcollector/service.py @@ -54,7 +54,7 @@ class TrapCollectorService(Service): # Report invalid sources every 60 seconds self.logger.info("Stating invalid sources reporting task") self.report_invalid_callback = tornado.ioloop.PeriodicCallback( - self.report_invalid_sources, 60000, self.ioloop + self.report_invalid_sources, 60000 ) self.report_invalid_callback.start() # Start tracking changes -- GitLab From b3af3e141ef61dd5137df07e74ab8e3efd888588 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 11:44:37 +0300 Subject: [PATCH 14/61] async/await snmp --- commands/snmp.py | 17 +++++++---------- core/ioloop/snmp.py | 30 +++++++++++++----------------- core/script/snmp/base.py | 23 ++++++++--------------- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/commands/snmp.py b/commands/snmp.py index c472f8ea22..895d8ff971 100755 --- a/commands/snmp.py +++ b/commands/snmp.py @@ -11,7 +11,6 @@ from time import perf_counter # Third-party modules from tornado.ioloop import IOLoop -import tornado.gen import tornado.queues # NOC modules @@ -69,23 +68,21 @@ class Command(BaseCommand): self.ioloop.spawn_callback(self.poll_worker, community, oid, timeout, version) self.ioloop.run_sync(self.poll_task) - @tornado.gen.coroutine - def poll_task(self): + async def poll_task(self): for a in self.addresses: - yield self.queue.put(a) + await self.queue.put(a) for i in range(self.jobs): - yield self.queue.put(None) - yield self.queue.join() + await self.queue.put(None) + await self.queue.join() - @tornado.gen.coroutine - def poll_worker(self, community, oid, timeout, version): + async def poll_worker(self, community, oid, timeout, version): while True: - a = yield self.queue.get() + a = await self.queue.get() if a: for c in community: t0 = perf_counter() try: - r = yield snmp_get( + r = await snmp_get( address=a, oids=oid, community=c, version=version, timeout=timeout ) s = "OK" diff --git a/core/ioloop/snmp.py b/core/ioloop/snmp.py index 855223e305..9368957d3d 100644 --- a/core/ioloop/snmp.py +++ b/core/ioloop/snmp.py @@ -39,8 +39,7 @@ logger = logging.getLogger(__name__) BULK_MAX_REPETITIONS = 20 -@coroutine -def snmp_get( +async def snmp_get( address, oids, port=161, @@ -75,8 +74,8 @@ def snmp_get( sock.settimeout(timeout) # Wait for result try: - yield sock.sendto(pdu, (address, port)) - data, addr = yield sock.recvfrom(4096) + await sock.sendto(pdu, (address, port)) + data, addr = await sock.recvfrom(4096) except socket.timeout: raise SNMPError(code=TIMED_OUT, oid=oids[0]) except socket.gaierror as e: @@ -127,7 +126,7 @@ def snmp_get( oid_parts += [[vb[0] for vb in resp.varbinds[b_idx + 1 :]]] for new_oids in oid_parts: try: - new_result = yield snmp_get( + new_result = await snmp_get( address=address, oids={k: k for k in new_oids}, port=port, @@ -168,8 +167,7 @@ def snmp_get( raise SNMPError(code=resp.error_status, oid=oid) -@coroutine -def snmp_count( +async def snmp_count( address, oid, port=161, @@ -209,8 +207,8 @@ def snmp_count( pdu = getnext_pdu(community, oid, version=version) # Send request and wait for response try: - yield sock.sendto(pdu, (address, port)) - data, addr = yield sock.recvfrom(4096) + await sock.sendto(pdu, (address, port)) + data, addr = await sock.recvfrom(4096) except socket.timeout: raise SNMPError(code=TIMED_OUT, oid=oid) except socket.gaierror as e: @@ -248,8 +246,7 @@ def snmp_count( return result -@coroutine -def snmp_getnext( +async def snmp_getnext( address, oid, port=161, @@ -305,8 +302,8 @@ def snmp_getnext( pdu = getnext_pdu(community, oid, version=version) # Send request and wait for response try: - yield sock.sendto(pdu, (address, port)) - data, addr = yield sock.recvfrom(4096) + await sock.sendto(pdu, (address, port)) + data, addr = await sock.recvfrom(4096) except socket.timeout: if not max_retries: close_socket() @@ -354,8 +351,7 @@ def snmp_getnext( close_socket() -@coroutine -def snmp_set( +async def snmp_set( address, varbinds, port=161, @@ -380,8 +376,8 @@ def snmp_set( pdu = set_pdu(community=community, varbinds=varbinds, version=version) # Wait for result try: - yield sock.sendto(pdu, (address, port)) - data, addr = yield sock.recvfrom(4096) + await sock.sendto(pdu, (address, port)) + data, addr = await sock.recvfrom(4096) except socket.timeout: raise SNMPError(code=TIMED_OUT, oid=varbinds[0][0]) except socket.gaierror as e: diff --git a/core/script/snmp/base.py b/core/script/snmp/base.py index 69e4aa7550..df98c8fcc1 100644 --- a/core/script/snmp/base.py +++ b/core/script/snmp/base.py @@ -8,9 +8,6 @@ # Python modules import weakref -# Third-party modules -import tornado.gen - # NOC modules from noc.core.ioloop.snmp import snmp_get, snmp_count, snmp_getnext, snmp_set from noc.core.snmp.error import SNMPError, TIMED_OUT @@ -102,10 +99,9 @@ class SNMP(object): :returns: eigther result scalar or dict of name -> value """ - @tornado.gen.coroutine - def run(): + async def run(): try: - r = yield snmp_get( + r = await snmp_get( address=self.script.credentials["address"], oids=oids, community=str(self.script.credentials["snmp_ro"]), @@ -141,10 +137,9 @@ class SNMP(object): :returns: eigther result scalar or dict of name -> value """ - @tornado.gen.coroutine - def run(): + async def run(): try: - r = yield snmp_set( + r = await snmp_set( address=self.script.credentials["address"], varbinds=varbinds, community=str(self.script.credentials["snmp_rw"]), @@ -175,10 +170,9 @@ class SNMP(object): :param filter: Callable accepting oid and value and returning boolean """ - @tornado.gen.coroutine - def run(): + async def run(): try: - r = yield snmp_count( + r = await snmp_count( address=self.script.credentials["address"], oid=oid, community=str(self.script.credentials["snmp_ro"]), @@ -215,10 +209,9 @@ class SNMP(object): raw_varbinds=False, display_hints=None, ): - @tornado.gen.coroutine - def run(): + async def run(): try: - r = yield snmp_getnext( + r = await snmp_getnext( address=self.script.credentials["address"], oid=oid, community=str(self.script.credentials["snmp_ro"]), -- GitLab From d0af8aaa6a783d96ea419fd4265c59d4988f0e64 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 12:03:50 +0300 Subject: [PATCH 15/61] async/await ping --- commands/ping.py | 16 +++++++--------- core/ioloop/ping.py | 11 ++++------- core/ioloop/snmp.py | 3 --- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/commands/ping.py b/commands/ping.py index 3f516dfc2f..9c99d043c9 100755 --- a/commands/ping.py +++ b/commands/ping.py @@ -54,20 +54,18 @@ class Command(BaseCommand): IOLoop.current().spawn_callback(self.ping_worker) IOLoop.current().run_sync(self.ping_task) - @tornado.gen.coroutine - def ping_task(self): + async def ping_task(self): for a in self.addresses: - yield self.queue.put(a) + await self.queue.put(a) for i in range(self.jobs): - yield self.queue.put(None) - yield self.queue.join() + await self.queue.put(None) + await self.queue.join() - @tornado.gen.coroutine - def ping_worker(self): + async def ping_worker(self): while True: - a = yield self.queue.get() + a = await self.queue.get() if a: - rtt, attempts = yield self.ping.ping_check_rtt(a, count=1, timeout=1000) + rtt, attempts = await self.ping.ping_check_rtt(a, count=1, timeout=1000) if rtt: self.stdout.write("%s %.2fms\n" % (a, rtt * 1000)) else: diff --git a/core/ioloop/ping.py b/core/ioloop/ping.py index 8be6d1dfab..8b8980a9fd 100644 --- a/core/ioloop/ping.py +++ b/core/ioloop/ping.py @@ -18,7 +18,6 @@ from time import perf_counter # Third-party modules from tornado.ioloop import IOLoop -import tornado.gen from tornado.concurrent import Future from tornado.util import errno_from_exception @@ -322,8 +321,7 @@ class Ping(object): logger.error("Failed to create ping socket: %s", e) return None - @tornado.gen.coroutine - def ping_check(self, address, size=64, count=1, timeout=1000, policy=CHECK_FIRST): + async def ping_check(self, address, size=64, count=1, timeout=1000, policy=CHECK_FIRST): """ Perform ping check and return status :param address: IPv4/IPv6 address of host @@ -340,7 +338,7 @@ class Ping(object): req_id = next(self.iter_request) & 0xFFFF result = policy == self.CHECK_ALL and count > 0 for seq in range(count): - r = yield socket.ping(address, timeout, size, req_id, seq) + r = await socket.ping(address, timeout, size, req_id, seq) if r and policy == self.CHECK_FIRST: result = True break @@ -350,8 +348,7 @@ class Ping(object): logger.debug("[%s] Result: %s", address, result) return result - @tornado.gen.coroutine - def ping_check_rtt(self, address, size=64, count=1, timeout=1000, policy=CHECK_FIRST): + async def ping_check_rtt(self, address, size=64, count=1, timeout=1000, policy=CHECK_FIRST): """ Perform ping check and return round-trip time :param address: IPv4/IPv6 address of host @@ -370,7 +367,7 @@ class Ping(object): rtts = [] attempt = 0 for seq in range(count): - rtt = yield socket.ping(address, timeout, size, req_id, seq) + rtt = await socket.ping(address, timeout, size, req_id, seq) if rtt is not None: rtts += [rtt] if rtt and policy == self.CHECK_FIRST: diff --git a/core/ioloop/snmp.py b/core/ioloop/snmp.py index 9368957d3d..6af2517520 100644 --- a/core/ioloop/snmp.py +++ b/core/ioloop/snmp.py @@ -10,9 +10,6 @@ import logging import socket import errno -# Third-party modules -from tornado.gen import coroutine - # NOC modules from noc.core.snmp.version import SNMP_v2c from noc.core.snmp.get import ( -- GitLab From d4f2ca309c5395e8835e03400fbaa84c2eab4c59 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 15:33:05 +0300 Subject: [PATCH 16/61] async/await consul and http clients --- commands/ping.py | 1 - core/config/proto/consul.py | 15 +++---- core/consul.py | 8 ++-- core/dcs/consuldcs.py | 84 ++++++++++++++++--------------------- core/http/client.py | 28 ++++++------- 5 files changed, 58 insertions(+), 78 deletions(-) diff --git a/commands/ping.py b/commands/ping.py index 9c99d043c9..a048d121a0 100755 --- a/commands/ping.py +++ b/commands/ping.py @@ -10,7 +10,6 @@ import argparse # Third-party modules from tornado.ioloop import IOLoop -import tornado.gen import tornado.queues # NOC modules diff --git a/core/config/proto/consul.py b/core/config/proto/consul.py index 7b53eaf730..4435683ba7 100644 --- a/core/config/proto/consul.py +++ b/core/config/proto/consul.py @@ -5,14 +5,11 @@ # See LICENSE for details # ---------------------------------------------------------------------- -# Third-party modules -import tornado.ioloop -import tornado.gen - # NOC modules -from .base import BaseProtocol from noc.core.consul import ConsulClient from noc.core.comp import smart_text +from noc.core.ioloop.util import run_sync +from .base import BaseProtocol class ConsulProtocol(BaseProtocol): @@ -37,8 +34,7 @@ class ConsulProtocol(BaseProtocol): self.token = self.url_query.get("token") self.path = self.parsed_url.path[1:] - @tornado.gen.coroutine - def load_async(self): + async def load_async(self): consul = ConsulClient(host=self.host, port=self.port, token=self.token) # Convert to dict data = {} @@ -46,7 +42,7 @@ class ConsulProtocol(BaseProtocol): pl = len(self.path) else: pl = len(self.path) + 1 - index, kv_data = yield consul.kv.get(self.path, recurse=True, token=self.token) + index, kv_data = await consul.kv.get(self.path, recurse=True, token=self.token) if not kv_data: return for i in kv_data: @@ -67,8 +63,7 @@ class ConsulProtocol(BaseProtocol): self.config.update(data) def load(self): - ioloop = tornado.ioloop.IOLoop.current() - ioloop.run_sync(self.load_async) + run_sync(self.load_async) def dump(self, section=None): raise NotImplementedError diff --git a/core/consul.py b/core/consul.py index 6c57552b82..e1382a60aa 100644 --- a/core/consul.py +++ b/core/consul.py @@ -6,12 +6,11 @@ # ---------------------------------------------------------------------- # Third-party modules -import tornado.gen -import tornado.ioloop import tornado.httpclient import consul.base import consul.tornado +# NOC modules from noc.config import config @@ -27,11 +26,10 @@ class ConsulHTTPClient(consul.tornado.HTTPClient): Gentler version of tornado http client """ - @tornado.gen.coroutine - def _request(self, callback, request): + async def _request(self, callback, request): client = tornado.httpclient.AsyncHTTPClient(force_instance=True, max_clients=1) try: - response = yield client.fetch(request) + response = await client.fetch(request) except tornado.httpclient.HTTPError as e: if e.code == 599: raise consul.base.Timeout diff --git a/core/dcs/consuldcs.py b/core/dcs/consuldcs.py index 96e1f0289b..ff5cb8e8a3 100644 --- a/core/dcs/consuldcs.py +++ b/core/dcs/consuldcs.py @@ -37,9 +37,8 @@ class ConsulHTTPClient(consul.tornado.HTTPClient): Gentler version of tornado http client """ - @tornado.gen.coroutine - def _request(self, callback, url, method="GET", body=None): - code, headers, body = yield fetch( + async def _request(self, callback, url, method="GET", body=None): + code, headers, body = await fetch( url, method=method, body=body, @@ -74,15 +73,14 @@ class ConsulClient(consul.base.Consul): class ConsulResolver(ResolverBase): - @tornado.gen.coroutine - def start(self): + async def start(self): index = 0 self.logger.info("[%s] Starting resolver (near=%s)", self.name, self.near) while not self.to_shutdown: self.logger.debug("[%s] Requesting changes from index %d", self.name, index) try: old_index = index - index, services = yield self.dcs.consul.health.service( + index, services = await self.dcs.consul.health.service( service=self.name, index=index, token=self.dcs.consul_token, @@ -190,8 +188,7 @@ class ConsulDCS(DCSBase): elif k == "release_after": self.release_after = v - @tornado.gen.coroutine - def create_session(self): + async def create_session(self): """ Create consul session :return: @@ -200,7 +197,7 @@ class ConsulDCS(DCSBase): checks = ["serfHealth"] while True: try: - self.session = yield self.consul.session.create( + self.session = await self.consul.session.create( name=self.name, checks=checks, behavior="delete", @@ -209,7 +206,7 @@ class ConsulDCS(DCSBase): ) break except ConsulRepeatableErrors: - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue self.logger.info("Session id: %s", self.session) self.keep_alive_task = PeriodicCallback( @@ -217,26 +214,24 @@ class ConsulDCS(DCSBase): ) self.keep_alive_task.start() - @tornado.gen.coroutine - def destroy_session(self): + async def destroy_session(self): if self.session: self.logger.info("Destroying session %s", self.session) if self.keep_alive_task: self.keep_alive_task.stop() self.keep_alive_task = None try: - yield self.consul.session.destroy(self.session) + await self.consul.session.destroy(self.session) except ConsulRepeatableErrors: pass # Ignore consul errors self.session = None - @tornado.gen.coroutine - def register(self, name, address, port, pool=None, lock=None, tags=None): + async def register(self, name, address, port, pool=None, lock=None, tags=None): if pool: name = "%s-%s" % (name, pool) self.name = name if lock: - yield self.acquire_lock(lock) + await self.acquire_lock(lock) svc_id = self.session or str(uuid.uuid4()) tags = tags[:] if tags else [] tags += [svc_id] @@ -255,7 +250,7 @@ class ConsulDCS(DCSBase): "Registering service %s: %s:%s (id=%s)", name, address, port, svc_id ) try: - r = yield self.consul.agent.service.register( + r = await self.consul.agent.service.register( name=name, service_id=svc_id, address=address, @@ -266,7 +261,7 @@ class ConsulDCS(DCSBase): except ConsulRepeatableErrors as e: metrics["error", ("type", "cant_register_consul")] += 1 self.logger.info("Cannot register service %s: %s", name, e) - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue if r: self.svc_id = svc_id @@ -275,11 +270,10 @@ class ConsulDCS(DCSBase): else: return True - @tornado.gen.coroutine - def deregister(self): + async def deregister(self): if self.session: try: - yield self.destroy_session() + await self.destroy_session() except ConsulRepeatableErrors: metrics["error", ("type", "cant_destroy_consul_session_soft")] += 1 except Exception as e: @@ -287,7 +281,7 @@ class ConsulDCS(DCSBase): self.logger.error("Cannot destroy session: %s", e) if self.svc_id and config.features.service_registration: try: - yield self.consul.agent.service.deregister(self.svc_id) + await self.consul.agent.service.deregister(self.svc_id) except ConsulRepeatableErrors: metrics["error", ("type", "cant_deregister_consul_soft")] += 1 except Exception as e: @@ -295,8 +289,7 @@ class ConsulDCS(DCSBase): self.logger.error("Cannot deregister service: %s", e) self.svc_id = None - @tornado.gen.coroutine - def keep_alive(self): + async def keep_alive(self): metrics["dcs_consul_keepalives"] += 1 if self.in_keep_alive: metrics["error", ("type", "dcs_consul_overlapped_keepalives")] += 1 @@ -307,7 +300,7 @@ class ConsulDCS(DCSBase): touched = False for n in range(self.keepalive_attempts): try: - yield self.consul.session.renew(self.session) + await self.consul.session.renew(self.session) self.logger.debug("Session renewed") touched = True break @@ -317,7 +310,7 @@ class ConsulDCS(DCSBase): except ConsulRepeatableErrors as e: self.logger.warning("Cannot refresh session due to ignorable error: %s", e) metrics["error", ("type", "dcs_consul_keepalive_retries")] += 1 - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) if not touched: self.logger.critical("Cannot refresh session, stopping") if self.keep_alive_task: @@ -333,16 +326,15 @@ class ConsulDCS(DCSBase): def get_lock_path(self, lock): return self.consul_prefix + "/locks/" + lock - @tornado.gen.coroutine - def acquire_lock(self, name): + async def acquire_lock(self, name): if not self.session: - yield self.create_session() + await self.create_session() key = self.get_lock_path(name) index = None while True: self.logger.info("Acquiring lock: %s", key) try: - status = yield self.consul.kv.put( + status = await self.consul.kv.put( key=key, value=self.session, acquire=self.session, token=self.consul_token ) if status: @@ -350,28 +342,27 @@ class ConsulDCS(DCSBase): else: metrics["error", ("type", "dcs_consul_failed_get_lock")] += 1 self.logger.info("Failed to acquire lock") - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) except ConsulRepeatableErrors: - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue # Waiting for lock release while True: try: - index, data = yield self.consul.kv.get( + index, data = await self.consul.kv.get( key=key, index=index, token=self.consul_token ) if not data: index = None # Key has been deleted - yield tornado.gen.sleep( + await tornado.gen.sleep( self.DEFAULT_CONSUL_LOCK_DELAY * (0.5 + random.random()) ) break except ConsulRepeatableErrors: - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) self.logger.info("Lock acquired") - @tornado.gen.coroutine - def acquire_slot(self, name, limit): + async def acquire_slot(self, name, limit): """ Acquire shard slot :param name: - @@ -379,7 +370,7 @@ class ConsulDCS(DCSBase): :return: (slot number, number of instances) """ if not self.session: - yield self.create_session() + await self.create_session() if self.total_slots is not None: return self.slot_number, self.total_slots prefix = "%s/slots/%s" % (self.consul_prefix, name) @@ -389,7 +380,7 @@ class ConsulDCS(DCSBase): self.logger.info("Writing contender slot info into %s", contender_path) while True: try: - status = yield self.consul.kv.put( + status = await self.consul.kv.put( key=contender_path, value=contender_info, acquire=self.session, @@ -400,9 +391,9 @@ class ConsulDCS(DCSBase): else: metrics["error", ("type", "dcs_consul_failed_get_slot")] += 1 self.logger.info("Failed to write contender slot info") - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) except ConsulRepeatableErrors: - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) index = 0 cas = 0 while True: @@ -413,9 +404,9 @@ class ConsulDCS(DCSBase): # Non-blocking for a first time # Block until change every next try try: - index, cv = yield self.consul.kv.get(key=prefix, index=index, recurse=True) + index, cv = await self.consul.kv.get(key=prefix, index=index, recurse=True) except ConsulRepeatableErrors: - yield tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue for e in cv: if e["Key"] == manifest_path: @@ -453,7 +444,7 @@ class ConsulDCS(DCSBase): # Update manifest self.logger.info("Attempting to acquire slot %s/%s", slot_number, total_slots) try: - r = yield self.consul.kv.put( + r = await self.consul.kv.put( key=manifest_path, value=ujson.dumps({"Limit": total_slots, "Holders": holders}, indent=2), cas=cas, @@ -468,8 +459,7 @@ class ConsulDCS(DCSBase): return slot_number, total_slots self.logger.info("Cannot acquire slot: CAS changed, retry") - @tornado.gen.coroutine - def resolve_near( + async def resolve_near( self, name, hint=None, wait=True, timeout=None, full_result=False, critical=False ): """ @@ -487,7 +477,7 @@ class ConsulDCS(DCSBase): index = 0 while True: try: - index, services = yield self.consul.health.service( + index, services = await self.consul.health.service( service=name, index=index, near="_agent", token=self.consul_token, passing=True ) except ConsulRepeatableErrors as e: diff --git a/core/http/client.py b/core/http/client.py index 8532247ac5..b40c25a170 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -30,6 +30,7 @@ from noc.core.validators import is_ipv4 from .proxy import SYSTEM_PROXIES from noc.config import config from noc.core.comp import smart_bytes, smart_text + from http_parser.parser import HttpParser logger = logging.getLogger(__name__) @@ -59,8 +60,7 @@ CE_DEFLATE = "deflate" CE_GZIP = "gzip" -@tornado.gen.coroutine -def resolve(host): +async def resolve(host): """ Resolve host and return address :param host: @@ -79,8 +79,7 @@ def resolve(host): return None -@tornado.gen.coroutine -def fetch( +async def fetch( url, method="GET", headers=None, @@ -147,7 +146,7 @@ def fetch( if is_ipv4(host): addr = host else: - addr = yield resolver(host) + addr = await resolver(host) if not addr: return ERR_TIMEOUT, {}, "Cannot resolve host: %s" % host # Detect proxy server @@ -173,7 +172,7 @@ def fetch( if proxy: logger.debug("Connecting to proxy %s:%s", connect_address[0], connect_address[1]) - yield tornado.gen.with_timeout( + await tornado.gen.with_timeout( IOLoop.current().time() + connect_timeout, future=stream.connect(connect_address, server_hostname=u.netloc), ) @@ -198,7 +197,7 @@ def fetch( smart_bytes(DEFAULT_USER_AGENT), ) try: - yield tornado.gen.with_timeout( + await tornado.gen.with_timeout( deadline, future=stream.write(smart_bytes(req)), quiet_exceptions=(tornado.iostream.StreamClosedError,), @@ -213,7 +212,7 @@ def fetch( parser = HttpParser() while not parser.is_headers_complete(): try: - data = yield tornado.gen.with_timeout( + data = await tornado.gen.with_timeout( deadline, future=stream.read_bytes(max_buffer_size, partial=True), quiet_exceptions=(tornado.iostream.StreamClosedError,), @@ -236,7 +235,7 @@ def fetch( if use_tls: logger.debug("Starting TLS negotiation") try: - stream = yield tornado.gen.with_timeout( + stream = await tornado.gen.with_timeout( deadline, future=stream.start_tls( server_side=False, @@ -304,7 +303,7 @@ def fetch( body, ) try: - yield tornado.gen.with_timeout( + await tornado.gen.with_timeout( deadline, future=stream.write(req), quiet_exceptions=(tornado.iostream.StreamClosedError,), @@ -319,7 +318,7 @@ def fetch( response_body = [] while not parser.is_message_complete(): try: - data = yield tornado.gen.with_timeout( + data = await tornado.gen.with_timeout( deadline, future=stream.read_bytes(max_buffer_size, partial=True), quiet_exceptions=(tornado.iostream.StreamClosedError,), @@ -360,7 +359,7 @@ def fetch( if not new_url: return ERR_PARSE_ERROR, {}, "No Location header" logger.debug("HTTP redirect %s %s", code, new_url) - code, parsed_headers, response_body = yield fetch( + code, parsed_headers, response_body = await fetch( new_url, method="GET", headers=headers, @@ -405,9 +404,8 @@ def fetch_sync( content_encoding=None, eof_mark=None, ): - @tornado.gen.coroutine - def _fetch(): - result = yield fetch( + async def _fetch(): + result = await fetch( url, method=method, headers=headers, -- GitLab From 6179fc87cdf8324e630f138679ed70ebdc5fa8f3 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 15:56:39 +0300 Subject: [PATCH 17/61] async/await CLI --- core/profile/base.py | 10 ++-- core/script/cli/base.py | 106 +++++++++++++++++--------------------- core/script/cli/ssh.py | 4 +- core/script/cli/telnet.py | 6 +-- 4 files changed, 53 insertions(+), 73 deletions(-) diff --git a/core/profile/base.py b/core/profile/base.py index c5cad00761..73c71597e0 100644 --- a/core/profile/base.py +++ b/core/profile/base.py @@ -11,7 +11,6 @@ import functools import warnings # Third-party modules -import tornado.gen from typing import Dict, Callable, Union, Optional # NOC modules @@ -599,14 +598,13 @@ class BaseProfile(object, metaclass=BaseProfileMetaclass): return cls.enable_cli_session @classmethod - @tornado.gen.coroutine - def send_backspaces(cls, cli, command, error_text): + async def send_backspaces(cls, cli, command, error_text): # Send backspaces to clean up previous command - yield cli.iostream.write(b"\x08" * len(command)) + await cli.iostream.write(b"\x08" * len(command)) # Send command_submit to force prompt - yield cli.iostream.write(cls.command_submit) + await cli.iostream.write(cls.command_submit) # Wait until prompt - yield cli.read_until_prompt() + await cli.read_until_prompt() def get_mml_login(self, script): """ diff --git a/core/script/cli/base.py b/core/script/cli/base.py index b1cd7fe4c6..caee932cc9 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -257,8 +257,7 @@ class CLI(object): raise self.error return self.result - @tornado.gen.coroutine - def submit(self, parser=None): + async def submit(self, parser=None): # Create iostream and connect, when necessary if not self.iostream: self.iostream = self.create_iostream() @@ -269,7 +268,7 @@ class CLI(object): self.logger.debug("Connecting %s", address) try: metrics["cli_connection", ("proto", self.name)] += 1 - yield self.iostream.connect(address) + await self.iostream.connect(address) metrics["cli_connection_success", ("proto", self.name)] += 1 except tornado.iostream.StreamClosedError: self.logger.debug("Connection refused") @@ -277,27 +276,27 @@ class CLI(object): self.error = CLIConnectionRefused("Connection refused") return None self.logger.debug("Connected") - yield self.iostream.startup() + await self.iostream.startup() # Perform all necessary login procedures metrics["cli_commands", ("proto", self.name)] += 1 if not self.is_started: - yield self.on_start() - motd = yield self.read_until_prompt() + await self.on_start() + motd = await self.read_until_prompt() self.motd = smart_text(motd, errors="ignore", encoding=self.native_encoding) self.script.set_motd(self.motd) self.is_started = True # Send command # @todo: encode to object's encoding if self.profile.batch_send_multiline or self.profile.command_submit not in self.command: - yield self.send(self.command) + await self.send(self.command) else: # Send multiline commands line-by-line for cmd in self.command.split(self.profile.command_submit): # Send line - yield self.send(cmd + self.profile.command_submit) + await self.send(cmd + self.profile.command_submit) # @todo: Await response parser = parser or self.read_until_prompt - self.result = yield parser() + self.result = await parser() self.logger.debug("Command: %s\n%s", self.command.strip(), self.result) if ( self.profile.rx_pattern_syntax_error @@ -311,7 +310,7 @@ class CLI(object): error_text = self.result if self.profile.send_on_syntax_error and self.name != "beef_cli": self.allow_empty_response = True - yield self.on_error_sequence( + await self.on_error_sequence( self.profile.send_on_syntax_error, self.command, error_text ) self.error = self.script.CLISyntaxError(error_text) @@ -329,23 +328,21 @@ class CLI(object): # Clean control sequences return self.profile.cleaned_input(s) - @tornado.gen.coroutine - def send(self, cmd: bytes) -> None: + async def send(self, cmd: bytes) -> None: # cmd = str(cmd) self.logger.debug("Send: %r", cmd) - yield self.iostream.write(cmd) + await self.iostream.write(cmd) - @tornado.gen.coroutine - def read_until_prompt(self): + async def read_until_prompt(self): connect_retries = self.CONNECT_RETRIES while True: try: metrics["cli_reads", ("proto", self.name)] += 1 f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = yield tornado.gen.with_timeout(self.current_timeout, f) + r = await tornado.gen.with_timeout(self.current_timeout, f) else: - r = yield f + r = await f if r == self.SYNTAX_ERROR_CODE: metrics["cli_syntax_errors", ("proto", self.name)] += 1 return self.SYNTAX_ERROR_CODE @@ -362,7 +359,7 @@ class CLI(object): self.CONNECT_TIMEOUT, ) while connect_retries: - yield tornado.gen.sleep(self.CONNECT_TIMEOUT) + await tornado.gen.sleep(self.CONNECT_TIMEOUT) connect_retries -= 1 self.iostream = self.create_iostream() address = ( @@ -371,8 +368,8 @@ class CLI(object): ) self.logger.debug("Connecting %s", address) try: - yield self.iostream.connect(address) - yield self.iostream.startup() + await self.iostream.connect(address) + await self.iostream.startup() break except tornado.iostream.StreamClosedError: if not connect_retries: @@ -402,16 +399,15 @@ class CLI(object): self.buffer = self.buffer[match.end() :] if isinstance(handler, tuple): metrics["cli_state", ("state", handler[0].__name__)] += 1 - r = yield handler[0](matched, match, *handler[1:]) + r = await handler[0](matched, match, *handler[1:]) else: metrics["cli_state", ("state", handler.__name__)] += 1 - r = yield handler(matched, match) + r = await handler(matched, match) if r is not None: return r break # This state is processed - @tornado.gen.coroutine - def parse_object_stream(self, parser=None, cmd_next=None, cmd_stop=None): + async def parse_object_stream(self, parser=None, cmd_next=None, cmd_stop=None): """ :param cmd: :param command_submit: @@ -433,7 +429,7 @@ class CLI(object): stop_sent = False done = False while not done: - r = yield self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) + r = await self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.script.to_track: self.script.push_cli_tracking(r, self.state) self.logger.debug("Received: %r", r) @@ -446,7 +442,7 @@ class CLI(object): ): error_text = self.buffer if self.profile.send_on_syntax_error: - yield self.on_error_sequence( + await self.on_error_sequence( self.profile.send_on_syntax_error, self.command, error_text ) self.error = self.script.CLISyntaxError(error_text) @@ -477,13 +473,13 @@ class CLI(object): # Stop loop at final page # After 3 repeats self.logger.debug("Stopping stream. Sending %r" % cmd_stop) - self.send(cmd_stop) + await self.send(cmd_stop) stop_sent = True else: r_key = key if cmd_next: self.logger.debug("Next screen. Sending %r" % cmd_next) - self.send(cmd_next) + await self.send(cmd_next) # Check for prompt for rx, handler in self.pattern_table.items(): offset = max(0, len(buffer) - self.MATCH_TAIL) @@ -523,8 +519,7 @@ class CLI(object): self.pattern_table[rx] = patterns[pattern_name] self.set_timeout(timeout) - @tornado.gen.coroutine - def on_start(self, data=None, match=None): + async def on_start(self, data=None, match=None): self.set_state("start") if self.profile.setup_sequence and not self.setup_complete: self.expect({"setup": self.on_setup_sequence}, self.profile.cli_timeout_setup) @@ -540,10 +535,9 @@ class CLI(object): self.profile.cli_timeout_start, ) - @tornado.gen.coroutine - def on_username(self, data, match): + async def on_username(self, data, match): self.set_state("username") - self.send( + await self.send( smart_bytes( self.script.credentials.get("user", "") or "", encoding=self.native_encoding ) @@ -559,10 +553,9 @@ class CLI(object): self.profile.cli_timeout_user, ) - @tornado.gen.coroutine - def on_password(self, data, match): + async def on_password(self, data, match): self.set_state("password") - self.send( + await self.send( smart_bytes( self.script.credentials.get("password", "") or "", encoding=self.native_encoding ) @@ -580,14 +573,13 @@ class CLI(object): self.profile.cli_timeout_password, ) - @tornado.gen.coroutine - def on_unprivileged_prompt(self, data, match): + async def on_unprivileged_prompt(self, data, match): self.set_state("unprivileged_prompt") if self.to_raise_privileges: # Start privilege raising sequence if not self.profile.command_super: self.on_failure(data, match, CLINoSuperCommand) - self.send( + await self.send( smart_bytes(self.profile.command_super, encoding=self.native_encoding) + (self.profile.command_submit or b"\n") ) @@ -611,14 +603,12 @@ class CLI(object): self.patterns["prompt"] = self.patterns["unprivileged_prompt"] return self.on_prompt(data, match) - @tornado.gen.coroutine - def on_failure(self, data, match, error_cls=None): + async def on_failure(self, data, match, error_cls=None): self.set_state("failure") error_cls = error_cls or CLIError raise error_cls(self.buffer or data or None) - @tornado.gen.coroutine - def on_prompt(self, data, match): + async def on_prompt(self, data, match): self.set_state("prompt") if not self.allow_empty_response: s_data = data.strip() @@ -631,10 +621,9 @@ class CLI(object): self.expect({"prompt": self.on_prompt, "pager": self.send_pager_reply}) return d - @tornado.gen.coroutine - def on_super_username(self, data, match): + async def on_super_username(self, data, match): self.set_state("super_username") - self.send( + await self.send( smart_bytes( self.script.credentials.get("user", "") or "", encoding=self.native_encoding ) @@ -651,10 +640,9 @@ class CLI(object): self.profile.cli_timeout_user, ) - @tornado.gen.coroutine - def on_super_password(self, data, match): + async def on_super_password(self, data, match): self.set_state("super_password") - self.send( + await self.send( smart_bytes( self.script.credentials.get("super_password", "") or "", encoding=self.native_encoding, @@ -677,20 +665,19 @@ class CLI(object): self.profile.cli_timeout_password, ) - @tornado.gen.coroutine - def on_setup_sequence(self, data, match): + async def on_setup_sequence(self, data, match): self.set_state("setup") self.logger.debug("Performing setup sequence: %s", self.profile.setup_sequence) lseq = len(self.profile.setup_sequence) for i, c in enumerate(self.profile.setup_sequence): if isinstance(c, int) or isinstance(c, float): - yield tornado.gen.sleep(c) + await tornado.gen.sleep(c) continue cmd = c % self.script.credentials - yield self.send(cmd) + await self.send(cmd) # Waiting for response and drop it if i < lseq - 1: - resp = yield tornado.gen.with_timeout( + resp = await tornado.gen.with_timeout( self.ioloop.time() + 30, future=self.iostream.read_bytes(4096, partial=True) ) if self.script.to_track: @@ -698,7 +685,7 @@ class CLI(object): self.logger.debug("Receiving: %r", resp) self.logger.debug("Setup sequence complete") self.setup_complete = True - yield self.on_start(data, match) + await self.on_start(data, match) def resolve_pattern_prompt(self, match): """ @@ -768,8 +755,7 @@ class CLI(object): self.logger.debug("Shutdown session") self.profile.shutdown_session(self.script) - @tornado.gen.coroutine - def on_error_sequence(self, seq, command, error_text): + async def on_error_sequence(self, seq, command, error_text): """ Process error sequence :param seq: @@ -779,11 +765,11 @@ class CLI(object): """ if isinstance(seq, str): self.logger.debug("Recovering from error. Sending %r", seq) - yield self.iostream.write(seq) + await self.iostream.write(seq) elif callable(seq): if tornado.gen.is_coroutine_function(seq): # Yield coroutine - yield seq(self, command, error_text) + await seq(self, command, error_text) else: seq = seq(self, command, error_text) - yield self.iostream.write(seq) + await self.iostream.write(seq) diff --git a/core/script/cli/ssh.py b/core/script/cli/ssh.py index 27de9cd3e1..aecbb2fb1d 100644 --- a/core/script/cli/ssh.py +++ b/core/script/cli/ssh.py @@ -15,7 +15,6 @@ import codecs # Third-party modules modules import cachetools from tornado.iostream import IOStream -import tornado.gen from ssh2.session import Session, LIBSSH2_HOSTKEY_HASH_SHA1 from ssh2.exceptions import SSH2Error from ssh2.error_codes import LIBSSH2_ERROR_EAGAIN @@ -68,8 +67,7 @@ class SSHIOStream(IOStream): with open(pub_path) as fpub, open(priv_path) as fpriv: return fpub.read(), fpriv.read() - @tornado.gen.coroutine - def startup(self): + async def startup(self): """ SSH session startup """ diff --git a/core/script/cli/telnet.py b/core/script/cli/telnet.py index e39c4f0b87..e6bdfeeb39 100644 --- a/core/script/cli/telnet.py +++ b/core/script/cli/telnet.py @@ -11,7 +11,6 @@ import codecs # Third-party modules from tornado.iostream import IOStream -import tornado.gen from typing import List, Optional, Union from tornado.concurrent import Future @@ -260,11 +259,10 @@ class TelnetIOStream(IOStream): logger=self.logger, writer=self.write_to_fd, naws=cli.profile.get_telnet_naws() ) - @tornado.gen.coroutine - def startup(self): + async def startup(self): if self.cli.profile.telnet_send_on_connect: self.logger.debug("Sending %r on connect", self.cli.profile.telnet_send_on_connect) - yield self.write(self.cli.profile.telnet_send_on_connect) + await self.write(self.cli.profile.telnet_send_on_connect) def read_from_fd(self, buf: Union[bytearray, memoryview]) -> Optional[int]: metrics["telnet_reads"] += 1 -- GitLab From ec1e4eec83f0e2fbd360409242a97acc1d909ff4 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 19:15:46 +0300 Subject: [PATCH 18/61] Fix telnet --- core/script/cli/telnet.py | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/core/script/cli/telnet.py b/core/script/cli/telnet.py index e6bdfeeb39..6397cbd068 100644 --- a/core/script/cli/telnet.py +++ b/core/script/cli/telnet.py @@ -173,14 +173,6 @@ class TelnetParser(object): self.out_iac_seq = [] return b"".join(r) - def refeed(self, chunk: bytes) -> None: - """ - Return unprocessed chunk to start of buffer - :param chunk: - :return: - """ - self.out_iac_seq.insert(0, chunk) - def send_iac(self, cmd: int, opt: int) -> None: """ Send IAC response @@ -266,24 +258,19 @@ class TelnetIOStream(IOStream): def read_from_fd(self, buf: Union[bytearray, memoryview]) -> Optional[int]: metrics["telnet_reads"] += 1 - buf_len = len(buf) - n = super(TelnetIOStream, self).read_from_fd(buf) - if n: - metrics["telnet_read_bytes"] += n - parsed = self.parser.feed(buf) - n = len(parsed) - if n > buf_len: - buf[:buf_len] = parsed[:buf_len] - self.parser.refeed(parsed[buf_len:]) - # WARNING: May hang forever, if it is the last reply from the box - # and no new packets to be received for this interaction - return buf_len - else: - buf[:n] = parsed - return n - else: + n = super().read_from_fd(buf) + if n == 0: + return 0 # EOF + if n is None: metrics["telnet_reads_blocked"] += 1 - return n + return None # EAGAIN + metrics["telnet_read_bytes"] += n + parsed = self.parser.feed(buf[:n]) + pn = len(parsed) + if not pn: + return None # Incomplete data, blocked until next read + buf[:pn] = parsed + return pn def write(self, data: Union[bytes, memoryview]) -> "Future[None]": data = self.parser.escape(data) -- GitLab From 4ba582ede6bde1cf644a03f7f7d1fb2f21aaa70d Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sat, 2 May 2020 19:33:50 +0300 Subject: [PATCH 19/61] Fix pager sending --- core/script/cli/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index caee932cc9..0e05868439 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -495,7 +495,7 @@ class CLI(object): break return objects - def send_pager_reply(self, data, match): + async def send_pager_reply(self, data, match): """ Send proper pager reply """ @@ -503,7 +503,7 @@ class CLI(object): for p, c in self.patterns["more_patterns_commands"]: if p.search(pg): self.collected_data += [data] - self.send(c) + await self.send(c) return raise self.InvalidPagerPattern(pg) -- GitLab From 6f3e91f7ecf8f634685300c0c80b151a69b5f280 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:04:57 +0300 Subject: [PATCH 20/61] async/await DCS --- core/dcs/base.py | 31 +++++++++++-------------------- core/dcs/util.py | 7 +++---- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index d8fc60c833..f41424a46f 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -72,10 +72,8 @@ class DCSBase(object): self.logger.info("Stopping resolver for service %s", svc) r.stop() self.resolvers = {} - # self.ioloop.stop() - @tornado.gen.coroutine - def register(self, name, address, port, pool=None, lock=None, tags=None): + async def register(self, name, address, port, pool=None, lock=None, tags=None): """ Register service :param name: @@ -92,8 +90,7 @@ class DCSBase(object): self.logger.info("Shooting self with SIGTERM") os.kill(os.getpid(), signal.SIGTERM) - @tornado.gen.coroutine - def acquire_slot(self, name, limit): + async def acquire_slot(self, name, limit): """ Acquire shard slot :param name: - @@ -102,8 +99,7 @@ class DCSBase(object): """ raise NotImplementedError() - @tornado.gen.coroutine - def get_resolver(self, name, critical=False, near=False, track=True): + async def get_resolver(self, name, critical=False, near=False, track=True): if track: with self.resolvers_lock: resolver = self.resolvers.get((name, critical, near)) @@ -118,8 +114,7 @@ class DCSBase(object): IOLoop.current().add_callback(resolver.start) return resolver - @tornado.gen.coroutine - def resolve( + async def resolve( self, name, hint=None, @@ -130,12 +125,11 @@ class DCSBase(object): near=False, track=True, ): - resolver = yield self.get_resolver(name, critical=critical, near=near, track=track) - r = yield resolver.resolve(hint=hint, wait=wait, timeout=timeout, full_result=full_result) + resolver = await self.get_resolver(name, critical=critical, near=near, track=track) + r = await resolver.resolve(hint=hint, wait=wait, timeout=timeout, full_result=full_result) return r - @tornado.gen.coroutine - def expire_resolvers(self): + async def expire_resolvers(self): with self.resolvers_lock: for svc in self.resolvers: r = self.resolvers[svc] @@ -162,8 +156,7 @@ class DCSBase(object): return run_sync(_resolve) - @tornado.gen.coroutine - def resolve_near( + async def resolve_near( self, name, hint=None, wait=True, timeout=None, full_result=False, critical=False ): """ @@ -214,8 +207,7 @@ class ResolverBase(object): self.to_shutdown = True metrics["dcs_resolver_activeservices", ("name", self.name)] = 0 - @tornado.gen.coroutine - def start(self): + async def start(self): raise NotImplementedError() def set_services(self, services): @@ -247,8 +239,7 @@ class ResolverBase(object): self.ready_event.clear() metrics["dcs_resolver_activeservices", ("name", self.name)] = len(self.services) - @tornado.gen.coroutine - def resolve(self, hint=None, wait=True, timeout=None, full_result=False): + async def resolve(self, hint=None, wait=True, timeout=None, full_result=False): metrics["dcs_resolver_requests"] += 1 if wait: # Wait until service catalog populated @@ -257,7 +248,7 @@ class ResolverBase(object): else: t = self.dcs.DEFAULT_SERVICE_RESOLUTION_TIMEOUT try: - yield self.ready_event.wait(timeout=t) + await self.ready_event.wait(timeout=t) except tornado.gen.TimeoutError: metrics["errors", ("type", "dcs_resolver_timeout")] += 1 if self.critical: diff --git a/core/dcs/util.py b/core/dcs/util.py index 05ea1b1a26..8c3d73ff9c 100644 --- a/core/dcs/util.py +++ b/core/dcs/util.py @@ -28,13 +28,12 @@ def resolve( :return: """ - @tornado.gen.coroutine - def _resolve(): + async def _resolve(): url = get_dcs_url() dcs = get_dcs_class()(url) try: if near: - r = yield dcs.resolve_near( + r = await dcs.resolve_near( name, hint=hint, wait=wait, @@ -43,7 +42,7 @@ def resolve( critical=critical, ) else: - r = yield dcs.resolve( + r = await dcs.resolve( name, hint=hint, wait=wait, -- GitLab From 812b86711bdc99821d33cb0fb8cad79b15e33460 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:10:37 +0300 Subject: [PATCH 21/61] async/await datastream client --- core/datastream/client.py | 11 ++++------- core/dcs/util.py | 3 --- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/core/datastream/client.py b/core/datastream/client.py index 2c5b36f2ac..a811f6791f 100644 --- a/core/datastream/client.py +++ b/core/datastream/client.py @@ -9,7 +9,6 @@ import logging # Third-party modules -import tornado.gen import ujson # NOC modules @@ -39,8 +38,7 @@ class DataStreamClient(object): :return: """ - @tornado.gen.coroutine - def query(self, change_id=None, filters=None, block=False, limit=None): + async def query(self, change_id=None, filters=None, block=False, limit=None): """ Query datastream :param filters: @@ -69,7 +67,7 @@ class DataStreamClient(object): url = base_url # Get data logger.debug("Request: %s", url) - code, headers, data = yield fetch(url, resolver=self.resolve, headers=req_headers) + code, headers, data = await fetch(url, resolver=self.resolve, headers=req_headers) logger.debug("Response: %s %s", code, headers) if code == ERR_TIMEOUT or code == ERR_READ_TIMEOUT: continue # Retry on timeout @@ -94,10 +92,9 @@ class DataStreamClient(object): elif not block: break # Empty batch, stop if non-blocking mode - @tornado.gen.coroutine - def resolve(self, host): + async def resolve(self, host): try: - svc = yield self.service.dcs.resolve(host) + svc = await self.service.dcs.resolve(host) except ResolutionError: return None host, port = svc.split(":") diff --git a/core/dcs/util.py b/core/dcs/util.py index 8c3d73ff9c..110ab17ddd 100644 --- a/core/dcs/util.py +++ b/core/dcs/util.py @@ -5,9 +5,6 @@ # See LICENSE for details # ---------------------------------------------------------------------- -# Third-party modules -import tornado.gen - # NOC modules from noc.core.ioloop.util import run_sync from .loader import get_dcs_url, get_dcs_class -- GitLab From 44e2cf1a7c8ac7c89f290e7b92d466276e64680e Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:16:12 +0300 Subject: [PATCH 22/61] async/await RPC --- core/service/rpc.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/core/service/rpc.py b/core/service/rpc.py index 1baa64c0d4..090223240c 100644 --- a/core/service/rpc.py +++ b/core/service/rpc.py @@ -53,10 +53,8 @@ class RPCProxy(object): self._sync = sync def __getattr__(self, item): - @tornado.gen.coroutine - def _call(method, *args, **kwargs): - @tornado.gen.coroutine - def make_call(url, body, limit=3): + async def _call(method, *args, **kwargs): + async def make_call(url, body, limit=3): req_headers = { "X-NOC-Calling-Service": self._service.name, "Content-Type": "text/json", @@ -72,7 +70,7 @@ class RPCProxy(object): if sample: req_headers["X-NOC-Span-Ctx"] = span.span_context req_headers["X-NOC-Span"] = span.span_id - code, headers, data = yield fetch( + code, headers, data = await fetch( url, method="POST", headers=req_headers, @@ -89,7 +87,7 @@ class RPCProxy(object): raise RPCException("Redirects limit exceeded") url = headers.get("location") self._logger.debug("Redirecting to %s", url) - r = yield make_call(url, data, limit - 1) + r = await make_call(url, data, limit - 1) return r elif code in (598, 599): span.set_error(code) @@ -122,12 +120,12 @@ class RPCProxy(object): if self._hints: svc = random.choice(self._hints) else: - svc = yield self._service.dcs.resolve(self._service_name) - response = yield make_call("http://%s/api/%s/" % (svc, self._api), body) + svc = await self._service.dcs.resolve(self._service_name) + response = await make_call("http://%s/api/%s/" % (svc, self._api), body) if response: break else: - yield tornado.gen.sleep(t) + await tornado.gen.sleep(t) t = perf_counter() - t0 self._logger.debug("[CALL<] %s.%s (%.2fms)", self._service_name, method, t * 1000) if response: @@ -150,16 +148,14 @@ class RPCProxy(object): else: raise RPCNoService("No active service %s found" % self._service_name) - @tornado.gen.coroutine - def async_wrapper(*args, **kwargs): - result = yield _call(item, *args, **kwargs) + async def async_wrapper(*args, **kwargs): + result = await _call(item, *args, **kwargs) return result def sync_wrapper(*args, **kwargs): - @tornado.gen.coroutine - def _sync_call(): + async def _sync_call(): try: - r = yield _call(item, *args, **kwargs) + r = await _call(item, *args, **kwargs) result.append(r) except tornado.gen.Return as e: result.append(e.value) -- GitLab From 8f6f15cb5e4b29be417685a7625a995c8f17a059 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:19:54 +0300 Subject: [PATCH 23/61] async/await mpub --- core/nsq/pub.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/core/nsq/pub.py b/core/nsq/pub.py index b9b0b336aa..083e581ea2 100644 --- a/core/nsq/pub.py +++ b/core/nsq/pub.py @@ -70,8 +70,7 @@ def mpub_encode(messages: List[Any]) -> bytes: return b"".join(iter_msg()) -@tornado.gen.coroutine -def mpub(topic, messages, dcs=None, retries=None): +async def mpub(topic, messages, dcs=None, retries=None): """ Asynchronously publish message to NSQ topic @@ -105,9 +104,9 @@ def mpub(topic, messages, dcs=None, retries=None): # Get actual nsqd service's address and port si = services[s_index] if not nsqd_http_service_param.is_static(si): - si = yield dcs.resolve(si, near=True) + si = await dcs.resolve(si, near=True) # Send message - code, _, body = yield fetch( + code, _, body = await fetch( "http://%s/mpub?topic=%s&binary=true" % (si, topic), method="POST", body=msg, @@ -120,7 +119,7 @@ def mpub(topic, messages, dcs=None, retries=None): logger.error("Failed to pub to topic '%s': %s (Code=%d)", topic, body, code) retries -= 1 if retries > 0: - yield tornado.gen.sleep(config.nsqd.pub_retry_delay) + await tornado.gen.sleep(config.nsqd.pub_retry_delay) s_index = (s_index + 1) % num_services if code != 200: logger.error("Failed to pub to topic '%s'. Giving up", topic) -- GitLab From 5bba7e3c3e28ecc32b7448a083ea7249f98694a7 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:23:58 +0300 Subject: [PATCH 24/61] async/await NSQ --- core/nsq/reader.py | 7 +++---- core/nsq/topic.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/core/nsq/reader.py b/core/nsq/reader.py index 04948f6cb5..e5629ac953 100644 --- a/core/nsq/reader.py +++ b/core/nsq/reader.py @@ -22,8 +22,7 @@ logger = logging.getLogger(__name__) class Reader(BaseReader): - @tornado.gen.coroutine - def query_lookupd(self): + async def query_lookupd(self): logger.info("query_lookupd") endpoint = self.lookupd_http_addresses[self.lookupd_query_index] self.lookupd_query_index = (self.lookupd_query_index + 1) % len(self.lookupd_http_addresses) @@ -39,10 +38,10 @@ class Reader(BaseReader): params = parse_qs(query) params["topic"] = self.topic - query = urlencode(_utf8_params(params), doseq=1) + query = urlencode(_utf8_params(params), doseq=True) lookupd_url = urlunsplit((scheme, netloc, path, query, fragment)) - code, headers, body = yield fetch( + code, headers, body = await fetch( lookupd_url, headers={"Accept": "application/vnd.nsq; version=1.0"}, connect_timeout=self.lookupd_connect_timeout, diff --git a/core/nsq/topic.py b/core/nsq/topic.py index 2adbc0733f..0c1cd575be 100644 --- a/core/nsq/topic.py +++ b/core/nsq/topic.py @@ -192,8 +192,7 @@ class TopicQueue(object): with self.lock: self.put_condition.notify_all() - @tornado.gen.coroutine - def wait(self, timeout=None, rate=None): + async def wait(self, timeout=None, rate=None): # (Optional[float], Optional[int]) -> None """ Block and wait up to `timeout` @@ -206,7 +205,7 @@ class TopicQueue(object): now = perf_counter() delta = max(self.last_get + 1.0 / rate - now, 0) if delta > 0: - yield tornado.gen.sleep(delta) + await tornado.gen.sleep(delta) # Adjust remaining timeout if timeout: # Adjust timeout @@ -219,7 +218,7 @@ class TopicQueue(object): # No messages, wait if timeout is not None: timeout = datetime.timedelta(seconds=timeout) - yield self.put_condition.wait(timeout) + await self.put_condition.wait(timeout) def apply_metrics(self, data: Dict[str, Any]) -> None: data.update( -- GitLab From 0fb69c453befa3e06130a2824f2da6b94415f6f5 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 07:27:24 +0300 Subject: [PATCH 25/61] Flake8 fix --- core/nsq/reader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/nsq/reader.py b/core/nsq/reader.py index e5629ac953..3e84a73884 100644 --- a/core/nsq/reader.py +++ b/core/nsq/reader.py @@ -10,7 +10,6 @@ import logging from urllib.parse import urlencode, urlsplit, parse_qs, urlunsplit # Third-party modules -import tornado.gen from nsq.reader import Reader as BaseReader, _utf8_params import ujson -- GitLab From 6f437810e34f6d4b37c434b43844ff6d601ca9e0 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 20:05:30 +0300 Subject: [PATCH 26/61] DCS: Fix resolver expiration task --- core/dcs/base.py | 8 +++++--- core/dcs/loader.py | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index f41424a46f..fc732b8dc2 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -43,7 +43,7 @@ class DCSBase(object): # service -> resolver instances self.resolvers = {} self.resolvers_lock = Lock() - self.resolver_expiration_task = PeriodicCallback(self.expire_resolvers, 10000) + self.resolver_expiration_task = None self.health_check_service_id = None self.status = True self.status_message = "" @@ -56,15 +56,17 @@ class DCSBase(object): Run IOLoop if not started yet :return: """ + self.resolver_expiration_task = PeriodicCallback(self.expire_resolvers, 10000) self.resolver_expiration_task.start() - # self.ioloop.start() def stop(self): """ Stop IOLoop if not stopped yet :return: """ - self.resolver_expiration_task.stop() + if self.resolver_expiration_task: + self.resolver_expiration_task.stop() + self.resolver_expiration_task = None # Stop all resolvers with self.resolvers_lock: for svc in self.resolvers: diff --git a/core/dcs/loader.py b/core/dcs/loader.py index 09c58f79f8..a90f57cc54 100644 --- a/core/dcs/loader.py +++ b/core/dcs/loader.py @@ -44,5 +44,7 @@ def get_dcs(url=None): url = get_dcs_url(url) with _lock: if url not in _instances: - _instances[url] = get_dcs_class(url)(url) + dcs = get_dcs_class(url)(url) + dcs.start() + _instances[url] = dcs return _instances[url] -- GitLab From 3f1853089f431ced3b854ea111cc1489a5850461 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 20:09:48 +0300 Subject: [PATCH 27/61] Fix deactivate --- core/service/base.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/core/service/base.py b/core/service/base.py index 7dfb55c68c..1a2a5ceb84 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -338,7 +338,7 @@ class Service(object): error_report() finally: if self.ioloop: - self.ioloop.add_callback(self.deactivate()) + self.ioloop.add_callback(self.deactivate) for cb, args, kwargs in self.close_callbacks: cb(*args, **kwargs) self.logger.warning("Service %s has been terminated", self.name) @@ -456,8 +456,7 @@ class Service(object): self.start_telemetry_callback() self.ioloop.add_callback(self.on_register) - @tornado.gen.coroutine - def deactivate(self): + async def deactivate(self): if not self.is_active: return self.is_active = False @@ -468,20 +467,20 @@ class Service(object): # Release registration if self.dcs: self.logger.info("Deregistration") - yield self.dcs.deregister() + await self.dcs.deregister() # Shutdown schedulers if self.scheduler: try: self.logger.info("Shutting down scheduler") - yield self.scheduler.shutdown() + await self.scheduler.shutdown() except tornado.gen.TimeoutError: self.logger.info("Timed out when shutting down scheduler") # Shutdown executors - yield self.shutdown_executors() + await self.shutdown_executors() # Custom deactivation - yield self.on_deactivate() + await self.on_deactivate() # Shutdown NSQ topics - yield self.shutdown_topic_queues() + await self.shutdown_topic_queues() # Continue deactivation # Finally stop ioloop self.dcs = None -- GitLab From 8c5299c3d5d3922c5abcbed281c71368ef0f51fb Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Sun, 3 May 2020 20:19:39 +0300 Subject: [PATCH 28/61] Fix resolver.is_expired() --- config.py | 1 + core/dcs/base.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/config.py b/config.py index 351006c0b4..d67799cb10 100644 --- a/config.py +++ b/config.py @@ -202,6 +202,7 @@ class Config(BaseConfig): class dcs(ConfigSection): resolution_timeout = SecondsParameter(default="5M") + resolver_expiration_timeout = SecondsParameter(default="10M") class discovery(ConfigSection): max_threads = IntParameter(default=20) diff --git a/core/dcs/base.py b/core/dcs/base.py index fc732b8dc2..3769814f87 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -13,6 +13,7 @@ from threading import Lock import datetime import os from urllib.parse import urlparse +from time import perf_counter # Third-party modules import tornado.gen @@ -204,6 +205,7 @@ class ResolverBase(object): self.near = near self.ready_event = tornado.locks.Event() self.track = track + self.last_used = perf_counter() def stop(self): self.to_shutdown = True @@ -242,6 +244,7 @@ class ResolverBase(object): metrics["dcs_resolver_activeservices", ("name", self.name)] = len(self.services) async def resolve(self, hint=None, wait=True, timeout=None, full_result=False): + self.last_used = perf_counter() metrics["dcs_resolver_requests"] += 1 if wait: # Wait until service catalog populated @@ -287,3 +290,10 @@ class ResolverBase(object): """ self.rr_index = min(self.rr_index + 1, len(self.service_ids) - 1) return self.service_ids[self.rr_index] + + def is_expired(self) -> bool: + """ + Check if resolver is no longer used and may be expired + :return: + """ + return perf_counter() - self.last_used > config.dcs.resolver_expiration_timeout -- GitLab From eb1b89cb44cde722e98037635881c17003f9b6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 09:44:25 +0300 Subject: [PATCH 29/61] CLI: Safer ioloop handling --- core/ioloop/util.py | 48 ++++++++++++++++++++++++++++------------- core/script/cli/base.py | 15 +++++++------ 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 710ee7e83e..aa0c206c18 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -21,6 +21,37 @@ logger = logging.getLogger(__name__) T = TypeVar("T") +class IOLoopContext(object): + def __init__(self): + self.prev_loop = None + self.new_loop = None + + def get_context(self): + self.prev_loop = asyncio._get_running_loop() + self.new_loop = asyncio.new_event_loop() + if self.prev_loop: + # Reset running loop + asyncio._set_running_loop(None) + return self.new_loop + + def drop_context(self): + self.new_loop.close() + self.new_loop = None + asyncio._set_running_loop(self.prev_loop) + if self.prev_loop: + asyncio._set_running_loop(self.prev_loop) + else: + asyncio._set_running_loop(None) + asyncio.get_event_loop_policy().reset_called() + self.prev_loop = None + + def __enter__(self): + return self.get_context() + + def __exit__(self, exc_type, exc_val, exc_tb): + self.drop_context() + + def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: """ Run callable on dedicated IOLoop in safe manner @@ -45,21 +76,8 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: result: List[T] = [] error: List[Tuple[Any, Any, Any]] = [] - prev_loop = asyncio._get_running_loop() - new_loop = asyncio.new_event_loop() - if prev_loop: - # Reset running loop - asyncio._set_running_loop(None) - try: - new_loop.run_until_complete(wrapper()) - finally: - new_loop.close() - asyncio._set_running_loop(prev_loop) - if prev_loop: - asyncio._set_running_loop(prev_loop) - else: - asyncio._set_running_loop(None) - asyncio.get_event_loop_policy().reset_called() + with IOLoopContext() as loop: + loop.run_until_complete(wrapper()) # @todo: close_all if error: reraise(*error[0]) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 0e05868439..e374c18b22 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -28,6 +28,7 @@ from noc.config import config from noc.core.span import Span from noc.core.perf import metrics from noc.core.comp import smart_bytes, smart_text +from noc.core.ioloop.util import IOLoopContext from .error import ( CLIError, CLIAuthFailed, @@ -73,7 +74,7 @@ class CLI(object): self.iostream = None self.motd = "" self.ioloop = None - self.prev_ioloop = None + self.loop_context: Optional[IOLoopContext] = None self.command = None self.prompt_stack = [] self.patterns = self.profile.patterns.copy() @@ -105,9 +106,9 @@ class CLI(object): self.ioloop.close(all_fds=True) self.ioloop = None # Restore previous ioloop - if self.prev_ioloop: - self.prev_ioloop.make_current() - self.prev_ioloop = None + if self.loop_context: + self.loop_context.drop_context() + self.loop_context = None else: IOLoop.clear_current() self.is_closed = True @@ -238,9 +239,9 @@ class CLI(object): self.allow_empty_response = allow_empty_response if not self.ioloop: self.logger.debug("Creating IOLoop") - self.prev_ioloop = IOLoop.current(instance=False) - self.ioloop = IOLoop() - self.ioloop.make_current() + self.loop_context = IOLoopContext() + self.loop_context.get_context() + self.ioloop = IOLoop.current() if obj_parser: parser = functools.partial( self.parse_object_stream, obj_parser, smart_bytes(cmd_next), smart_bytes(cmd_stop) -- GitLab From b7813231bf6a4fa7c35d6fb6ec298d3482e3b780 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:07:18 +0300 Subject: [PATCH 30/61] More asyncio ports --- core/dcs/consuldcs.py | 22 +++++++++++----------- core/nsq/pub.py | 4 ++-- core/nsq/topic.py | 3 ++- core/scheduler/job.py | 5 ++--- core/scheduler/scheduler.py | 12 +++++------- core/script/cli/base.py | 5 +++-- core/script/mml/base.py | 37 +++++++++++++++++-------------------- core/script/rtsp/base.py | 37 +++++++++++++++++-------------------- core/service/rpc.py | 7 ++----- core/span.py | 7 +------ 10 files changed, 62 insertions(+), 77 deletions(-) diff --git a/core/dcs/consuldcs.py b/core/dcs/consuldcs.py index ff5cb8e8a3..2ee7461d80 100644 --- a/core/dcs/consuldcs.py +++ b/core/dcs/consuldcs.py @@ -11,11 +11,11 @@ import time import ujson import uuid from urllib.parse import unquote +import asyncio # Third-party modules import consul.base import consul.tornado -import tornado.gen from tornado.ioloop import PeriodicCallback # NOC modules @@ -206,7 +206,7 @@ class ConsulDCS(DCSBase): ) break except ConsulRepeatableErrors: - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue self.logger.info("Session id: %s", self.session) self.keep_alive_task = PeriodicCallback( @@ -261,7 +261,7 @@ class ConsulDCS(DCSBase): except ConsulRepeatableErrors as e: metrics["error", ("type", "cant_register_consul")] += 1 self.logger.info("Cannot register service %s: %s", name, e) - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue if r: self.svc_id = svc_id @@ -310,7 +310,7 @@ class ConsulDCS(DCSBase): except ConsulRepeatableErrors as e: self.logger.warning("Cannot refresh session due to ignorable error: %s", e) metrics["error", ("type", "dcs_consul_keepalive_retries")] += 1 - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) if not touched: self.logger.critical("Cannot refresh session, stopping") if self.keep_alive_task: @@ -342,9 +342,9 @@ class ConsulDCS(DCSBase): else: metrics["error", ("type", "dcs_consul_failed_get_lock")] += 1 self.logger.info("Failed to acquire lock") - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) except ConsulRepeatableErrors: - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue # Waiting for lock release while True: @@ -354,12 +354,12 @@ class ConsulDCS(DCSBase): ) if not data: index = None # Key has been deleted - await tornado.gen.sleep( + await asyncio.sleep( self.DEFAULT_CONSUL_LOCK_DELAY * (0.5 + random.random()) ) break except ConsulRepeatableErrors: - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) self.logger.info("Lock acquired") async def acquire_slot(self, name, limit): @@ -391,9 +391,9 @@ class ConsulDCS(DCSBase): else: metrics["error", ("type", "dcs_consul_failed_get_slot")] += 1 self.logger.info("Failed to write contender slot info") - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) except ConsulRepeatableErrors: - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) index = 0 cas = 0 while True: @@ -406,7 +406,7 @@ class ConsulDCS(DCSBase): try: index, cv = await self.consul.kv.get(key=prefix, index=index, recurse=True) except ConsulRepeatableErrors: - await tornado.gen.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) + await asyncio.sleep(self.DEFAULT_CONSUL_RETRY_TIMEOUT) continue for e in cv: if e["Key"] == manifest_path: diff --git a/core/nsq/pub.py b/core/nsq/pub.py index 083e581ea2..30055cdf78 100644 --- a/core/nsq/pub.py +++ b/core/nsq/pub.py @@ -9,10 +9,10 @@ import logging import struct import random +import asyncio # Third-party modules import ujson -import tornado.gen from typing import List, Any # NOC modules @@ -119,7 +119,7 @@ async def mpub(topic, messages, dcs=None, retries=None): logger.error("Failed to pub to topic '%s': %s (Code=%d)", topic, body, code) retries -= 1 if retries > 0: - await tornado.gen.sleep(config.nsqd.pub_retry_delay) + await asyncio.sleep(config.nsqd.pub_retry_delay) s_index = (s_index + 1) % num_services if code != 200: logger.error("Failed to pub to topic '%s'. Giving up", topic) diff --git a/core/nsq/topic.py b/core/nsq/topic.py index 0c1cd575be..155c8bbe57 100644 --- a/core/nsq/topic.py +++ b/core/nsq/topic.py @@ -10,6 +10,7 @@ from collections import deque from threading import Lock import datetime from time import perf_counter +import asyncio # Third-party modules import ujson @@ -205,7 +206,7 @@ class TopicQueue(object): now = perf_counter() delta = max(self.last_get + 1.0 / rate - now, 0) if delta > 0: - await tornado.gen.sleep(delta) + await asyncio.sleep(delta) # Adjust remaining timeout if timeout: # Adjust timeout diff --git a/core/scheduler/job.py b/core/scheduler/job.py index 798b52d6c3..c948782ee1 100644 --- a/core/scheduler/job.py +++ b/core/scheduler/job.py @@ -121,8 +121,7 @@ class Job(object): if ctx not in self.context: self.context[ctx] = {} - @tornado.gen.coroutine - def run(self): + async def run(self): with Span( server=self.scheduler.name, service=self.attrs[self.ATTR_CLASS], @@ -162,7 +161,7 @@ class Job(object): result = self.handler(**data) if tornado.gen.is_future(result): # Wait for future - result = yield result + result = await result status = self.E_SUCCESS except RetryAfter as e: self.logger.info("Retry after %ss: %s", e.delay, e) diff --git a/core/scheduler/scheduler.py b/core/scheduler/scheduler.py index 7bb31224a2..42084868c9 100644 --- a/core/scheduler/scheduler.py +++ b/core/scheduler/scheduler.py @@ -12,22 +12,21 @@ import random import threading import time from time import perf_counter +import asyncio # Third-party modules import pymongo.errors -import tornado.gen -import tornado.ioloop from tornado.ioloop import IOLoop from concurrent.futures import Future from pymongo import DeleteOne, UpdateOne # NOC modules -from .job import Job from noc.core.mongo.connection import get_db from noc.core.handler import get_handler from noc.core.threadpool import ThreadPoolExecutor from noc.core.perf import metrics from noc.config import config +from .job import Job class Scheduler(object): @@ -205,8 +204,7 @@ class Scheduler(object): self.apply_bulk_ops() self.apply_cache_ops() - @tornado.gen.coroutine - def scheduler_loop(self): + async def scheduler_loop(self): """ Primary scheduler loop """ @@ -215,14 +213,14 @@ class Scheduler(object): n = 0 if self.get_executor().may_submit(): try: - n = yield self.executor.submit(self.scheduler_tick) + n = await self.executor.submit(self.scheduler_tick) except Exception as e: self.logger.error("Failed to execute scheduler tick: %s", e) dt = self.check_time - (perf_counter() - t0) * 1000 if dt > 0: if n: dt = min(dt, self.check_time / n) - yield tornado.gen.sleep(dt / 1000.0) + await asyncio.sleep(dt / 1000.0) self.apply_ops() def iter_pending_jobs(self, limit): diff --git a/core/script/cli/base.py b/core/script/cli/base.py index e374c18b22..44a012d2b2 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -13,6 +13,7 @@ import datetime from functools import reduce import sys from threading import Lock +import asyncio # Third-party modules import tornado.gen @@ -360,7 +361,7 @@ class CLI(object): self.CONNECT_TIMEOUT, ) while connect_retries: - await tornado.gen.sleep(self.CONNECT_TIMEOUT) + await asyncio.sleep(self.CONNECT_TIMEOUT) connect_retries -= 1 self.iostream = self.create_iostream() address = ( @@ -672,7 +673,7 @@ class CLI(object): lseq = len(self.profile.setup_sequence) for i, c in enumerate(self.profile.setup_sequence): if isinstance(c, int) or isinstance(c, float): - await tornado.gen.sleep(c) + await asyncio.sleep(c) continue cmd = c % self.script.credentials await self.send(cmd) diff --git a/core/script/mml/base.py b/core/script/mml/base.py index 89a4fe82de..a6c7949b84 100644 --- a/core/script/mml/base.py +++ b/core/script/mml/base.py @@ -9,6 +9,7 @@ import socket import datetime import re +import asyncio # Third-party modules import tornado.ioloop @@ -129,15 +130,13 @@ class MMLBase(object): tornado.ioloop.IOLoop.current().remove_timeout(self.close_timeout) self.close_timeout = None - @tornado.gen.coroutine - def send(self, cmd): + async def send(self, cmd): # @todo: Apply encoding cmd = str(cmd) self.logger.debug("Send: %r", cmd) - yield self.iostream.write(cmd) + await self.iostream.write(cmd) - @tornado.gen.coroutine - def submit(self): + async def submit(self): # Create iostream and connect, when necessary if not self.iostream: self.iostream = self.create_iostream() @@ -147,32 +146,31 @@ class MMLBase(object): ) self.logger.debug("Connecting %s", address) try: - yield self.iostream.connect(address) + await self.iostream.connect(address) except tornado.iostream.StreamClosedError: self.logger.debug("Connection refused") self.error = MMLConnectionRefused("Connection refused") return None self.logger.debug("Connected") - yield self.iostream.startup() + await self.iostream.startup() # Perform all necessary login procedures if not self.is_started: self.is_started = True - yield self.send(self.profile.get_mml_login(self.script)) - yield self.get_mml_response() + await self.send(self.profile.get_mml_login(self.script)) + await self.get_mml_response() if self.error: self.error = MMLAuthFailed(str(self.error)) return None # Send command - yield self.send(self.command) - r = yield self.get_mml_response() + await self.send(self.command) + r = await self.get_mml_response() return r - @tornado.gen.coroutine - def get_mml_response(self): + async def get_mml_response(self): result = [] header_sep = self.profile.mml_header_separator while True: - r = yield self.read_until_end() + r = await self.read_until_end() r = r.strip() # Process header if header_sep not in r: @@ -228,16 +226,15 @@ class MMLBase(object): else: return self.result - @tornado.gen.coroutine - def read_until_end(self): + async def read_until_end(self): connect_retries = self.CONNECT_RETRIES while True: try: f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = yield tornado.gen.with_timeout(self.current_timeout, f) + r = await tornado.gen.with_timeout(self.current_timeout, f) else: - r = yield f + r = await f except tornado.iostream.StreamClosedError: # Check if remote end closes connection just # after connection established @@ -248,7 +245,7 @@ class MMLBase(object): self.CONNECT_TIMEOUT, ) while connect_retries: - yield tornado.gen.sleep(self.CONNECT_TIMEOUT) + await asyncio.sleep(self.CONNECT_TIMEOUT) connect_retries -= 1 self.iostream = self.create_iostream() address = ( @@ -257,7 +254,7 @@ class MMLBase(object): ) self.logger.debug("Connecting %s", address) try: - yield self.iostream.connect(address) + await self.iostream.connect(address) break except tornado.iostream.StreamClosedError: if not connect_retries: diff --git a/core/script/rtsp/base.py b/core/script/rtsp/base.py index aefeef1505..b185ffb62e 100644 --- a/core/script/rtsp/base.py +++ b/core/script/rtsp/base.py @@ -10,6 +10,7 @@ import socket import datetime import os from urllib.request import parse_http_list, parse_keqv_list +import asyncio # Third-party modules import tornado.ioloop @@ -144,8 +145,7 @@ class RTSPBase(object): uri = "rtsp://%s%s" % (address, self.path) return uri.encode("utf-8") - @tornado.gen.coroutine - def send(self, method=None, body=None): + async def send(self, method=None, body=None): # @todo: Apply encoding self.error = None body = body or "" @@ -169,29 +169,28 @@ class RTSPBase(object): ) self.logger.debug("Send: %r", req) - yield self.iostream.write(req) + await self.iostream.write(req) self.cseq += 1 - @tornado.gen.coroutine - def submit(self): + async def submit(self): # Create iostream and connect, when necessary if not self.iostream: self.iostream = self.create_iostream() address = (self.script.credentials.get("address"), self.default_port) self.logger.debug("Connecting %s", address) try: - yield self.iostream.connect(address) + await self.iostream.connect(address) except tornado.iostream.StreamClosedError: self.logger.debug("Connection refused") self.error = RTSPConnectionRefused("Connection refused") return None self.logger.debug("Connected") - yield self.iostream.startup() + await self.iostream.startup() # Perform all necessary login procedures if not self.is_started: self.is_started = True - yield self.send("OPTIONS") - yield self.get_rtsp_response() + await self.send("OPTIONS") + await self.get_rtsp_response() if self.error and self.error.code == 401: self.logger.info("Authentication needed") self.auth = DigestAuth( @@ -199,16 +198,15 @@ class RTSPBase(object): password=self.script.credentials.get("password"), ) # Send command - yield self.send() - r = yield self.get_rtsp_response() + await self.send() + r = await self.get_rtsp_response() return r - @tornado.gen.coroutine - def get_rtsp_response(self): + async def get_rtsp_response(self): result = [] header_sep = "\r\n\r\n" while True: - r = yield self.read_until_end() + r = await self.read_until_end() # r = r.strip() # Process header if header_sep not in r: @@ -284,16 +282,15 @@ class RTSPBase(object): else: return self.result - @tornado.gen.coroutine - def read_until_end(self): + async def read_until_end(self): connect_retries = self.CONNECT_RETRIES while True: try: f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = yield tornado.gen.with_timeout(self.current_timeout, f) + r = await tornado.gen.with_timeout(self.current_timeout, f) else: - r = yield f + r = await f except tornado.iostream.StreamClosedError: # Check if remote end closes connection just # after connection established @@ -304,7 +301,7 @@ class RTSPBase(object): self.CONNECT_TIMEOUT, ) while connect_retries: - yield tornado.gen.sleep(self.CONNECT_TIMEOUT) + await asyncio.sleep(self.CONNECT_TIMEOUT) connect_retries -= 1 self.iostream = self.create_iostream() address = ( @@ -313,7 +310,7 @@ class RTSPBase(object): ) self.logger.debug("Connecting %s", address) try: - yield self.iostream.connect(address) + await self.iostream.connect(address) break except tornado.iostream.StreamClosedError: if not connect_retries: diff --git a/core/service/rpc.py b/core/service/rpc.py index 090223240c..c0dc53b7dc 100644 --- a/core/service/rpc.py +++ b/core/service/rpc.py @@ -12,10 +12,9 @@ import random import threading import sys from time import perf_counter +import asyncio # Third-party modules -import tornado.concurrent -import tornado.gen import ujson # NOC modules @@ -125,7 +124,7 @@ class RPCProxy(object): if response: break else: - await tornado.gen.sleep(t) + await asyncio.sleep(t) t = perf_counter() - t0 self._logger.debug("[CALL<] %s.%s (%.2fms)", self._service_name, method, t * 1000) if response: @@ -157,8 +156,6 @@ class RPCProxy(object): try: r = await _call(item, *args, **kwargs) result.append(r) - except tornado.gen.Return as e: - result.append(e.value) except Exception: error.append(sys.exc_info()) finally: diff --git a/core/span.py b/core/span.py index 041d855078..fbf8aff34e 100644 --- a/core/span.py +++ b/core/span.py @@ -17,7 +17,6 @@ import uuid from collections import namedtuple # Third-party modules -import tornado.gen from typing import Optional # NOC modules @@ -144,7 +143,7 @@ class Span(object): forensic_logger.info("[<%s]", self.forensic_id) if not self.is_sampled: return - if exc_type and not self.error_text and not self.is_ignorable_error(exc_type): + if exc_type and not self.error_text: self.error_code = ERR_UNKNOWN self.error_text = str(exc_val).strip("\t").replace("\t", " ").replace("\n", " ") lt = time.localtime(self.start) @@ -176,10 +175,6 @@ class Span(object): if self.suppress_trace: return True - @staticmethod - def is_ignorable_error(exc_type): - return exc_type == tornado.gen.Return - def set_error(self, code: Optional[int] = None, text: Optional[str] = None) -> None: """ Set error result and code for current span -- GitLab From 3e360f71ab0a9ca74f92362275f56e943251e9d3 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:16:38 +0300 Subject: [PATCH 31/61] More asyncio ports --- core/ioloop/whois.py | 24 +++++++++--------------- core/service/base.py | 42 +++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/core/ioloop/whois.py b/core/ioloop/whois.py index 753a0850fb..61b4c73f20 100644 --- a/core/ioloop/whois.py +++ b/core/ioloop/whois.py @@ -10,12 +10,11 @@ import logging # Third-party modules from tornado.tcpclient import TCPClient -import tornado.gen -from tornado.ioloop import IOLoop # NOC modules from noc.core.validators import is_fqdn from noc.core.comp import smart_bytes, smart_text +from noc.core.ioloop.util import run_sync DEFAULT_WHOIS_SERVER = "whois.ripe.net" DEFAULT_WHOIS_PORT = 43 @@ -49,8 +48,7 @@ def parse_response(data): return r -@tornado.gen.coroutine -def whois_async(query, fields=None): +async def whois_async(query, fields=None): """ Perform whois request :param query: @@ -68,15 +66,15 @@ def whois_async(query, fields=None): # Perform query try: client = TCPClient() - stream = yield client.connect(server, DEFAULT_WHOIS_PORT) + stream = await client.connect(server, DEFAULT_WHOIS_PORT) except IOError as e: logger.error("Cannot resolve host '%s': %s", server, e) return try: - yield stream.write(smart_bytes(query) + b"\r\n") - data = yield stream.read_until_close() + await stream.write(smart_bytes(query) + b"\r\n") + data = await stream.read_until_close() finally: - yield stream.close() + await stream.close() data = smart_text(data) data = parse_response(data) if fields: @@ -85,11 +83,7 @@ def whois_async(query, fields=None): def whois(query, fields=None): - @tornado.gen.coroutine - def _whois(): - result = yield whois_async(query, fields) - r.append(result) + async def _whois(): + return await whois_async(query, fields) - r = [] - IOLoop().run_sync(_whois) - return r[0] + return run_sync(_whois) diff --git a/core/service/base.py b/core/service/base.py index 1a2a5ceb84..77d7d81488 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -511,10 +511,9 @@ class Service(object): tags += ["traefik.backend.maxconn.amount=%s" % limit] return tags - @tornado.gen.coroutine - def on_register(self): + async def on_register(self): addr, port = self.get_service_address() - r = yield self.dcs.register( + r = await self.dcs.register( self.name, addr, port, @@ -524,7 +523,7 @@ class Service(object): ) if r: # Finally call on_activate - yield self.on_activate() + await self.on_activate() self.logger.info("Service is active (in %.2fms)", self.uptime() * 1000) else: raise self.RegistrationError() @@ -536,17 +535,15 @@ class Service(object): """ return - @tornado.gen.coroutine - def acquire_lock(self): - yield self.dcs.acquire_lock("lock-%s" % self.name) + async def acquire_lock(self): + await self.dcs.acquire_lock("lock-%s" % self.name) - @tornado.gen.coroutine - def acquire_slot(self): + async def acquire_slot(self): if self.pooled: name = "%s-%s" % (self.name, config.pool) else: name = self.name - slot_number, total_slots = yield self.dcs.acquire_slot(name, config.global_n_instances) + slot_number, total_slots = await self.dcs.acquire_slot(name, config.global_n_instances) if total_slots <= 0: self.die("Service misconfiguration detected: Invalid total_slots") return slot_number, total_slots @@ -600,8 +597,7 @@ class Service(object): for t in config.rpc.retry_timeout.split(","): yield float(t) - @tornado.gen.coroutine - def subscribe(self, topic, channel, handler, raw=False, **kwargs): + async def subscribe(self, topic, channel, handler, raw=False, **kwargs): """ Subscribe message to channel """ @@ -695,17 +691,15 @@ class Service(object): self.ioloop.add_callback(self.nsq_publisher_guard, q) return q - @tornado.gen.coroutine - def nsq_publisher_guard(self, queue: TopicQueue) -> Generator: + async def nsq_publisher_guard(self, queue: TopicQueue) -> Generator: while not queue.to_shutdown: try: - yield self.nsq_publisher(queue) + await self.nsq_publisher(queue) except Exception as e: self.logger.error("Unhandled exception in NSQ publisher, restarting: %s", e) queue.shutdown_complete.set() - @tornado.gen.coroutine - def nsq_publisher(self, queue): + async def nsq_publisher(self, queue): """ Publisher for NSQ topic @@ -715,7 +709,7 @@ class Service(object): self.logger.info("[nsq|%s] Starting NSQ publisher", topic) while not queue.to_shutdown: # Message throttling. Wait and allow to collect more messages - yield queue.wait(timeout=10, rate=config.nsqd.topic_mpub_rate) + await queue.wait(timeout=10, rate=config.nsqd.topic_mpub_rate) # Get next batch up to `mpub_messages` messages or up to `mpub_size` size messages = list( queue.iter_get( @@ -729,7 +723,7 @@ class Service(object): continue try: self.logger.debug("[nsq|%s] Publishing %d messages", topic, len(messages)) - yield mpub(topic, messages, dcs=self.dcs) + await mpub(topic, messages, dcs=self.dcs) except NSQPubError: if queue.to_shutdown: self.logger.debug( @@ -746,19 +740,17 @@ class Service(object): del messages # Release memory self.logger.info("[nsq|%s] Stopping NSQ publisher", topic) - @tornado.gen.coroutine - def shutdown_executors(self): + async def shutdown_executors(self): if self.executors: self.logger.info("Shutting down executors") for x in self.executors: try: self.logger.info("Shutting down %s", x) - yield self.executors[x].shutdown() + await self.executors[x].shutdown() except tornado.gen.TimeoutError: self.logger.info("Timed out when shutting down %s", x) - @tornado.gen.coroutine - def shutdown_topic_queues(self): + async def shutdown_topic_queues(self): # Issue shutdown with self.topic_queue_lock: has_topics = bool(self.topic_queues) @@ -775,7 +767,7 @@ class Service(object): has_topics = bool(self.topic_queues) try: self.logger.info("Waiting shutdown of topic queue %s", topic) - yield queue.shutdown_complete.wait(datetime.timedelta(seconds=5)) + await queue.shutdown_complete.wait(datetime.timedelta(seconds=5)) except tornado.gen.TimeoutError: self.logger.info("Failed to shutdown topic queue %s: Timed out", topic) -- GitLab From 60e7b5ce258b5c8982175dcb0dbd5d96f00e426d Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:26:17 +0300 Subject: [PATCH 32/61] async/await service activation/deactivation --- core/service/base.py | 6 ++---- services/chwriter/service.py | 5 ++--- services/classifier/service.py | 5 ++--- services/correlator/service.py | 5 ++--- services/datastream/service.py | 3 +-- services/discovery/service.py | 5 ++--- services/escalator/service.py | 8 +++----- services/icqsender/service.py | 5 ++--- services/mailsender/service.py | 5 ++--- services/mrt/service.py | 2 +- services/ping/service.py | 5 ++--- services/sae/service.py | 2 +- services/scheduler/service.py | 3 +-- services/selfmon/service.py | 5 ++--- services/syslogcollector/service.py | 3 +-- services/tgsender/service.py | 5 ++--- services/trapcollector/service.py | 3 +-- services/web/service.py | 2 +- 18 files changed, 30 insertions(+), 47 deletions(-) diff --git a/core/service/base.py b/core/service/base.py index 77d7d81488..f3fdf26f51 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -528,8 +528,7 @@ class Service(object): else: raise self.RegistrationError() - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): """ Called when service activated """ @@ -548,8 +547,7 @@ class Service(object): self.die("Service misconfiguration detected: Invalid total_slots") return slot_number, total_slots - @tornado.gen.coroutine - def on_deactivate(self): + async def on_deactivate(self): return def open_rpc(self, name, pool=None, sync=False, hints=None): diff --git a/services/chwriter/service.py b/services/chwriter/service.py index 87b84d53fb..b9596d417b 100755 --- a/services/chwriter/service.py +++ b/services/chwriter/service.py @@ -44,15 +44,14 @@ class CHWriterService(Service): self.ch_address = config.clickhouse.rw_addresses[0] self.restore_timeout = None - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): report_callback = tornado.ioloop.PeriodicCallback(self.report, 10000) report_callback.start() check_callback = tornado.ioloop.PeriodicCallback( self.check_channels, config.chwriter.batch_delay_ms ) check_callback.start() - yield self.subscribe( + await self.subscribe( config.chwriter.topic, "chwriter", self.on_data, diff --git a/services/classifier/service.py b/services/classifier/service.py index 982adb8edd..a85d2ebce7 100755 --- a/services/classifier/service.py +++ b/services/classifier/service.py @@ -119,8 +119,7 @@ class ClassifierService(Service): self.last_ts = None self.stats = {} - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): """ Load rules from database after loading config """ @@ -130,7 +129,7 @@ class ClassifierService(Service): self.load_suppression() self.load_link_action() self.load_handlers() - yield self.subscribe("events.%s" % config.pool, "fmwriter", self.on_event) + await self.subscribe("events.%s" % config.pool, "fmwriter", self.on_event) report_callback = tornado.ioloop.PeriodicCallback(self.report, 1000) report_callback.start() diff --git a/services/correlator/service.py b/services/correlator/service.py index 8bd8cc3ced..53dbaa91fb 100755 --- a/services/correlator/service.py +++ b/services/correlator/service.py @@ -59,8 +59,7 @@ class CorrelatorService(Service): self.rca_lock = Lock() self.topology_rca_lock = Lock() - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): self.scheduler = Scheduler( self.name, pool=config.pool, @@ -71,7 +70,7 @@ class CorrelatorService(Service): max_chunk=100, ) self.scheduler.correlator = self - yield self.subscribe( + await self.subscribe( "correlator.dispose.%s" % config.pool, "dispose", self.on_dispose_event, diff --git a/services/datastream/service.py b/services/datastream/service.py index 9fcd56e2f5..7f18f3e433 100755 --- a/services/datastream/service.py +++ b/services/datastream/service.py @@ -59,8 +59,7 @@ class DataStreamService(Service): for ds in self.get_datastreams() ] - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): # Detect we have working .watch() implementation if self.has_watch(): has_watch = True diff --git a/services/discovery/service.py b/services/discovery/service.py index 533aca4890..a80612c154 100755 --- a/services/discovery/service.py +++ b/services/discovery/service.py @@ -29,9 +29,8 @@ class DiscoveryService(Service): self.slot_number = 0 self.total_slots = 0 - @tornado.gen.coroutine - def on_activate(self): - self.slot_number, self.total_slots = yield self.acquire_slot() + async def on_activate(self): + self.slot_number, self.total_slots = await self.acquire_slot() if self.total_slots > 1: self.logger.info( "Enabling distributed mode: Slot %d/%d", self.slot_number, self.total_slots diff --git a/services/escalator/service.py b/services/escalator/service.py index ac3fa69b2b..79baf0b263 100755 --- a/services/escalator/service.py +++ b/services/escalator/service.py @@ -30,16 +30,14 @@ class EscalatorService(Service): super(EscalatorService, self).__init__(*args, **kwargs) self.shards = {} - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): self.apply_shards() - @tornado.gen.coroutine - def on_deactivate(self): + async def on_deactivate(self): for s in self.shards: self.logger.info("Shutting down shard %s", s) try: - yield self.shards[s].shutdown() + await self.shards[s].shutdown() self.logger.info("Shard %s is down", s) except tornado.gen.TimeoutError: self.logger.info("Cannot shutdown shard %s cleanly: Timeout", s) diff --git a/services/icqsender/service.py b/services/icqsender/service.py index 06af538b89..93df6a7d42 100755 --- a/services/icqsender/service.py +++ b/services/icqsender/service.py @@ -28,14 +28,13 @@ API = "https://api.icq.net/bot/v1/messages/sendText?token=" class IcqSenderService(Service): name = "icqsender" - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): if not config.icqsender.token: self.logger.info("No ICQ token defined") self.url = None else: self.url = API + config.icqsender.token + "&" - yield self.subscribe(topic=self.name, channel="sender", handler=self.on_message) + await self.subscribe(topic=self.name, channel="sender", handler=self.on_message) def on_message(self, message, address, subject, body, attachments=None, **kwargs): self.logger.info( diff --git a/services/mailsender/service.py b/services/mailsender/service.py index 8b7b7fb97c..2b1a7ea3b3 100755 --- a/services/mailsender/service.py +++ b/services/mailsender/service.py @@ -32,10 +32,9 @@ class MailSenderService(Service): super(MailSenderService, self).__init__(*args, **kwargs) self.tz = None - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): self.tz = pytz.timezone(config.timezone) - yield self.subscribe(topic=self.name, channel="sender", handler=self.on_message) + await self.subscribe(topic=self.name, channel="sender", handler=self.on_message) def on_message(self, message, address, subject, body, attachments=None, **kwargs): self.logger.info( diff --git a/services/mrt/service.py b/services/mrt/service.py index 920125e6d8..86e2b1f847 100755 --- a/services/mrt/service.py +++ b/services/mrt/service.py @@ -21,7 +21,7 @@ class MRTService(Service): traefik_backend = "mrt" traefik_frontend_rule = "PathPrefix:/api/mrt" - def on_activate(self): + async def on_activate(self): self.sae = self.open_rpc("sae") def get_handlers(self): diff --git a/services/ping/service.py b/services/ping/service.py index 1a203f0025..73d0cc941e 100755 --- a/services/ping/service.py +++ b/services/ping/service.py @@ -45,10 +45,9 @@ class PingService(Service): self.slot_number = 0 self.total_slots = 0 - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): # Acquire slot - self.slot_number, self.total_slots = yield self.acquire_slot() + self.slot_number, self.total_slots = await self.acquire_slot() if self.total_slots > 1: self.logger.info( "Enabling distributed mode: Slot %d/%d", self.slot_number, self.total_slots diff --git a/services/sae/service.py b/services/sae/service.py index 686f318123..c0328ea7a9 100755 --- a/services/sae/service.py +++ b/services/sae/service.py @@ -40,7 +40,7 @@ class SAEService(Service): for p in Pool.objects.all(): self.pool_cache[str(p.id)] = p.name - def on_activate(self): + async def on_activate(self): self.load_pools() self.pg_pool = ThreadedConnectionPool(1, config.sae.db_threads, **config.pg_connection_args) self.pg_pool_ready.set() diff --git a/services/scheduler/service.py b/services/scheduler/service.py index c56795881d..0212b369aa 100755 --- a/services/scheduler/service.py +++ b/services/scheduler/service.py @@ -22,8 +22,7 @@ class SchedulerService(Service): leader_lock_name = "scheduler" use_mongo = True - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): self.scheduler = Scheduler( "scheduler", reset_running=True, max_threads=config.scheduler.max_threads, ) diff --git a/services/selfmon/service.py b/services/selfmon/service.py index b9ce8d4921..90db37d8e1 100755 --- a/services/selfmon/service.py +++ b/services/selfmon/service.py @@ -30,12 +30,11 @@ class SelfMonService(Service): self.collectors = [] self.runner_thread = None - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): self.collectors = [c(self) for c in iter_collectors() if c.is_enabled()] if not self.collectors: self.die("No collectors enabled") - yield self.acquire_lock() + await self.acquire_lock() self.reorder() self.runner_thread = Thread(target=self.runner) self.runner_thread.setDaemon(True) diff --git a/services/syslogcollector/service.py b/services/syslogcollector/service.py index 8d5bd82ad4..44a2e431b6 100755 --- a/services/syslogcollector/service.py +++ b/services/syslogcollector/service.py @@ -42,8 +42,7 @@ class SyslogCollectorService(Service): self.address_configs = {} # address -> SourceConfig self.invalid_sources = defaultdict(int) # ip -> count - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): # Listen sockets server = SyslogServer(service=self) for addr, port in server.iter_listen(config.syslogcollector.listen): diff --git a/services/tgsender/service.py b/services/tgsender/service.py index 6020d2d845..8f85a0135c 100755 --- a/services/tgsender/service.py +++ b/services/tgsender/service.py @@ -29,14 +29,13 @@ API = "https://api.telegram.org/bot" class TgSenderService(Service): name = "tgsender" - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): if not config.tgsender.token: self.logger.info("No token defined") self.url = None else: self.url = API + config.tgsender.token - yield self.subscribe(topic=self.name, channel="sender", handler=self.on_message) + await self.subscribe(topic=self.name, channel="sender", handler=self.on_message) def on_message(self, message, address, subject, body, attachments=None, **kwargs): self.logger.info( diff --git a/services/trapcollector/service.py b/services/trapcollector/service.py index febc6a2e05..3d401b3e5e 100755 --- a/services/trapcollector/service.py +++ b/services/trapcollector/service.py @@ -39,8 +39,7 @@ class TrapCollectorService(Service): self.address_configs = {} # address -> SourceConfig self.invalid_sources = defaultdict(int) # ip -> count - @tornado.gen.coroutine - def on_activate(self): + async def on_activate(self): # Listen sockets server = TrapServer(service=self) for addr, port in server.iter_listen(config.trapcollector.listen): diff --git a/services/web/service.py b/services/web/service.py index 9088b18864..7c14c49dc9 100755 --- a/services/web/service.py +++ b/services/web/service.py @@ -40,7 +40,7 @@ class WebService(Service): (r"^.*$", NOCWSGIHandler, {"service": self}) ] - def on_activate(self): + async def on_activate(self): # Initialize audit trail from noc.main.models.audittrail import AuditTrail -- GitLab From f799ef850c7e56fbe0317b42e5fe5bc1a6ac72ec Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:28:03 +0300 Subject: [PATCH 33/61] Flake8 fix --- services/tgsender/service.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/services/tgsender/service.py b/services/tgsender/service.py index 8f85a0135c..63d1468f7d 100755 --- a/services/tgsender/service.py +++ b/services/tgsender/service.py @@ -13,9 +13,6 @@ import json import time from urllib.parse import urlencode -# Third-party modules -import tornado.gen - # NOC modules from noc.core.service.base import Service from noc.core.http.client import fetch_sync -- GitLab From 6858d900a189b14e3cb8a1ee505d12d660f1bd0a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:31:59 +0300 Subject: [PATCH 34/61] Flake8 --- services/correlator/service.py | 3 --- services/discovery/service.py | 6 +----- services/icqsender/service.py | 3 --- services/mailsender/service.py | 1 - services/scheduler/service.py | 6 +----- services/selfmon/service.py | 5 +---- 6 files changed, 3 insertions(+), 21 deletions(-) diff --git a/services/correlator/service.py b/services/correlator/service.py index 53dbaa91fb..aa26a9500a 100755 --- a/services/correlator/service.py +++ b/services/correlator/service.py @@ -13,9 +13,6 @@ import re from collections import defaultdict from threading import Lock -# Third-party modules -import tornado.gen - # NOC modules from noc.config import config from noc.core.service.base import Service diff --git a/services/discovery/service.py b/services/discovery/service.py index a80612c154..5d1d9be0c8 100755 --- a/services/discovery/service.py +++ b/services/discovery/service.py @@ -2,14 +2,10 @@ # ---------------------------------------------------------------------- # Discovery # ---------------------------------------------------------------------- -# Copyright (C) 2007-2017 The NOC Project +# Copyright (C) 2007-2020 The NOC Project # See LICENSE for details # ---------------------------------------------------------------------- -# Third-party modules -import tornado.ioloop -import tornado.gen - # NOC modules from noc.config import config from noc.core.service.base import Service diff --git a/services/icqsender/service.py b/services/icqsender/service.py index 93df6a7d42..951ad13dc7 100755 --- a/services/icqsender/service.py +++ b/services/icqsender/service.py @@ -13,9 +13,6 @@ import json import time from urllib.parse import urlencode -# Third-party modules -import tornado.gen - # NOC modules from noc.core.service.base import Service from noc.core.http.client import fetch_sync diff --git a/services/mailsender/service.py b/services/mailsender/service.py index 2b1a7ea3b3..ffc8254229 100755 --- a/services/mailsender/service.py +++ b/services/mailsender/service.py @@ -17,7 +17,6 @@ from email.header import Header # Third-party modules import pytz -import tornado.gen # NOC modules from noc.config import config diff --git a/services/scheduler/service.py b/services/scheduler/service.py index 0212b369aa..3cf6cd37bf 100755 --- a/services/scheduler/service.py +++ b/services/scheduler/service.py @@ -2,14 +2,10 @@ # ---------------------------------------------------------------------- # Scheduler # ---------------------------------------------------------------------- -# Copyright (C) 2007-2019 The NOC Project +# Copyright (C) 2007-2020 The NOC Project # See LICENSE for details # ---------------------------------------------------------------------- -# Third-party modules -import tornado.ioloop -import tornado.gen - # NOC modules from noc.config import config from noc.core.service.base import Service diff --git a/services/selfmon/service.py b/services/selfmon/service.py index 90db37d8e1..9ac85f1ddd 100755 --- a/services/selfmon/service.py +++ b/services/selfmon/service.py @@ -2,7 +2,7 @@ # ---------------------------------------------------------------------- # metrics service # ---------------------------------------------------------------------- -# Copyright (C) 2007-2018 The NOC Project +# Copyright (C) 2007-2020 The NOC Project # See LICENSE for details # ---------------------------------------------------------------------- @@ -11,9 +11,6 @@ from threading import Thread import operator import time -# Third-party modules -import tornado.gen - # NOC modules from noc.core.service.base import Service from noc.core.debug import error_report -- GitLab From a9cbc875e259e7f45e209bb40f6ebb9282c541b9 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 10:57:22 +0300 Subject: [PATCH 35/61] Use asyncio.Futures --- core/etl/extractor/oracle.py | 6 ++---- core/scheduler/job.py | 6 ++---- core/scheduler/scheduler.py | 3 +-- core/threadpool.py | 2 +- requirements/docker.txt | 1 - requirements/node.txt | 1 - 6 files changed, 6 insertions(+), 13 deletions(-) diff --git a/core/etl/extractor/oracle.py b/core/etl/extractor/oracle.py index 9ab1d07481..2b41f616fb 100644 --- a/core/etl/extractor/oracle.py +++ b/core/etl/extractor/oracle.py @@ -7,9 +7,7 @@ # Python modules import os - -# Third-party modules -from concurrent.futures import as_completed +import asyncio # NOC modules from noc.core.threadpool import ThreadPoolExecutor @@ -91,5 +89,5 @@ class ORACLEExtractor(SQLExtractor): futures = [ pool.submit(fetch_sql, query, params) for query, params in self.get_sql() ] - for f in as_completed(futures): + for f in asyncio.as_completed(futures): yield from f.result() diff --git a/core/scheduler/job.py b/core/scheduler/job.py index c948782ee1..8792f4460f 100644 --- a/core/scheduler/job.py +++ b/core/scheduler/job.py @@ -10,9 +10,7 @@ import logging import time import datetime from time import perf_counter - -# Third-party modules -import tornado.gen +import asyncio # NOC modules from noc.core.log import PrefixLoggerAdapter @@ -159,7 +157,7 @@ class Job(object): try: data = self.attrs.get(self.ATTR_DATA) or {} result = self.handler(**data) - if tornado.gen.is_future(result): + if asyncio.isfuture(result): # Wait for future result = await result status = self.E_SUCCESS diff --git a/core/scheduler/scheduler.py b/core/scheduler/scheduler.py index 42084868c9..b9efaf4226 100644 --- a/core/scheduler/scheduler.py +++ b/core/scheduler/scheduler.py @@ -17,7 +17,6 @@ import asyncio # Third-party modules import pymongo.errors from tornado.ioloop import IOLoop -from concurrent.futures import Future from pymongo import DeleteOne, UpdateOne # NOC modules @@ -523,7 +522,7 @@ class Scheduler(object): if self.executor: f = self.executor.shutdown(sync) else: - f = Future() + f = asyncio.Future() f.set_result(True) f.add_done_callback(lambda _: self.apply_bulk_ops()) return f diff --git a/core/threadpool.py b/core/threadpool.py index 2c74b818ca..fa6970c7fa 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -14,9 +14,9 @@ import datetime from collections import deque import _thread from time import perf_counter +from asyncio import Future # Third-party modules -from concurrent.futures import Future from tornado.gen import with_timeout from typing import Optional, Dict, Any, Set, List diff --git a/requirements/docker.txt b/requirements/docker.txt index d040f2d0e9..f7585b550f 100644 --- a/requirements/docker.txt +++ b/requirements/docker.txt @@ -12,7 +12,6 @@ geopy==0.97 geojson==2.5.0 Django==3.0.5 blinker==1.3 -futures>=3.0.5 Jinja2==2.8 jsonschema==2.4.0 git+https://github.com/ploxiln/pynsq.git@a9f9a75eaec62904f06d497397d1b019cdb9581a diff --git a/requirements/node.txt b/requirements/node.txt index ed5da07b8f..40d567166a 100644 --- a/requirements/node.txt +++ b/requirements/node.txt @@ -9,7 +9,6 @@ geojson==2.5.0 Cython>=0.24 Django==3.0.5 blinker==1.3 -futures>=3.0.5 Jinja2==2.8 jsonschema==2.4.0 git+https://github.com/ploxiln/pynsq.git@a9f9a75eaec62904f06d497397d1b019cdb9581a -- GitLab From 6eac1e5f2408c483bde9a7c8abacb12fff669731 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 11:17:47 +0300 Subject: [PATCH 36/61] asyncio timeout handling --- core/http/client.py | 50 +++++++++++++++-------------------------- core/script/cli/base.py | 6 ++--- core/threadpool.py | 11 ++++----- 3 files changed, 25 insertions(+), 42 deletions(-) diff --git a/core/http/client.py b/core/http/client.py index b40c25a170..f933568aaf 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -15,6 +15,7 @@ import time import struct import codecs from urllib.parse import urlparse +import asyncio # Third-party modules import tornado.gen @@ -172,9 +173,8 @@ async def fetch( if proxy: logger.debug("Connecting to proxy %s:%s", connect_address[0], connect_address[1]) - await tornado.gen.with_timeout( - IOLoop.current().time() + connect_timeout, - future=stream.connect(connect_address, server_hostname=u.netloc), + await asyncio.wait_for( + stream.connect(connect_address, server_hostname=u.netloc), connect_timeout ) except tornado.iostream.StreamClosedError: # May be not relevant on Tornado6 anymore @@ -183,10 +183,9 @@ async def fetch( except ConnectionRefusedError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection refused" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection timed out" - deadline = IOLoop.current().time() + request_timeout # Proxy CONNECT if proxy: logger.debug("Sending CONNECT %s:%s", addr, port) @@ -197,30 +196,24 @@ async def fetch( smart_bytes(DEFAULT_USER_AGENT), ) try: - await tornado.gen.with_timeout( - deadline, - future=stream.write(smart_bytes(req)), - quiet_exceptions=(tornado.iostream.StreamClosedError,), - ) + await asyncio.wait_for(stream.write(smart_bytes(req)), request_timeout) except tornado.iostream.StreamClosedError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" # Wait for proxy response parser = HttpParser() while not parser.is_headers_complete(): try: - data = await tornado.gen.with_timeout( - deadline, - future=stream.read_bytes(max_buffer_size, partial=True), - quiet_exceptions=(tornado.iostream.StreamClosedError,), + data = await asyncio.wait_for( + stream.read_bytes(max_buffer_size, partial=True), request_timeout ) except tornado.iostream.StreamClosedError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" received = len(data) @@ -235,19 +228,18 @@ async def fetch( if use_tls: logger.debug("Starting TLS negotiation") try: - stream = await tornado.gen.with_timeout( - deadline, - future=stream.start_tls( + stream = await asyncio.wait_for( + stream.start_tls( server_side=False, ssl_options=get_ssl_options(), server_hostname=u.netloc, ), - quiet_exceptions=(tornado.iostream.StreamClosedError,), + request_timeout, ) except tornado.iostream.StreamClosedError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" # Process request @@ -303,25 +295,19 @@ async def fetch( body, ) try: - await tornado.gen.with_timeout( - deadline, - future=stream.write(req), - quiet_exceptions=(tornado.iostream.StreamClosedError,), - ) + await asyncio.wait_for(stream.write(req), request_timeout) except tornado.iostream.StreamClosedError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection reset while sending request" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request" parser = HttpParser() response_body = [] while not parser.is_message_complete(): try: - data = await tornado.gen.with_timeout( - deadline, - future=stream.read_bytes(max_buffer_size, partial=True), - quiet_exceptions=(tornado.iostream.StreamClosedError,), + data = await asyncio.wait_for( + stream.read_bytes(max_buffer_size, partial=True), request_timeout ) except tornado.iostream.StreamClosedError: if eof_mark and response_body: @@ -340,7 +326,7 @@ async def fetch( break metrics["httpclient_timeouts"] += 1 return ERR_READ_TIMEOUT, {}, "Connection reset" - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: metrics["httpclient_timeouts"] += 1 return ERR_READ_TIMEOUT, {}, "Request timed out" received = len(data) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 44a012d2b2..d006291b48 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -183,7 +183,7 @@ class CLI(object): def set_timeout(self, timeout: int) -> None: if timeout: self.logger.debug("Setting timeout: %ss", timeout) - self.current_timeout = datetime.timedelta(seconds=timeout) + self.current_timeout = timeout else: if self.current_timeout: self.logger.debug("Resetting timeouts") @@ -342,7 +342,7 @@ class CLI(object): metrics["cli_reads", ("proto", self.name)] += 1 f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = await tornado.gen.with_timeout(self.current_timeout, f) + r = await asyncio.wait_for(f, self.current_timeout) else: r = await f if r == self.SYNTAX_ERROR_CODE: @@ -379,7 +379,7 @@ class CLI(object): continue else: raise tornado.iostream.StreamClosedError() - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Timeout error") metrics["cli_timeouts", ("proto", self.name)] += 1 # IOStream must be closed to prevent hanging read callbacks diff --git a/core/threadpool.py b/core/threadpool.py index fa6970c7fa..3f5fed04ec 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -14,10 +14,9 @@ import datetime from collections import deque import _thread from time import perf_counter -from asyncio import Future +from asyncio import Future, wait_for # Third-party modules -from tornado.gen import with_timeout from typing import Optional, Dict, Any, Set, List # NOC modules @@ -163,9 +162,7 @@ class ThreadPoolExecutor(object): self.done_event.wait(timeout=self.shutdown_timeout) return self.done_future else: - return with_timeout( - timeout=datetime.timedelta(seconds=self.shutdown_timeout), future=self.done_future - ) + return wait_for(self.done_future, self.shutdown_timeout) def worker(self): t = threading.current_thread() @@ -182,8 +179,8 @@ class ThreadPoolExecutor(object): if not future: logger.debug("Worker %s has no future. Stopping", t.name) break - if not future.set_running_or_notify_cancel(): - continue + # if not future.set_running_or_notify_cancel(): + # continue sample = 1 if span_ctx else 0 if config.features.forensic: if in_label and callable(in_label): -- GitLab From 59cb2f200c9c5ac07b2395584ca1c007f4b90ae2 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 11:29:57 +0300 Subject: [PATCH 37/61] Fixes --- core/dcs/base.py | 3 ++- core/http/client.py | 1 - core/nsq/topic.py | 1 - core/scheduler/job.py | 2 +- core/script/cli/base.py | 5 +---- core/threadpool.py | 1 - 6 files changed, 4 insertions(+), 9 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index 3769814f87..7d3549319f 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -14,6 +14,7 @@ import datetime import os from urllib.parse import urlparse from time import perf_counter +import asyncio # Third-party modules import tornado.gen @@ -254,7 +255,7 @@ class ResolverBase(object): t = self.dcs.DEFAULT_SERVICE_RESOLUTION_TIMEOUT try: await self.ready_event.wait(timeout=t) - except tornado.gen.TimeoutError: + except (tornado.gen.TimeoutError, asyncio.TimeoutError): metrics["errors", ("type", "dcs_resolver_timeout")] += 1 if self.critical: self.dcs.set_faulty_status("Failed to resolve %s: Timeout" % self.name) diff --git a/core/http/client.py b/core/http/client.py index f933568aaf..7dbdb52243 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -18,7 +18,6 @@ from urllib.parse import urlparse import asyncio # Third-party modules -import tornado.gen import tornado.ioloop from tornado.ioloop import IOLoop import tornado.iostream diff --git a/core/nsq/topic.py b/core/nsq/topic.py index 155c8bbe57..d81c7980bd 100644 --- a/core/nsq/topic.py +++ b/core/nsq/topic.py @@ -14,7 +14,6 @@ import asyncio # Third-party modules import ujson -import tornado.gen import tornado.locks from tornado.ioloop import IOLoop from typing import Union, Iterable, List, Dict, Any diff --git a/core/scheduler/job.py b/core/scheduler/job.py index 8792f4460f..b88a649c27 100644 --- a/core/scheduler/job.py +++ b/core/scheduler/job.py @@ -159,7 +159,7 @@ class Job(object): result = self.handler(**data) if asyncio.isfuture(result): # Wait for future - result = await result + await result status = self.E_SUCCESS except RetryAfter as e: self.logger.info("Retry after %ss: %s", e.delay, e) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index d006291b48..5282c48e40 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -9,7 +9,6 @@ import socket import re import functools -import datetime from functools import reduce import sys from threading import Lock @@ -679,9 +678,7 @@ class CLI(object): await self.send(cmd) # Waiting for response and drop it if i < lseq - 1: - resp = await tornado.gen.with_timeout( - self.ioloop.time() + 30, future=self.iostream.read_bytes(4096, partial=True) - ) + resp = await asyncio.wait_for(self.iostream.read_bytes(4096, partial=True), 30) if self.script.to_track: self.script.push_cli_tracking(resp, self.state) self.logger.debug("Receiving: %r", resp) diff --git a/core/threadpool.py b/core/threadpool.py index 3f5fed04ec..635cbfb3d3 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -10,7 +10,6 @@ import threading import logging import itertools import time -import datetime from collections import deque import _thread from time import perf_counter -- GitLab From 27b6c8d16f6dc4d61ebbbc18d1d33db42dc133f3 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 11:48:32 +0300 Subject: [PATCH 38/61] Fix Job.run --- core/scheduler/job.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/scheduler/job.py b/core/scheduler/job.py index b88a649c27..8d37b56ae8 100644 --- a/core/scheduler/job.py +++ b/core/scheduler/job.py @@ -119,7 +119,7 @@ class Job(object): if ctx not in self.context: self.context[ctx] = {} - async def run(self): + def run(self): with Span( server=self.scheduler.name, service=self.attrs[self.ATTR_CLASS], @@ -159,7 +159,8 @@ class Job(object): result = self.handler(**data) if asyncio.isfuture(result): # Wait for future - await result + for _ in asyncio.as_completed([result]): + pass status = self.E_SUCCESS except RetryAfter as e: self.logger.info("Retry after %ss: %s", e.delay, e) -- GitLab From 8d056c28648d76fa98fbac50bacc3ca6e425fa1a Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 11:55:32 +0300 Subject: [PATCH 39/61] Suppress KeyError on cli ioloop close --- core/script/cli/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 5282c48e40..32993cdad3 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -103,7 +103,10 @@ class CLI(object): self.close_iostream() if self.ioloop: self.logger.debug("Closing IOLoop") - self.ioloop.close(all_fds=True) + try: + self.ioloop.close(all_fds=True) + except KeyError: + self.logger.info("IOLoop is already closed") self.ioloop = None # Restore previous ioloop if self.loop_context: -- GitLab From 5722a2dce606b8fd677aa38970135facd0f18af1 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 12:44:50 +0300 Subject: [PATCH 40/61] Thread-safe resolver --- core/dcs/base.py | 71 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index 7d3549319f..5b6259f490 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -9,8 +9,7 @@ import logging import random import signal -from threading import Lock -import datetime +import threading import os from urllib.parse import urlparse from time import perf_counter @@ -19,7 +18,6 @@ import asyncio # Third-party modules import tornado.gen from tornado.ioloop import IOLoop, PeriodicCallback -import tornado.locks # NOC modules from noc.config import config @@ -29,7 +27,6 @@ from .error import ResolutionError class DCSBase(object): - DEFAULT_SERVICE_RESOLUTION_TIMEOUT = datetime.timedelta(seconds=config.dcs.resolution_timeout) # Resolver class resolver_cls = None # HTTP code to be returned by /health endpoint when service is healthy @@ -44,7 +41,7 @@ class DCSBase(object): self.parse_url(urlparse(url)) # service -> resolver instances self.resolvers = {} - self.resolvers_lock = Lock() + self.resolvers_lock = threading.Lock() self.resolver_expiration_task = None self.health_check_service_id = None self.status = True @@ -199,12 +196,15 @@ class ResolverBase(object): self.services = {} self.service_ids = [] self.service_addresses = set() - self.lock = Lock() + self.lock = threading.Lock() self.policy = self.policy_random self.rr_index = -1 self.critical = critical self.near = near - self.ready_event = tornado.locks.Event() + self.is_ready = False + self.thread_id = threading.get_ident() + self.ready_event_async = asyncio.Event() + self.ready_event_sync = threading.Event() self.track = track self.last_used = perf_counter() @@ -238,29 +238,56 @@ class ResolverBase(object): self.name, ", ".join("%s: %s" % (i, self.services[i]) for i in self.services), ) - self.ready_event.set() + self.set_ready() else: self.logger.info("[%s] No active services", self.name) - self.ready_event.clear() + self.clear_ready() metrics["dcs_resolver_activeservices", ("name", self.name)] = len(self.services) + def set_ready(self): + self.is_ready = True + self.ready_event_async.set() + self.ready_event_sync.set() + + def clear_ready(self): + self.is_ready = False + self.ready_event_async.clear() + self.ready_event_sync.clear() + + def is_same_thread(self): + return self.thread_id == threading.get_ident() + + async def _wait_for_services_async(self, timeout): + try: + await asyncio.wait_for( + self.ready_event_async.wait(), timeout or config.dcs.resolution_timeout + ) + except asyncio.TimeoutError: + metrics["errors", ("type", "dcs_resolver_timeout")] += 1 + if self.critical: + self.dcs.set_faulty_status("Failed to resolve %s: Timeout" % self.name) + raise ResolutionError() + + def _wait_for_services_sync(self, timeout): + if not self.ready_event_sync.wait(timeout): + metrics["errors", ("type", "dcs_resolver_timeout")] += 1 + if self.critical: + self.dcs.set_faulty_status("Failed to resolve %s: Timeout" % self.name) + raise ResolutionError() + + async def _wait_for_services(self, timeout=None): + if self.is_same_thread(): + await self._wait_for_services_async(timeout) + else: + self._wait_for_services_sync(timeout) + async def resolve(self, hint=None, wait=True, timeout=None, full_result=False): self.last_used = perf_counter() metrics["dcs_resolver_requests"] += 1 - if wait: + if not self.services and wait: # Wait until service catalog populated - if timeout: - t = datetime.timedelta(seconds=timeout) - else: - t = self.dcs.DEFAULT_SERVICE_RESOLUTION_TIMEOUT - try: - await self.ready_event.wait(timeout=t) - except (tornado.gen.TimeoutError, asyncio.TimeoutError): - metrics["errors", ("type", "dcs_resolver_timeout")] += 1 - if self.critical: - self.dcs.set_faulty_status("Failed to resolve %s: Timeout" % self.name) - raise ResolutionError() - if not wait and not self.ready_event.is_set(): + await self._wait_for_services(timeout) + if not wait and not self.is_ready: if self.critical: self.dcs.set_faulty_status("Failed to resolve %s: No active services" % self.name) raise ResolutionError() -- GitLab From 38e0d52b1397a5084a0560ed23f92acd01d37c6b Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 12:46:54 +0300 Subject: [PATCH 41/61] Flake8 --- core/dcs/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index 5b6259f490..c68e92f0c9 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -16,7 +16,6 @@ from time import perf_counter import asyncio # Third-party modules -import tornado.gen from tornado.ioloop import IOLoop, PeriodicCallback # NOC modules -- GitLab From c1e0fdbedd549ae7aca8164f10c429f2f8c3c715 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 14:24:26 +0300 Subject: [PATCH 42/61] cli: Gentler IOLoop cleanup --- commands/test.py | 3 +++ core/ioloop/util.py | 5 ++++- core/ioloop/whois.py | 2 +- core/script/cli/base.py | 36 +----------------------------------- 4 files changed, 9 insertions(+), 37 deletions(-) diff --git a/commands/test.py b/commands/test.py index 2792deceb5..a6423b323e 100755 --- a/commands/test.py +++ b/commands/test.py @@ -57,6 +57,9 @@ class Command(BaseCommand): def run_tests(args): self.print("Running test") # Must be imported within coverage + from noc.core.ioloop.util import setup_asyncio + + setup_asyncio() from noc.config import config if test_db: diff --git a/core/ioloop/util.py b/core/ioloop/util.py index aa0c206c18..72eea743e0 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -11,7 +11,7 @@ import asyncio import logging # Third-party modules -from typing import Callable, TypeVar, List, Tuple, Any +from typing import Callable, TypeVar, List, Tuple, Any, Optional # NOC modules from noc.config import config @@ -45,6 +45,9 @@ class IOLoopContext(object): asyncio.get_event_loop_policy().reset_called() self.prev_loop = None + def get_loop(self) -> Optional[asyncio.AbstractEventLoop]: + return self.new_loop + def __enter__(self): return self.get_context() diff --git a/core/ioloop/whois.py b/core/ioloop/whois.py index 61b4c73f20..9f748b2060 100644 --- a/core/ioloop/whois.py +++ b/core/ioloop/whois.py @@ -74,7 +74,7 @@ async def whois_async(query, fields=None): await stream.write(smart_bytes(query) + b"\r\n") data = await stream.read_until_close() finally: - await stream.close() + stream.close() data = smart_text(data) data = parse_response(data) if fields: diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 32993cdad3..96067d5227 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -191,40 +191,6 @@ class CLI(object): self.logger.debug("Resetting timeouts") self.current_timeout = None - def run_sync(self, func, *args, **kwargs): - """ - Simplified implementation of IOLoop.run_sync - to distinguish real TimeoutErrors from incomplete futures - :param func: - :param args: - :param kwargs: - :return: - """ - future_cell = [None] - - def run(): - try: - result = func(*args, **kwargs) - if result is not None: - result = tornado.gen.convert_yielded(result) - future_cell[0] = result - except Exception: - future_cell[0] = Future() - future_cell[0].set_exc_info(sys.exc_info()) - self.ioloop.add_future(future_cell[0], lambda future: self.ioloop.stop()) - - self.ioloop.add_callback(run) - self.ioloop.start() - if not future_cell[0].done(): - self.logger.info("Incomplete feature left. Restarting IOStream") - self.close_iostream() - # Retain cryptic message as is, - # Mark feature as done - future_cell[0].set_exception( - tornado.gen.TimeoutError("Operation timed out after %s seconds" % None) - ) - return future_cell[0].result() - def execute( self, cmd: str, @@ -254,7 +220,7 @@ class CLI(object): with Span( server=self.script.credentials.get("address"), service=self.name, in_label=cmd ) as s: - self.run_sync(self.submit, parser) + self.loop_context.get_loop().run_until_complete(self.submit(parser)) if self.error: if s: s.error_text = str(self.error) -- GitLab From 9654532ed08d4ce988ad3efb66d589f5b5b1b4bf Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 14:32:36 +0300 Subject: [PATCH 43/61] Fix expire_resolvers --- core/dcs/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index c68e92f0c9..160e517e0b 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -131,7 +131,7 @@ class DCSBase(object): async def expire_resolvers(self): with self.resolvers_lock: - for svc in self.resolvers: + for svc in list(self.resolvers): r = self.resolvers[svc] if r.is_expired(): self.logger.info("Stopping expired resolver for service %s", svc) -- GitLab From dd47c555da1774b815b706f429fa1db8774babe6 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 14:53:58 +0300 Subject: [PATCH 44/61] Fix threadpool shutdown --- core/threadpool.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/threadpool.py b/core/threadpool.py index 635cbfb3d3..28e89eda85 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -13,7 +13,7 @@ import time from collections import deque import _thread from time import perf_counter -from asyncio import Future, wait_for +import asyncio # Third-party modules from typing import Optional, Dict, Any, Set, List @@ -133,7 +133,7 @@ class ThreadPoolExecutor(object): def submit(self, fn, *args, **kwargs): if self.to_shutdown: raise RuntimeError("Cannot schedule new task after shutdown") - future = Future() + future = asyncio.Future() span_ctx, span = get_current_span() # Fetch span label if "_in_label" in kwargs: @@ -151,7 +151,7 @@ class ThreadPoolExecutor(object): def shutdown(self, sync=False): logger.info("Shutdown") with self.mutex: - self.done_future = Future() + self.done_future = asyncio.Future() if sync: self.done_event = threading.Event() self.to_shutdown = True @@ -161,7 +161,7 @@ class ThreadPoolExecutor(object): self.done_event.wait(timeout=self.shutdown_timeout) return self.done_future else: - return wait_for(self.done_future, self.shutdown_timeout) + return asyncio.ensure_future(asyncio.wait_for(self.done_future, self.shutdown_timeout)) def worker(self): t = threading.current_thread() -- GitLab From 59bcc33496baab702ddddf9eb359365d0371fa6d Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 21:09:51 +0300 Subject: [PATCH 45/61] http client: Fix gzip content encoding --- core/http/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/http/client.py b/core/http/client.py index 7dbdb52243..dfd0a2980f 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -23,6 +23,7 @@ from tornado.ioloop import IOLoop import tornado.iostream import cachetools import ujson +from typing import Optional # NOC modules from noc.core.perf import metrics @@ -83,7 +84,7 @@ async def fetch( url, method="GET", headers=None, - body=None, + body: Optional[bytes] = None, connect_timeout=DEFAULT_CONNECT_TIMEOUT, request_timeout=DEFAULT_REQUEST_TIMEOUT, resolver=resolve, @@ -268,7 +269,7 @@ async def fetch( 6, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0 ) crc = zlib.crc32(body, 0) & 0xFFFFFFFF - body = "\x1f\x8b\x08\x00%s\x02\xff%s%s%s%s" % ( + body = b"\x1f\x8b\x08\x00%s\x02\xff%s%s%s%s" % ( to32u(int(time.time())), compress.compress(body), compress.flush(), -- GitLab From 473b9e581adb85e6b2af977fc6fff3037e052f33 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 21:11:39 +0300 Subject: [PATCH 46/61] chwriter: Internally operate with binary data --- core/service/base.py | 6 ++--- services/chwriter/channel.py | 16 ++++++++------ services/chwriter/service.py | 43 +++++++++++++++++++++--------------- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/core/service/base.py b/core/service/base.py index f3fdf26f51..1d68495f13 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -675,7 +675,7 @@ class Service(object): self.logger.info("Resuming subscription for handler %s", handler) self.nsq_readers[handler].set_max_in_flight(config.nsqd.max_in_flight) - def get_topic_queue(self, topic): + def get_topic_queue(self, topic: str) -> TopicQueue: q = self.topic_queues.get(topic) if q: return q @@ -689,7 +689,7 @@ class Service(object): self.ioloop.add_callback(self.nsq_publisher_guard, q) return q - async def nsq_publisher_guard(self, queue: TopicQueue) -> Generator: + async def nsq_publisher_guard(self, queue: TopicQueue): while not queue.to_shutdown: try: await self.nsq_publisher(queue) @@ -697,7 +697,7 @@ class Service(object): self.logger.error("Unhandled exception in NSQ publisher, restarting: %s", e) queue.shutdown_complete.set() - async def nsq_publisher(self, queue): + async def nsq_publisher(self, queue: TopicQueue): """ Publisher for NSQ topic diff --git a/services/chwriter/channel.py b/services/chwriter/channel.py index 301df44541..c953bacd13 100644 --- a/services/chwriter/channel.py +++ b/services/chwriter/channel.py @@ -10,12 +10,15 @@ from time import perf_counter from urllib.parse import quote as urllib_quote +# Third-party modules +from typing import List + # NOC modules from noc.config import config class Channel(object): - def __init__(self, service, table, address, db): + def __init__(self, table: str, address: str, db: str): """ :param table: ClickHouse table name :param address: ClickHouse address @@ -24,13 +27,12 @@ class Channel(object): :return: """ self.name = table - self.service = service self.address = address self.db = db self.sql = "INSERT INTO %s FORMAT JSONEachRow" % table self.encoded_sql = urllib_quote(self.sql.encode("utf8")) self.n = 0 - self.data = [] + self.data: List[bytes] = [] self.last_updated = perf_counter() self.last_flushed = perf_counter() self.flushing = False @@ -42,8 +44,8 @@ class Channel(object): self.encoded_sql, ) - def feed(self, data): - n = data.count("\n") + def feed(self, data: bytes): + n = data.count(b"\n") self.n += n self.data += [data] return n @@ -64,9 +66,9 @@ class Channel(object): t = perf_counter() return (t - self.last_flushed) * 1000 >= config.chwriter.batch_delay_ms - def get_data(self): + def get_data(self) -> bytes: self.n = 0 - data = "\n".join(self.data) + data = b"\n".join(self.data) self.data = [] return data diff --git a/services/chwriter/service.py b/services/chwriter/service.py index b9596d417b..8737e8a9b8 100755 --- a/services/chwriter/service.py +++ b/services/chwriter/service.py @@ -11,7 +11,7 @@ from time import perf_counter # Third-party modules import tornado.ioloop -import tornado.gen +from typing import Dict # NOC modules from noc.core.service.base import Service @@ -19,7 +19,7 @@ from noc.core.http.client import fetch from noc.config import config from noc.core.perf import metrics from noc.services.chwriter.channel import Channel -from noc.core.comp import smart_text +from noc.core.comp import smart_text, smart_bytes class CHWriterService(Service): @@ -29,7 +29,7 @@ class CHWriterService(Service): def __init__(self): super(CHWriterService, self).__init__() - self.channels = {} + self.channels: Dict[str, Channel] = {} self.last_ts = None self.last_metrics = 0 self.table_fields = {} # table name -> fields @@ -63,7 +63,7 @@ class CHWriterService(Service): def get_channel(self, table): if table not in self.channels: - self.channels[table] = Channel(self, table, self.ch_address, config.clickhouse.db) + self.channels[table] = Channel(table, self.ch_address, config.clickhouse.db) metrics["channels_active"] += 1 return self.channels[table] @@ -90,7 +90,12 @@ class CHWriterService(Service): ) metrics["deferred_messages"] += 1 return False - table, data = smart_text(records).split("\n", 1) + if b"\n" not in records: + self.logger.error("Malformed message dropped: %s", records) + metrics["dropped_malformed_messages"] += 1 + return True # Trash + b_table, data = records.split(b"\n", 1) + table = smart_text(b_table) self.logger.debug("Receiving %s", table) if "." in table or "|" in table: self.logger.error("Message in legacy format dropped: %s" % table) @@ -102,8 +107,7 @@ class CHWriterService(Service): metrics["records_buffered"] += n return True - @tornado.gen.coroutine - def report(self): + async def report(self): nm = metrics["records_written"].value t = perf_counter() if self.last_ts: @@ -117,8 +121,7 @@ class CHWriterService(Service): self.last_metrics = nm self.last_ts = t - @tornado.gen.coroutine - def check_channels(self): + async def check_channels(self): expired = [c for c in self.channels if self.channels[c].is_expired()] for x in expired: self.logger.info("Closing expired channel %s", x) @@ -139,10 +142,11 @@ class CHWriterService(Service): channel.flushing, ) if channel and channel.is_ready(): - yield self.flush_channel(channel) + await self.flush_channel(channel) - @tornado.gen.coroutine - def flush_channel(self, channel): + async def flush_channel(self, channel: Channel): + if not channel.n: + return channel.start_flushing() n = channel.n data = channel.get_data() @@ -151,7 +155,7 @@ class CHWriterService(Service): written = False suspended = False try: - code, headers, body = yield fetch( + code, headers, body = await fetch( channel.url, method="POST", body=data, @@ -186,7 +190,7 @@ class CHWriterService(Service): else: self.requeue_channel(channel) - def requeue_channel(self, channel): + def requeue_channel(self, channel: Channel): channel.start_flushing() data = channel.get_data().splitlines() if not data: @@ -196,7 +200,11 @@ class CHWriterService(Service): while data: chunk, data = data[: config.nsqd.ch_chunk_size], data[config.nsqd.ch_chunk_size :] cl = len(chunk) - self.pub(config.chwriter.topic, "%s\n%s\n" % (channel.name, "\n".join(chunk)), raw=True) + self.pub( + config.chwriter.topic, + "%s\n%s\n" % (channel.name, "\n".join(smart_text(x) for x in chunk)), + raw=True, + ) metrics["records_requeued"] += cl metrics["records_buffered"] -= cl channel.stop_flushing() @@ -233,12 +241,11 @@ class CHWriterService(Service): metrics["resumes"] += 1 self.resume_subscription(self.on_data) - @tornado.gen.coroutine - def check_restore(self): + async def check_restore(self): if self.stopping: self.logger.info("Checking restore during stopping. Ignoring") else: - code, headers, body = yield fetch( + code, headers, body = await fetch( "http://%s/?user=%s&password=%s&database=%s&query=%s" % ( self.ch_address, -- GitLab From 9c754052dfa7766f696e6ffd8f6a2590b9f98f83 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 21:12:01 +0300 Subject: [PATCH 47/61] Additional type annotations --- core/scheduler/scheduler.py | 15 ++++++++------- core/threadpool.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/core/scheduler/scheduler.py b/core/scheduler/scheduler.py index b9efaf4226..17c77013bd 100644 --- a/core/scheduler/scheduler.py +++ b/core/scheduler/scheduler.py @@ -18,6 +18,7 @@ import asyncio import pymongo.errors from tornado.ioloop import IOLoop from pymongo import DeleteOne, UpdateOne +from typing import Optional # NOC modules from noc.core.mongo.connection import get_db @@ -80,7 +81,7 @@ class Scheduler(object): self.bulk = [] self.bulk_lock = threading.Lock() self.max_threads = max_threads - self.executor = None + self.executor: Optional[ThreadPoolExecutor] = None self.run_callback = None self.check_time = check_time self.read_ahead_interval = datetime.timedelta(milliseconds=check_time) @@ -141,7 +142,7 @@ class Scheduler(object): self.bulk = [] return self.collection - def get_executor(self): + def get_executor(self) -> ThreadPoolExecutor: """ Returns threadpool executor """ @@ -183,8 +184,7 @@ class Scheduler(object): qq = self.filter.copy() qq.update(q) return qq - else: - return q + return q def scheduler_tick(self): """ @@ -210,9 +210,10 @@ class Scheduler(object): while not self.to_shutdown: t0 = perf_counter() n = 0 - if self.get_executor().may_submit(): + executor = self.get_executor() + if executor.may_submit(): try: - n = await self.executor.submit(self.scheduler_tick) + n = await executor.submit(self.scheduler_tick) except Exception as e: self.logger.error("Failed to execute scheduler tick: %s", e) dt = self.check_time - (perf_counter() - t0) * 1000 @@ -524,5 +525,5 @@ class Scheduler(object): else: f = asyncio.Future() f.set_result(True) - f.add_done_callback(lambda _: self.apply_bulk_ops()) + f.add_done_callback(lambda _: self.apply_ops()) return f diff --git a/core/threadpool.py b/core/threadpool.py index 28e89eda85..9d5962558d 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -130,7 +130,7 @@ class ThreadPoolExecutor(object): def stop_one_worker(self): self._put((None, None, None, None, None, None, None)) - def submit(self, fn, *args, **kwargs): + def submit(self, fn, *args, **kwargs) -> asyncio.Future: if self.to_shutdown: raise RuntimeError("Cannot schedule new task after shutdown") future = asyncio.Future() -- GitLab From 8e1669e83d2bba76aa49c4c490eb224c52e86de2 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 21:14:29 +0300 Subject: [PATCH 48/61] Flake8 --- core/script/cli/base.py | 2 -- core/service/base.py | 2 +- services/chwriter/service.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 96067d5227..028a14e6c3 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -10,14 +10,12 @@ import socket import re import functools from functools import reduce -import sys from threading import Lock import asyncio # Third-party modules import tornado.gen import tornado.iostream -from tornado.concurrent import Future from tornado.ioloop import IOLoop from typing import Optional, Callable diff --git a/core/service/base.py b/core/service/base.py index 1d68495f13..d90d2aaeb2 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -27,7 +27,7 @@ import tornado.httpserver import tornado.locks import setproctitle import ujson -from typing import Dict, List, Generator +from typing import Dict, List # NOC modules from noc.config import config, CH_UNCLUSTERED, CH_REPLICATED, CH_SHARDED diff --git a/services/chwriter/service.py b/services/chwriter/service.py index 8737e8a9b8..66f748995a 100755 --- a/services/chwriter/service.py +++ b/services/chwriter/service.py @@ -19,7 +19,7 @@ from noc.core.http.client import fetch from noc.config import config from noc.core.perf import metrics from noc.services.chwriter.channel import Channel -from noc.core.comp import smart_text, smart_bytes +from noc.core.comp import smart_text class CHWriterService(Service): -- GitLab From 095beca0cb95bc649ba391528d216d7825e264b8 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 22:11:29 +0300 Subject: [PATCH 49/61] Refactor timeout handling --- core/script/cli/base.py | 2 +- core/script/mml/base.py | 12 ++++++------ core/script/rtsp/base.py | 13 ++++++------- core/service/apiaccess.py | 1 - core/service/base.py | 6 +++--- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index 028a14e6c3..c7ae0f18ba 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -350,7 +350,7 @@ class CLI(object): metrics["cli_timeouts", ("proto", self.name)] += 1 # IOStream must be closed to prevent hanging read callbacks self.close_iostream() - raise tornado.gen.TimeoutError("Timeout") + raise asyncio.TimeoutError("Timeout") self.logger.debug("Received: %r", r) # Clean input if self.buffer.find(b"\x1b", -self.MATCH_MISSED_CONTROL_TAIL) != -1: diff --git a/core/script/mml/base.py b/core/script/mml/base.py index a6c7949b84..b046b6aa8a 100644 --- a/core/script/mml/base.py +++ b/core/script/mml/base.py @@ -14,7 +14,7 @@ import asyncio # Third-party modules import tornado.ioloop import tornado.iostream -import tornado.gen +from typing import Union # NOC modules from noc.config import config @@ -115,10 +115,10 @@ class MMLBase(object): s.setsockopt(socket.SOL_TCP, socket.TCP_KEEPCNT, self.KEEP_CNT) return self.iostream_class(s, self) - def set_timeout(self, timeout): + def set_timeout(self, timeout: Union[int, float]): if timeout: self.logger.debug("Setting timeout: %ss", timeout) - self.current_timeout = datetime.timedelta(seconds=timeout) + self.current_timeout = timeout else: if self.current_timeout: self.logger.debug("Resetting timeouts") @@ -232,7 +232,7 @@ class MMLBase(object): try: f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = await tornado.gen.with_timeout(self.current_timeout, f) + r = await asyncio.wait_for(f, self.current_timeout) else: r = await f except tornado.iostream.StreamClosedError: @@ -262,9 +262,9 @@ class MMLBase(object): continue else: raise tornado.iostream.StreamClosedError() - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Timeout error") - raise tornado.gen.TimeoutError("Timeout") + raise asyncio.TimeoutError("Timeout") self.logger.debug("Received: %r", r) self.buffer += r offset = max(0, len(self.buffer) - self.MATCH_TAIL) diff --git a/core/script/rtsp/base.py b/core/script/rtsp/base.py index b185ffb62e..1eb5895eae 100644 --- a/core/script/rtsp/base.py +++ b/core/script/rtsp/base.py @@ -7,7 +7,6 @@ # Python modules import socket -import datetime import os from urllib.request import parse_http_list, parse_keqv_list import asyncio @@ -15,8 +14,8 @@ import asyncio # Third-party modules import tornado.ioloop import tornado.iostream -import tornado.gen import hashlib +from typing import Union # NOC modules from noc.config import config @@ -121,10 +120,10 @@ class RTSPBase(object): s.setsockopt(socket.SOL_TCP, socket.TCP_KEEPCNT, self.KEEP_CNT) return self.iostream_class(s, self) - def set_timeout(self, timeout): + def set_timeout(self, timeout: Union[int, float]): if timeout: self.logger.debug("Setting timeout: %ss", timeout) - self.current_timeout = datetime.timedelta(seconds=timeout) + self.current_timeout = timeout else: if self.current_timeout: self.logger.debug("Resetting timeouts") @@ -288,7 +287,7 @@ class RTSPBase(object): try: f = self.iostream.read_bytes(self.BUFFER_SIZE, partial=True) if self.current_timeout: - r = await tornado.gen.with_timeout(self.current_timeout, f) + r = await asyncio.wait_for(f, self.current_timeout) else: r = await f except tornado.iostream.StreamClosedError: @@ -318,9 +317,9 @@ class RTSPBase(object): continue else: raise tornado.iostream.StreamClosedError() - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Timeout error") - raise tornado.gen.TimeoutError("Timeout") + raise asyncio.TimeoutError("Timeout") self.logger.debug("Received: %r", r) self.buffer += r # offset = max(0, len(self.buffer) - self.MATCH_TAIL) diff --git a/core/service/apiaccess.py b/core/service/apiaccess.py index fadc7fd29e..0801acc27f 100644 --- a/core/service/apiaccess.py +++ b/core/service/apiaccess.py @@ -10,7 +10,6 @@ import functools # Third-party modules import tornado.web -import tornado.gen def authenticated(method): diff --git a/core/service/base.py b/core/service/base.py index d90d2aaeb2..5726f58396 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -473,7 +473,7 @@ class Service(object): try: self.logger.info("Shutting down scheduler") await self.scheduler.shutdown() - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Timed out when shutting down scheduler") # Shutdown executors await self.shutdown_executors() @@ -745,7 +745,7 @@ class Service(object): try: self.logger.info("Shutting down %s", x) await self.executors[x].shutdown() - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Timed out when shutting down %s", x) async def shutdown_topic_queues(self): @@ -766,7 +766,7 @@ class Service(object): try: self.logger.info("Waiting shutdown of topic queue %s", topic) await queue.shutdown_complete.wait(datetime.timedelta(seconds=5)) - except tornado.gen.TimeoutError: + except asyncio.TimeoutError: self.logger.info("Failed to shutdown topic queue %s: Timed out", topic) def pub(self, topic, data, raw=False): -- GitLab From aea6a583f4b50c5fcdeb702d5d295e0d3de83402 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 4 May 2020 22:14:10 +0300 Subject: [PATCH 50/61] Fix --- core/script/mml/base.py | 1 - core/service/base.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/core/script/mml/base.py b/core/script/mml/base.py index b046b6aa8a..032adba8e3 100644 --- a/core/script/mml/base.py +++ b/core/script/mml/base.py @@ -7,7 +7,6 @@ # Python modules import socket -import datetime import re import asyncio diff --git a/core/service/base.py b/core/service/base.py index 5726f58396..3503af36fd 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -17,6 +17,7 @@ import time import threading from time import perf_counter import datetime +import asyncio # Third-party modules from tornado.ioloop import IOLoop, PeriodicCallback -- GitLab From 63b9a3532d8e78f715039233cbd5f2ef515e1839 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 08:09:28 +0300 Subject: [PATCH 51/61] Remove broken tests --- tests/web/__init__.py | 0 tests/web/base.py | 108 --------------------------------- tests/web/main/__init__.py | 0 tests/web/main/test_desktop.py | 71 ---------------------- 4 files changed, 179 deletions(-) delete mode 100644 tests/web/__init__.py delete mode 100644 tests/web/base.py delete mode 100644 tests/web/main/__init__.py delete mode 100644 tests/web/main/test_desktop.py diff --git a/tests/web/__init__.py b/tests/web/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/web/base.py b/tests/web/base.py deleted file mode 100644 index 807995bbf6..0000000000 --- a/tests/web/base.py +++ /dev/null @@ -1,108 +0,0 @@ -# ---------------------------------------------------------------------- -# BaseAPITest -# ---------------------------------------------------------------------- -# Copyright (C) 2007-2020 The NOC Project -# See LICENSE for details -# ---------------------------------------------------------------------- - -# Python modules -import functools -import types - -# Third-party modules -import tornado.gen -import tornado.ioloop -import tornado.testing -import tornado.httpserver -import tornado.web -import pytest -import ujson - -# NOC modules -from noc.core.http.client import fetch - - -class APIHandler(object): - io_loop = None - - def __init__(self, handlers): - if not getattr(APIHandler, "io_loop"): - APIHandler.io_loop = tornado.ioloop.IOLoop() - APIHandler.io_loop.make_current() - sock, port = tornado.testing.bind_unused_port() - app = tornado.web.Application(handlers) - self.server = tornado.httpserver.HTTPServer(app) - self.server.add_socket(sock) - self.base_url = "http://127.0.0.1:%d" % port - self.server.start() - - @tornado.gen.coroutine - def fetch(self, url, method="GET", headers=None, body=None): - url = "%s%s" % (self.base_url, url) - code, headers, body = yield fetch(url=url, method=method, headers=headers, body=body) - return code, headers, body - - -def gen_test(f): - def wrap(f): - @functools.wraps(f) - def pre_coroutine(self, *args, **kwargs): - result = f(self, *args, **kwargs) - if isinstance(result, types.GeneratorType): - self._test_generator = result - else: - self._test_generator = None - return result - - coro = tornado.gen.coroutine(pre_coroutine) - - @functools.wraps(coro) - def post_coroutine(self, *args, **kwargs): - try: - return self.io_loop.run_sync( - functools.partial(coro, self, *args, **kwargs), timeout=15 - ) - except tornado.gen.TimeoutError as e: - self._test_generator.throw(e) - raise - - return post_coroutine - - return wrap(f) - - -class BaseAPITest(object): - name = None - _api = {} - - @pytest.fixture(autouse=True) - def init_api(self): - if self.name not in self._api: - self._api[self.name] = APIHandler(self.get_handlers()) - self.api = self._api[self.name] - self.api = self._api[self.name] - self.io_loop = self.api.io_loop - - @tornado.gen.coroutine - def fetch(self, url, method="GET", headers=None, body=None, user="superuser"): - headers = headers or {} - headers["Remote-User"] = user - code, headers, body = yield self.api.fetch(url, method=method, headers=headers, body=body) - if "Content-Type" in headers and headers["Content-Type"].startswith("text/json"): - body = ujson.loads(body) - return code, headers, body - - def get_handlers(self): - raise NotImplementedError() - - -class WebAPITest(BaseAPITest): - name = "web" - - def get_handlers(self): - from noc.services.web.service import WebService - - ws = WebService() - ws.setup_test_logging() - ws.on_activate() - return ws.get_handlers() diff --git a/tests/web/main/__init__.py b/tests/web/main/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/web/main/test_desktop.py b/tests/web/main/test_desktop.py deleted file mode 100644 index c239baed9e..0000000000 --- a/tests/web/main/test_desktop.py +++ /dev/null @@ -1,71 +0,0 @@ -# ---------------------------------------------------------------------- -# Test main.desktop application -# ---------------------------------------------------------------------- -# Copyright (C) 2007-2018 The NOC Project -# See LICENSE for details -# ---------------------------------------------------------------------- - -# NOC modules -from noc.core.version import version -from noc.config import config -from noc.core.comp import smart_text -from ..base import WebAPITest, gen_test - - -class TestDesktopAPI(WebAPITest): - users = [ - { - "username": "superuser", - "email": "superuser@example.com", - "first_name": "Mighty", - "last_name": "Root", - }, - { - "username": "granted", - "email": "granted@example.com", - "first_name": "Granted", - "last_name": "User", - }, - { - "username": "deprived", - "email": "deprived@example.com", - "first_name": "Deprived", - "last_name": "Buddy", - }, - ] - - @gen_test - def test_is_logged(self): - code, headers, body = yield self.fetch("/main/desktop/is_logged/") - assert code == 200 - assert body is True - - @gen_test - def test_html(self): - code, headers, body = yield self.fetch("/main/desktop/") - assert code == 200 - assert "" in smart_text(body) - - @gen_test - def test_about(self): - code, headers, body = yield self.fetch("/main/desktop/about/") - assert code == 200 - assert body.get("installation") == config.installation_name - - @gen_test - def test_version(self): - code, headers, body = yield self.fetch("/main/desktop/version/") - assert code == 200 - assert body == version.version - - @gen_test - def test_user_settings(self): - for user in self.users: - code, headers, body = yield self.fetch( - "/main/desktop/user_settings/", user=user["username"] - ) - assert code == 200 - assert body.get("username") == user["username"] - assert body.get("email") == user["email"] - assert body.get("first_name") == user["first_name"] - assert body.get("last_name") == user["last_name"] -- GitLab From d20922388cd254af3dd4a44545a39401ab75ee70 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 10:00:12 +0300 Subject: [PATCH 52/61] async/await API and Service.run_in_executor --- core/service/api.py | 12 +++++++----- core/service/base.py | 20 ++++++++++++++------ services/correlator/service.py | 2 +- services/grafanads/annotations.py | 7 +++---- services/sae/api/sae.py | 29 +++++++++++------------------ services/web/service.py | 17 +++++++++-------- 6 files changed, 45 insertions(+), 42 deletions(-) diff --git a/core/service/api.py b/core/service/api.py index eeb525b5bf..67b58fd228 100644 --- a/core/service/api.py +++ b/core/service/api.py @@ -7,6 +7,7 @@ # Python modules from collections import namedtuple +import asyncio # Third-party modules import tornado.web @@ -35,8 +36,7 @@ class APIRequestHandler(tornado.web.RequestHandler): self.service = service self.api_class = api_class - @tornado.gen.coroutine - def post(self, *args, **kwargs): + async def post(self, *args, **kwargs): span_ctx = self.request.headers.get("X-NOC-Span-Ctx", 0) span_id = self.request.headers.get("X-NOC-Span", 0) sample = 1 if span_ctx and span_id else 0 @@ -78,13 +78,15 @@ class APIRequestHandler(tornado.web.RequestHandler): try: if getattr(h, "executor", ""): # Threadpool version - executor = self.service.get_executor(h.executor) - result = executor.submit(h, *params) + result = await self.service.run_in_executor(h.executor, h, *params) else: # Serialized version result = h(*params) if tornado.gen.is_future(result): - result = yield result + # @todo: Deprecated + result = await result + elif asyncio.isfuture(result) or asyncio.iscoroutine(result): + result = await result if isinstance(result, Redirect): # Redirect protocol extension self.set_status(307, "Redirect") diff --git a/core/service/base.py b/core/service/base.py index 3503af36fd..edf7f7cea7 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -28,7 +28,7 @@ import tornado.httpserver import tornado.locks import setproctitle import ujson -from typing import Dict, List +from typing import Dict, List, Tuple, Callable, Any, TypeVar # NOC modules from noc.config import config, CH_UNCLUSTERED, CH_REPLICATED, CH_SHARDED @@ -57,6 +57,8 @@ from .rpc import RPCProxy from .ctl import CtlAPI from .loader import set_service +T = TypeVar("T") + class Service(object): """ @@ -169,13 +171,13 @@ class Service(object): self.topic_queues: Dict[str, TopicQueue] = {} self.topic_queue_lock = threading.Lock() - def create_parser(self): + def create_parser(self) -> argparse.ArgumentParser: """ Return argument parser """ return argparse.ArgumentParser() - def add_arguments(self, parser): + def add_arguments(self, parser: argparse.ArgumentParser) -> None: """ Apply additional parser arguments """ @@ -364,7 +366,7 @@ class Service(object): self.logger.warning("SIGTERM caught, Stopping") self.stop() - def get_service_address(self): + def get_service_address(self) -> Tuple[str, int]: """ Returns an (address, port) for HTTP service listener """ @@ -795,7 +797,7 @@ class Service(object): for chunk in q.iter_encode_chunks(m): q.put(chunk) - def get_executor(self, name): + def get_executor(self, name: str) -> ThreadPoolExecutor: """ Return or start named executor """ @@ -810,6 +812,12 @@ class Service(object): self.executors[name] = executor return executor + def run_in_executor( + self, name: str, fn: Callable[[Any], T], *args: Any, **kwargs: Any + ) -> asyncio.Future[T]: + executor = self.get_executor(name) + return executor.submit(fn, *args, **kwargs) + def register_metrics(self, table, metrics): """ Register metrics @@ -887,7 +895,7 @@ class Service(object): for chunk in self._iter_metrics_raw_chunks(table, data[ch]): self.pub(ch, chunk, raw=True) - def start_telemetry_callback(self): + def start_telemetry_callback(self) -> None: """ Run telemetry callback :return: diff --git a/services/correlator/service.py b/services/correlator/service.py index aa26a9500a..c7ceffa0bb 100755 --- a/services/correlator/service.py +++ b/services/correlator/service.py @@ -481,7 +481,7 @@ class CorrelatorService(Service): """ self.logger.info("[%s] Receiving message", event_id) message.enable_async() - self.get_executor("max").submit(self.dispose_worker, message, event_id, event) + self.run_in_executor("max", self.dispose_worker, message, event_id, event) def dispose_worker(self, message, event_id, event_hint=None): metrics["alarm_dispose"] += 1 diff --git a/services/grafanads/annotations.py b/services/grafanads/annotations.py index a9bc2216b7..11d441e3df 100644 --- a/services/grafanads/annotations.py +++ b/services/grafanads/annotations.py @@ -2,7 +2,7 @@ # --------------------------------------------------------------------- # annotations handler # --------------------------------------------------------------------- -# Copyright (C) 2007-2019 The NOC Project +# Copyright (C) 2007-2020 The NOC Project # See LICENSE for details # --------------------------------------------------------------------- @@ -28,8 +28,7 @@ class AnnotationsHandler(tornado.web.RequestHandler): def initialize(self, service=None): self.service = service - @tornado.gen.coroutine - def post(self, *args, **kwargs): + async def post(self, *args, **kwargs): try: req = ujson.loads(self.request.body) except ValueError: @@ -47,7 +46,7 @@ class AnnotationsHandler(tornado.web.RequestHandler): # Annotation to return in reply ra = req.get("annotation") # - result = yield self.service.get_executor("db").submit(self.get_annotations, f, t, ra) + result = await self.service.run_in_executor("db", self.get_annotations, f, t, ra) self.write(result) def get_annotations(self, f, t, annotation): diff --git a/services/sae/api/sae.py b/services/sae/api/sae.py index c021a669e6..8e3197374b 100644 --- a/services/sae/api/sae.py +++ b/services/sae/api/sae.py @@ -5,9 +5,6 @@ # See LICENSE for details # --------------------------------------------------------------------- -# Third-party modules -import tornado.gen - # NOC modules from noc.core.service.api import API, APIError, api from noc.core.script.loader import loader @@ -64,12 +61,11 @@ class SAEAPI(API): WHERE mo.id = %s """ - @tornado.gen.coroutine - def resolve_activator(self, pool): + async def resolve_activator(self, pool): sn = "activator-%s" % pool for i in range(config.sae.activator_resolution_retries): try: - svc = yield self.service.dcs.resolve( + svc = await self.service.dcs.resolve( sn, timeout=config.sae.activator_resolution_timeout ) return svc @@ -78,9 +74,8 @@ class SAEAPI(API): metrics["error", ("type", "resolve_activator")] += 1 return None - @tornado.gen.coroutine - def get_activator_url(self, pool): - svc = yield self.resolve_activator(pool) + async def get_activator_url(self, pool): + svc = await self.resolve_activator(pool) if svc: return "http://%s/api/activator/" % svc else: @@ -88,17 +83,16 @@ class SAEAPI(API): return None @api - @tornado.gen.coroutine - def script(self, object_id, script, args=None, timeout=None): + async def script(self, object_id, script, args=None, timeout=None): """ Execute SA script against ManagedObject - :param object: Managed Object id - :param script: Script name (Eighter with or without profile) + :param object_id: Managed Object id + :param script: Script name (Either with or without profile) :param args: Dict with input arguments :param timeout: Script timeout in seconds """ # Resolve object data - data = yield self.service.get_executor("db").submit(self.get_object_data, object_id) + data = await self.service.run_in_executor("db", self.get_object_data, object_id) # Find pool name pool = self.service.get_pool_name(data["pool_id"]) if not pool: @@ -110,7 +104,7 @@ class SAEAPI(API): metrics["error", ("type", "invalid_scripts_request")] += 1 raise APIError("Invalid script") # - url = yield self.get_activator_url(pool) + url = await self.get_activator_url(pool) if not url: raise APIError("No active activators for pool '%s'" % pool) return self.redirect( @@ -127,10 +121,9 @@ class SAEAPI(API): ) @api - @tornado.gen.coroutine - def get_credentials(self, object_id): + async def get_credentials(self, object_id): # Resolve object data - data = yield self.service.get_executor("db").submit(self.get_object_data, object_id) + data = await self.service.run_in_executor("db", self.get_object_data, object_id) # Find pool name pool = self.service.get_pool_name(data["pool_id"]) if not pool: diff --git a/services/web/service.py b/services/web/service.py index 7c14c49dc9..8cc5a2c23a 100755 --- a/services/web/service.py +++ b/services/web/service.py @@ -64,16 +64,14 @@ class WebService(Service): class NOCWSGIHandler(tornado.web.RequestHandler): def initialize(self, service): self.service = service - self.executor = self.service.get_executor("max") self.backend_id = "%s (%s:%s)" % ( self.service.service_id, self.service.address, self.service.port, ) - @tornado.gen.coroutine - def prepare(self): - data = yield self.process_request(self.request) + async def prepare(self): + data = await self.process_request(self.request) header_obj = httputil.HTTPHeaders() for key, value in data["headers"]: header_obj.add(key, value) @@ -82,8 +80,7 @@ class NOCWSGIHandler(tornado.web.RequestHandler): self.log_request(data["status_code"], self.request) self._finished = True - @tornado.gen.coroutine - def process_request(self, request): + async def process_request(self, request): data = {} response = [] @@ -102,8 +99,12 @@ class NOCWSGIHandler(tornado.web.RequestHandler): else: in_label = None wsgi = django.core.handlers.wsgi.WSGIHandler() - app_response = yield self.executor.submit( - wsgi, tornado.wsgi.WSGIContainer.environ(request), start_response, _in_label=in_label + app_response = await self.service.run_in_executor( + "max", + wsgi, + tornado.wsgi.WSGIContainer.environ(request), + start_response, + _in_label=in_label, ) try: response.extend(app_response) -- GitLab From 1e3e6ca44fb4b3ff5703da50406574c48a0960bb Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 10:59:07 +0300 Subject: [PATCH 53/61] ThreadPoolExecutor: Thread-safe result passing --- core/ioloop/util.py | 12 ++++++++++++ core/service/base.py | 2 +- core/threadpool.py | 25 +++++++++++++++++++------ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 72eea743e0..7bb4c4f957 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -128,3 +128,15 @@ class NOCEventLoopPolicy(asyncio.DefaultEventLoopPolicy): :return: """ self._local._set_called = False + + +if sys.version_info >= (3, 7): + + def get_future_loop(future: asyncio.Future) -> asyncio.AbstractEventLoop: + return future.get_loop() + + +else: + + def get_future_loop(future: asyncio.Future) -> asyncio.AbstractEventLoop: + return future._loop diff --git a/core/service/base.py b/core/service/base.py index edf7f7cea7..fe09a781c4 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -814,7 +814,7 @@ class Service(object): def run_in_executor( self, name: str, fn: Callable[[Any], T], *args: Any, **kwargs: Any - ) -> asyncio.Future[T]: + ) -> asyncio.Future: executor = self.get_executor(name) return executor.submit(fn, *args, **kwargs) diff --git a/core/threadpool.py b/core/threadpool.py index 9d5962558d..fa2c198fa1 100644 --- a/core/threadpool.py +++ b/core/threadpool.py @@ -16,12 +16,15 @@ from time import perf_counter import asyncio # Third-party modules -from typing import Optional, Dict, Any, Set, List +from typing import Optional, Dict, Any, Set, List, Callable, TypeVar # NOC modules from noc.config import config from noc.core.span import Span, get_current_span from noc.core.error import NOCError +from noc.core.ioloop.util import get_future_loop + +T = TypeVar("T") logger = logging.getLogger(__name__) @@ -130,10 +133,10 @@ class ThreadPoolExecutor(object): def stop_one_worker(self): self._put((None, None, None, None, None, None, None)) - def submit(self, fn, *args, **kwargs) -> asyncio.Future: + def submit(self, fn: Callable[[Any], T], *args: Any, **kwargs: Any) -> asyncio.Future: if self.to_shutdown: raise RuntimeError("Cannot schedule new task after shutdown") - future = asyncio.Future() + future: asyncio.Future = asyncio.Future() span_ctx, span = get_current_span() # Fetch span label if "_in_label" in kwargs: @@ -163,6 +166,14 @@ class ThreadPoolExecutor(object): else: return asyncio.ensure_future(asyncio.wait_for(self.done_future, self.shutdown_timeout)) + @staticmethod + def _set_future_result(future: asyncio.Future, result: Any) -> None: + get_future_loop(future).call_soon_threadsafe(future.set_result, result) + + @staticmethod + def _set_future_exception(future: asyncio.Future, exc: BaseException) -> None: + get_future_loop(future).call_soon_threadsafe(future.set_exception, exc) + def worker(self): t = threading.current_thread() logger.debug("Starting worker thread %s", t.name) @@ -196,14 +207,16 @@ class ThreadPoolExecutor(object): ) as span: try: result = fn(*args, **kwargs) - future.set_result(result) + self._set_future_result(future, result) result = None # Release memory except NOCError as e: - future.set_exception(e) + self._set_future_exception(future, e) span.set_error_from_exc(e, e.default_code) + e = None # Release memory except BaseException as e: - future.set_exception(e) + self._set_future_exception(future, e) span.set_error_from_exc(e) + e = None # Release memory finally: logger.debug("Stopping worker thread %s", t.name) with self.mutex: -- GitLab From d3ae8267430f768daa923fde47c503625fc3f792 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 12:09:15 +0300 Subject: [PATCH 54/61] Consul DCS: Use Base HTTP Client directly --- core/dcs/consuldcs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/dcs/consuldcs.py b/core/dcs/consuldcs.py index 2ee7461d80..9e3126e3e2 100644 --- a/core/dcs/consuldcs.py +++ b/core/dcs/consuldcs.py @@ -15,7 +15,6 @@ import asyncio # Third-party modules import consul.base -import consul.tornado from tornado.ioloop import PeriodicCallback # NOC modules @@ -32,9 +31,9 @@ CONSUL_REQUEST_TIMEOUT = config.consul.request_timeout CONSUL_NEAR_RETRY_TIMEOUT = config.consul.near_retry_timeout -class ConsulHTTPClient(consul.tornado.HTTPClient): +class ConsulHTTPClient(consul.base.HTTPClient): """ - Gentler version of tornado http client + asyncio version of consul http client """ async def _request(self, callback, url, method="GET", body=None): -- GitLab From 6a6ac6fa2f2ff399c10640d1ebf82b1e82fe39b0 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 13:18:01 +0300 Subject: [PATCH 55/61] httpclient: Native asyncio implementation --- core/dcs/base.py | 4 +- core/http/client.py | 102 +++++++++++++++----------------------------- 2 files changed, 37 insertions(+), 69 deletions(-) diff --git a/core/dcs/base.py b/core/dcs/base.py index 160e517e0b..43f9921664 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -51,7 +51,7 @@ class DCSBase(object): def start(self): """ - Run IOLoop if not started yet + Start all pending tasks :return: """ self.resolver_expiration_task = PeriodicCallback(self.expire_resolvers, 10000) @@ -59,7 +59,7 @@ class DCSBase(object): def stop(self): """ - Stop IOLoop if not stopped yet + Stop all pending tasks :return: """ if self.resolver_expiration_task: diff --git a/core/http/client.py b/core/http/client.py index dfd0a2980f..f55f5c3167 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -18,12 +18,9 @@ from urllib.parse import urlparse import asyncio # Third-party modules -import tornado.ioloop -from tornado.ioloop import IOLoop -import tornado.iostream import cachetools import ujson -from typing import Optional +from typing import Optional, List # NOC modules from noc.core.perf import metrics @@ -31,6 +28,7 @@ from noc.core.validators import is_ipv4 from .proxy import SYSTEM_PROXIES from noc.config import config from noc.core.comp import smart_bytes, smart_text +from noc.core.ioloop.util import run_sync from http_parser.parser import HttpParser @@ -122,13 +120,19 @@ async def fetch( :return: code, headers, body """ - def get_ssl_options(): - ssl_options = {} - if validate_cert: - ssl_options["cert_reqs"] = ssl.CERT_REQUIRED - return ssl_options + def get_connect_options(): + opts = {} + if use_tls and not proxy: + ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) + if validate_cert: + ctx.check_hostname = True + ctx.verify_mode = ssl.CERT_REQUIRED + else: + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + opts["ssl"] = ctx + return opts - logger.debug("HTTP %s %s", method, url) metrics["httpclient_requests", ("method", method.lower())] += 1 # if eof_mark: @@ -136,6 +140,8 @@ async def fetch( # Detect proxy when necessary u = urlparse(str(url)) use_tls = u.scheme == "https" + proto = "HTTPS" if use_tls else "HTTP" + logger.debug("%s %s %s", proto, method, url) if ":" in u.netloc: host, port = u.netloc.rsplit(":") port = int(port) @@ -156,13 +162,8 @@ async def fetch( else: proxy = None # Connect - stream = None - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + reader, writer = None, None try: - if use_tls and not proxy: - stream = tornado.iostream.SSLIOStream(s, ssl_options=get_ssl_options()) - else: - stream = tornado.iostream.IOStream(s) try: if proxy: connect_address = proxy @@ -173,13 +174,12 @@ async def fetch( if proxy: logger.debug("Connecting to proxy %s:%s", connect_address[0], connect_address[1]) - await asyncio.wait_for( - stream.connect(connect_address, server_hostname=u.netloc), connect_timeout + reader, writer = await asyncio.wait_for( + asyncio.open_connection( + connect_address[0], connect_address[1], **get_connect_options() + ), + connect_timeout, ) - except tornado.iostream.StreamClosedError: - # May be not relevant on Tornado6 anymore - metrics["httpclient_timeouts"] += 1 - return ERR_TIMEOUT, {}, "Connection refused" except ConnectionRefusedError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection refused" @@ -195,11 +195,9 @@ async def fetch( smart_bytes(port), smart_bytes(DEFAULT_USER_AGENT), ) + writer.write(smart_bytes(req)) try: - await asyncio.wait_for(stream.write(smart_bytes(req)), request_timeout) - except tornado.iostream.StreamClosedError: - metrics["httpclient_proxy_timeouts"] += 1 - return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" + await asyncio.wait_for(writer.drain(), request_timeout) except asyncio.TimeoutError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" @@ -207,12 +205,7 @@ async def fetch( parser = HttpParser() while not parser.is_headers_complete(): try: - data = await asyncio.wait_for( - stream.read_bytes(max_buffer_size, partial=True), request_timeout - ) - except tornado.iostream.StreamClosedError: - metrics["httpclient_proxy_timeouts"] += 1 - return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" + data = await asyncio.wait_for(reader.read(max_buffer_size), request_timeout) except asyncio.TimeoutError: metrics["httpclient_proxy_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" @@ -224,24 +217,6 @@ async def fetch( logger.debug("Proxy response: %s", code) if not 200 <= code <= 299: return code, parser.get_headers(), "Proxy error: %s" % code - # Switch to TLS when necessary - if use_tls: - logger.debug("Starting TLS negotiation") - try: - stream = await asyncio.wait_for( - stream.start_tls( - server_side=False, - ssl_options=get_ssl_options(), - server_hostname=u.netloc, - ), - request_timeout, - ) - except tornado.iostream.StreamClosedError: - metrics["httpclient_proxy_timeouts"] += 1 - return ERR_TIMEOUT, {}, "Connection reset while connecting to proxy" - except asyncio.TimeoutError: - metrics["httpclient_proxy_timeouts"] += 1 - return ERR_TIMEOUT, {}, "Timed out while sending request to proxy" # Process request body = body or "" content_type = "application/binary" @@ -295,21 +270,20 @@ async def fetch( body, ) try: - await asyncio.wait_for(stream.write(req), request_timeout) - except tornado.iostream.StreamClosedError: + writer.write(req) + await asyncio.wait_for(writer.drain(), request_timeout) + except ConnectionResetError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Connection reset while sending request" except asyncio.TimeoutError: metrics["httpclient_timeouts"] += 1 return ERR_TIMEOUT, {}, "Timed out while sending request" parser = HttpParser() - response_body = [] + response_body: List[bytes] = [] while not parser.is_message_complete(): try: - data = await asyncio.wait_for( - stream.read_bytes(max_buffer_size, partial=True), request_timeout - ) - except tornado.iostream.StreamClosedError: + data = await asyncio.wait_for(reader.read(max_buffer_size), request_timeout) + except asyncio.IncompleteReadError: if eof_mark and response_body: # Check if EOF mark is in received data response_body = [b"".join(response_body)] @@ -365,10 +339,8 @@ async def fetch( # @todo: Process gzip and deflate Content-Encoding return code, parsed_headers, b"".join(response_body) finally: - if stream: - stream.close() - else: - s.close() + if writer: + writer.close() def fetch_sync( @@ -391,7 +363,7 @@ def fetch_sync( eof_mark=None, ): async def _fetch(): - result = await fetch( + return await fetch( url, method=method, headers=headers, @@ -410,12 +382,8 @@ def fetch_sync( content_encoding=content_encoding, eof_mark=eof_mark, ) - r.append(result) - r = [] - # Should be another IOLoop instance instance - IOLoop().run_sync(_fetch) - return r[0] + return run_sync(_fetch) def to32u(n): -- GitLab From a630b713fde3c081df01033a3a52cc0f6da6ab82 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 13:58:44 +0300 Subject: [PATCH 56/61] Gentler ioloop shutdown --- core/ioloop/util.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index 7bb4c4f957..f4f8135e1c 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -35,6 +35,7 @@ class IOLoopContext(object): return self.new_loop def drop_context(self): + self.new_loop.run_until_complete(self.new_loop.shutdown_asyncgens()) self.new_loop.close() self.new_loop = None asyncio._set_running_loop(self.prev_loop) @@ -68,8 +69,7 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: async def wrapper(): try: - r = await cb() - result.append(r) + result.append(await cb()) except Exception: error.append(sys.exc_info()) @@ -81,7 +81,6 @@ def run_sync(cb: Callable[..., T], close_all: bool = True) -> T: with IOLoopContext() as loop: loop.run_until_complete(wrapper()) - # @todo: close_all if error: reraise(*error[0]) return result[0] -- GitLab From 5b908ea6d5ec8d8e3bd486c80c3b9530128adf06 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 14:23:27 +0300 Subject: [PATCH 57/61] Clean up synchorous RPC client --- core/service/rpc.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/core/service/rpc.py b/core/service/rpc.py index c0dc53b7dc..10bc08d1f2 100644 --- a/core/service/rpc.py +++ b/core/service/rpc.py @@ -9,8 +9,6 @@ import itertools import logging import random -import threading -import sys from time import perf_counter import asyncio @@ -23,7 +21,7 @@ from noc.core.http.client import fetch from noc.core.perf import metrics from noc.config import config from noc.core.span import Span, get_current_span -from noc.core.comp import reraise +from noc.core.ioloop.util import run_sync from .error import RPCError, RPCNoService, RPCHTTPError, RPCException, RPCRemoteError logger = logging.getLogger(__name__) @@ -148,28 +146,13 @@ class RPCProxy(object): raise RPCNoService("No active service %s found" % self._service_name) async def async_wrapper(*args, **kwargs): - result = await _call(item, *args, **kwargs) - return result + return await _call(item, *args, **kwargs) def sync_wrapper(*args, **kwargs): - async def _sync_call(): - try: - r = await _call(item, *args, **kwargs) - result.append(r) - except Exception: - error.append(sys.exc_info()) - finally: - ev.set() + async def wrapper(): + return await _call(item, *args, **kwargs) - ev = threading.Event() - result = [] - error = [] - self._service.ioloop.add_callback(_sync_call) - ev.wait() - if error: - reraise(*error[0]) - else: - return result[0] + return run_sync(wrapper) if item.startswith("_"): return self.__dict__[item] -- GitLab From 6c86d6b70bddb1db692746757f003f1fdad44bf1 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 14:48:31 +0300 Subject: [PATCH 58/61] httpclient: Wait for close --- core/http/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/http/client.py b/core/http/client.py index f55f5c3167..6c97cb1113 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -341,6 +341,9 @@ async def fetch( finally: if writer: writer.close() + # Pass one more tick to ensure transport is closed + # Refer to https://github.com/python/asyncio/issues/466 + await asyncio.sleep(0) def fetch_sync( -- GitLab From e01beaafa8dfc4e838a835f337a0e81d41a455af Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 5 May 2020 15:57:18 +0300 Subject: [PATCH 59/61] Fix run_sync --- core/ioloop/util.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/ioloop/util.py b/core/ioloop/util.py index f4f8135e1c..841280f62f 100644 --- a/core/ioloop/util.py +++ b/core/ioloop/util.py @@ -35,7 +35,13 @@ class IOLoopContext(object): return self.new_loop def drop_context(self): + # Cancel all tasks + to_cancel = asyncio.Task.all_tasks(self.new_loop) + for task in to_cancel: + task.cancel() + asyncio.gather(*to_cancel, loop=self.new_loop, return_exceptions=True) self.new_loop.run_until_complete(self.new_loop.shutdown_asyncgens()) + # self.new_loop.close() self.new_loop = None asyncio._set_running_loop(self.prev_loop) -- GitLab From 1967df9ab513e49f0d6c0905b03def4256972683 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 12 May 2020 17:25:21 +0300 Subject: [PATCH 60/61] Run DCS resolver in separate thread --- core/dcs/base.py | 28 +++++-- core/dcs/{consuldcs.py => consul.py} | 4 +- core/dcs/loader.py | 105 ++++++++++++++++++++------- core/dcs/util.py | 5 +- core/http/client.py | 23 +++--- 5 files changed, 115 insertions(+), 50 deletions(-) rename core/dcs/{consuldcs.py => consul.py} (99%) diff --git a/core/dcs/base.py b/core/dcs/base.py index 43f9921664..e5d290484a 100644 --- a/core/dcs/base.py +++ b/core/dcs/base.py @@ -16,7 +16,7 @@ from time import perf_counter import asyncio # Third-party modules -from tornado.ioloop import IOLoop, PeriodicCallback +from tornado.ioloop import PeriodicCallback # NOC modules from noc.config import config @@ -34,7 +34,8 @@ class DCSBase(object): # and must be temporary removed from resolver HEALTH_FAILED_HTTP_CODE = 429 - def __init__(self, url): + def __init__(self, runner, url): + self.runner = runner self.logger = logging.getLogger(__name__) self.url = url self.parse_url(urlparse(url)) @@ -45,15 +46,17 @@ class DCSBase(object): self.health_check_service_id = None self.status = True self.status_message = "" + self.thread_id = None def parse_url(self, u): pass - def start(self): + async def start(self): """ Start all pending tasks :return: """ + self.thread_id = threading.get_ident() self.resolver_expiration_task = PeriodicCallback(self.expire_resolvers, 10000) self.resolver_expiration_task.start() @@ -100,6 +103,10 @@ class DCSBase(object): raise NotImplementedError() async def get_resolver(self, name, critical=False, near=False, track=True): + def run_resolver(res): + loop = asyncio.get_running_loop() + loop.call_soon(loop.create_task, res.start()) + if track: with self.resolvers_lock: resolver = self.resolvers.get((name, critical, near)) @@ -107,11 +114,11 @@ class DCSBase(object): self.logger.info("Running resolver for service %s", name) resolver = self.resolver_cls(self, name, critical=critical, near=near) self.resolvers[name, critical, near] = resolver - IOLoop.current().add_callback(resolver.start) + run_resolver(resolver) else: # One-time resolver resolver = self.resolver_cls(self, name, critical=critical, near=near, track=False) - IOLoop.current().add_callback(resolver.start) + run_resolver(resolver) return resolver async def resolve( @@ -125,9 +132,14 @@ class DCSBase(object): near=False, track=True, ): - resolver = await self.get_resolver(name, critical=critical, near=near, track=track) - r = await resolver.resolve(hint=hint, wait=wait, timeout=timeout, full_result=full_result) - return r + async def wrap(): + resolver = await self.get_resolver(name, critical=critical, near=near, track=track) + r = await resolver.resolve( + hint=hint, wait=wait, timeout=timeout, full_result=full_result + ) + return r + + return await self.runner.trampoline(wrap()) async def expire_resolvers(self): with self.resolvers_lock: diff --git a/core/dcs/consuldcs.py b/core/dcs/consul.py similarity index 99% rename from core/dcs/consuldcs.py rename to core/dcs/consul.py index 9e3126e3e2..1c0b3eb67f 100644 --- a/core/dcs/consuldcs.py +++ b/core/dcs/consul.py @@ -137,7 +137,7 @@ class ConsulDCS(DCSBase): resolver_cls = ConsulResolver - def __init__(self, url): + def __init__(self, runner, url): self.name = None self.consul_host = self.DEFAULT_CONSUL_HOST self.consul_port = self.DEFAULT_CONSUL_PORT @@ -155,7 +155,7 @@ class ConsulDCS(DCSBase): self.session = None self.slot_number = None self.total_slots = None - super(ConsulDCS, self).__init__(url) + super().__init__(runner, url) self.consul = ConsulClient( host=self.consul_host, port=self.consul_port, token=self.consul_token ) diff --git a/core/dcs/loader.py b/core/dcs/loader.py index a90f57cc54..6ae4d38ea9 100644 --- a/core/dcs/loader.py +++ b/core/dcs/loader.py @@ -6,45 +6,96 @@ # ---------------------------------------------------------------------- # Python modules -from threading import Lock +from threading import Lock, Thread, Event, get_ident +import asyncio +import logging + +# Third-party modules +from typing import Optional, Dict, Awaitable, Any # NOC modules from noc.core.handler import get_handler +from noc.core.ioloop.util import setup_asyncio from noc.config import config +from .base import DCSBase DEFAULT_DCS = "consul://%s:%s/%s" % (config.consul.host, config.consul.port, config.consul.base) -DCS_HANDLERS = {"consul": "noc.core.dcs.consuldcs.ConsulDCS"} -_lock = Lock() -_instances = {} +class DCSRunner(object): + HANDLERS = {"consul": "noc.core.dcs.consul.ConsulDCS"} + + def __init__(self): + self.lock = Lock() + self.thread: Optional[Thread] = None + self.loop: Optional[asyncio.BaseEventLoop] = None + self.instances: Dict[str, DCSBase] = {} + self.ready_event = Event() + self.logger = logging.getLogger() + + @classmethod + def get_dcs_class(cls, url: str): + scheme = url.split(":", 1)[0] + if scheme not in cls.HANDLERS: + raise ValueError("Unknown DCS handler: %s" % scheme) + handler = get_handler(cls.HANDLERS[scheme]) + if not handler: + raise ValueError("Cannot initialize DCS handler: %s", scheme) + return handler + + def get_dcs(self, url: Optional[str] = None) -> DCSBase: + url = url or DEFAULT_DCS + with self.lock: + dcs = self.instances.get(url) + if not dcs: + if not self.thread: + # Run separate DCS thread + self.thread = Thread(name="dcs", target=self._runner) + self.thread.setDaemon(True) + self.thread.start() + self.ready_event.wait() + self.logger.debug("DCS runner thread is ready") + self.logger.debug("Starting DCS %s" % url) + dcs_cls = self.get_dcs_class(url) + dcs = dcs_cls(self, url) + self.instances[url] = dcs + self.loop.call_soon_threadsafe(self.loop.create_task, dcs.start()) + return dcs + + def _runner(self): + self.logger.debug("Starting DCS runner thread") + setup_asyncio() + self.loop = asyncio.get_event_loop() + self.ready_event.set() + self.loop.run_forever() + self.logger.debug("Stopping DCS runner thread") + + async def trampoline(self, aw: Awaitable) -> Any: + """ + Trampoline awaitable to dedicated loop + :param aw: + :return: + """ + async def thunk(): + try: + r = await aw + loop.call_soon_threadsafe(future.set_result, r) + except Exception as e: + loop.call_soon_threadsafe(future.set_exception, e) -def get_dcs_url(url=None): - return url or DEFAULT_DCS + is_same_thread = not self.thread or self.thread.ident == get_ident() + if is_same_thread: + return await aw + # Trampoline to proper thread + loop = asyncio.get_running_loop() + future = loop.create_future() + self.loop.call_soon_threadsafe(self.loop.create_task, thunk()) + return await future -def get_dcs_class(url=None): - url = get_dcs_url(url) - scheme = url.split(":", 1)[0] - if scheme not in DCS_HANDLERS: - raise ValueError("Unknown DCS handler: %s" % scheme) - handler = get_handler(DCS_HANDLERS[scheme]) - if not handler: - raise ValueError("Cannot initialize DCS handler: %s", scheme) - return handler +runner = DCSRunner() def get_dcs(url=None): - """ - Return initialized DCS instance - :param url: - :return: - """ - url = get_dcs_url(url) - with _lock: - if url not in _instances: - dcs = get_dcs_class(url)(url) - dcs.start() - _instances[url] = dcs - return _instances[url] + return runner.get_dcs(url) diff --git a/core/dcs/util.py b/core/dcs/util.py index 110ab17ddd..6713161e31 100644 --- a/core/dcs/util.py +++ b/core/dcs/util.py @@ -7,7 +7,7 @@ # NOC modules from noc.core.ioloop.util import run_sync -from .loader import get_dcs_url, get_dcs_class +from .loader import get_dcs def resolve( @@ -26,8 +26,7 @@ def resolve( """ async def _resolve(): - url = get_dcs_url() - dcs = get_dcs_class()(url) + dcs = get_dcs() try: if near: r = await dcs.resolve_near( diff --git a/core/http/client.py b/core/http/client.py index 6c97cb1113..7662662dcf 100644 --- a/core/http/client.py +++ b/core/http/client.py @@ -163,15 +163,14 @@ async def fetch( proxy = None # Connect reader, writer = None, None + if proxy: + connect_address = proxy + elif isinstance(addr, tuple): + connect_address = addr + else: + connect_address = (addr, port) try: try: - if proxy: - connect_address = proxy - elif isinstance(addr, tuple): - connect_address = addr - else: - connect_address = (addr, port) - if proxy: logger.debug("Connecting to proxy %s:%s", connect_address[0], connect_address[1]) reader, writer = await asyncio.wait_for( @@ -341,9 +340,13 @@ async def fetch( finally: if writer: writer.close() - # Pass one more tick to ensure transport is closed - # Refer to https://github.com/python/asyncio/issues/466 - await asyncio.sleep(0) + if hasattr(writer, "wait_closed"): + await writer.wait_closed() + else: + # Pass one more tick to ensure transport is closed + # Refer to https://github.com/python/asyncio/issues/466 + # await asyncio.sleep(0) + await asyncio.sleep(0.0001) def fetch_sync( -- GitLab From 2b9b70d29386e17189739ddfb1392b3e372b75a5 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Wed, 13 May 2020 12:43:31 +0300 Subject: [PATCH 61/61] cli: Fix on_failure await --- core/script/cli/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/script/cli/base.py b/core/script/cli/base.py index c7ae0f18ba..154b85d32e 100644 --- a/core/script/cli/base.py +++ b/core/script/cli/base.py @@ -546,7 +546,7 @@ class CLI(object): if self.to_raise_privileges: # Start privilege raising sequence if not self.profile.command_super: - self.on_failure(data, match, CLINoSuperCommand) + await self.on_failure(data, match, CLINoSuperCommand) await self.send( smart_bytes(self.profile.command_super, encoding=self.native_encoding) + (self.profile.command_submit or b"\n") -- GitLab