Commit 44810002 authored by Andrey Vertiprahov
Browse files

Merge branch 'noc-svc-code-cleanup' into 'microservices'

Service.perf_metrics is deprecated and replaced by metrics

See merge request !1178
parents a3587fb7 ac958fe2
Pipeline #8153 passed with stages in 5 minutes and 52 seconds
......@@ -131,7 +131,6 @@ class Service(object):
self.ioloop = None
self.logger = None
self.service_id = str(uuid.uuid4())
self.perf_metrics = metrics
self.executors = {}
self.start_time = time.time()
self.pid = os.getpid()
......@@ -664,11 +663,11 @@ class Service(object):
Subscribe message to channel
"""
def call_json_handler(message):
self.perf_metrics[metric_in] += 1
metrics[metric_in] += 1
try:
data = ujson.loads(message.body)
except ValueError as e:
self.perf_metrics[metric_decode_fail] += 1
metrics[metric_decode_fail] += 1
self.logger.debug("Cannot decode JSON message: %s", e)
return True # Broken message
if isinstance(data, dict):
......@@ -678,27 +677,27 @@ class Service(object):
with ErrorReport():
r = handler(message, data)
if r:
self.perf_metrics[metric_processed] += 1
metrics[metric_processed] += 1
elif message.is_async():
message.on("finish", on_finish)
else:
self.perf_metrics[metric_deferred] += 1
metrics[metric_deferred] += 1
return r
def call_raw_handler(message):
self.perf_metrics[metric_in] += 1
metrics[metric_in] += 1
with ErrorReport():
r = handler(message, message.body)
if r:
self.perf_metrics[metric_processed] += 1
metrics[metric_processed] += 1
elif message.is_async():
message.on("finish", on_finish)
else:
self.perf_metrics[metric_deferred] += 1
metrics[metric_deferred] += 1
return r
def on_finish(*args, **kwargs):
self.perf_metrics[metric_processed] += 1
metrics[metric_processed] += 1
t = topic.replace(".", "_")
metric_in = "nsq_msg_in_%s" % t
......@@ -950,7 +949,7 @@ class Service(object):
remote_ip = handler.request.remote_ip
if status == 200 and uri == "/mon/" and method == "GET":
self.logger.debug("Monitoring request (%s)", remote_ip)
self.perf_metrics["mon_requests"] += 1
metrics["mon_requests"] += 1
elif (status == 200 or status == 429) and uri.startswith("/health/") and method == "GET":
pass
elif status == 200 and uri == ("/metrics") and method == "GET":
......@@ -961,8 +960,8 @@ class Service(object):
method, uri, remote_ip,
1000.0 * handler.request.request_time()
)
self.perf_metrics["http_requests", ("method", method.lower())] += 1
self.perf_metrics["http_response", ("status", status)] += 1
metrics["http_requests", ("method", method.lower())] += 1
metrics["http_response", ("status", status)] += 1
def get_leader_lock_name(self):
if self.leader_lock_name:
......
......@@ -15,7 +15,6 @@ import tornado.ioloop
# NOC modules
from noc.core.dcs.loader import get_dcs, DEFAULT_DCS
from .rpc import RPCProxy
from noc.core.perf import metrics
from noc.config import config
......@@ -25,7 +24,6 @@ class ServiceStub(object):
def __init__(self):
self.logger = logging.getLogger("stub")
self.perf_metrics = metrics
self.is_ready = threading.Event()
self.config = None
self._metrics = defaultdict(list)
......
......@@ -2,7 +2,7 @@
# ---------------------------------------------------------------------
# Activator API
# ---------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2018 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -15,6 +15,7 @@ from noc.core.script.base import BaseScript
from noc.core.ioloop.snmp import snmp_get, SNMPError
from noc.core.snmp.version import SNMP_v1, SNMP_v2c
from noc.core.http.client import fetch
from noc.core.perf import metrics
from noc.config import config
......@@ -62,7 +63,7 @@ class ActivatorAPI(API):
"""
script_class = loader.get_script(name)
if not script_class:
self.service.perf_metrics["error", ("type", "invalid_script")] += 1
metrics["error", ("type", "invalid_script")] += 1
raise APIError("Invalid script: %s" % name)
script = script_class(
service=self.service,
......@@ -78,7 +79,7 @@ class ActivatorAPI(API):
try:
result = script.run()
except script.ScriptError as e:
self.service.perf_metrics["error", ("type", "script_error")] += 1
metrics["error", ("type", "script_error")] += 1
raise APIError("Script error: %s" % e.__doc__)
return result
......@@ -109,7 +110,7 @@ class ActivatorAPI(API):
self.logger.debug("SNMP GET %s %s returns %s",
address, oid, result)
except SNMPError as e:
self.service.perf_metrics["error", ("type", "snmp_v1_error")] += 1
metrics["error", ("type", "snmp_v1_error")] += 1
result = None
self.logger.debug("SNMP GET %s %s returns error %s",
address, oid, e)
......@@ -142,7 +143,7 @@ class ActivatorAPI(API):
self.logger.debug("SNMP GET %s %s returns %s",
address, oid, result)
except SNMPError as e:
self.service.perf_metrics["error", ("type", "snmp_v2_error")] += 1
metrics["error", ("type", "snmp_v2_error")] += 1
result = None
self.logger.debug("SNMP GET %s %s returns error %s",
address, oid, e)
......@@ -171,7 +172,7 @@ class ActivatorAPI(API):
if 200 <= code <= 299:
raise tornado.gen.Return(body)
else:
self.service.perf_metrics["error", ("type", "http_error_%s" % code)] += 1
metrics["error", ("type", "http_error_%s" % code)] += 1
self.logger.debug("HTTP GET %s failed: %s %s", url, code, body)
raise tornado.gen.Return(None)
......
......@@ -2,7 +2,7 @@
# ---------------------------------------------------------------------
# BI API
# ---------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2018 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -31,7 +31,8 @@ from noc.bi.models.aggregatedinterface import AggregatedInterface
from noc.pm.models.metricscope import MetricScope
from noc.pm.models.metrictype import MetricType
from noc.bi.models.dashboard import Dashboard, DashboardAccess, DAL_ADMIN, DAL_RO
from noc.sa.interfaces.base import (DictListParameter, DictParameter, IntParameter, StringParameter)
from noc.sa.interfaces.base import DictListParameter, DictParameter, IntParameter, StringParameter
from noc.core.perf import metrics
from noc.core.translation import ugettext as _
# Access items validations
......@@ -226,7 +227,7 @@ class BIAPI(API):
for ds in self.get_datasources():
if ds["name"] == name:
return ds
self.service.perf_metrics["error", ("type", "info_invalid_datasource")] += 1
metrics["error", ("type", "info_invalid_datasource")] += 1
raise APIError("Invalid datasource")
@executor("query")
......@@ -240,11 +241,11 @@ class BIAPI(API):
:return:
"""
if "datasource" not in query:
self.service.perf_metrics["error", ("type", "query_no_datasource")] += 1
metrics["error", ("type", "query_no_datasource")] += 1
raise APIError("No datasource")
model = self.get_model(query["datasource"])
if not model:
self.service.perf_metrics["error", ("type", "query_invalid_datasource")] += 1
metrics["error", ("type", "query_invalid_datasource")] += 1
raise APIError("Invalid datasource")
return model.query(query, self.handler.current_user)
......@@ -303,7 +304,7 @@ class BIAPI(API):
elif i.group and i.group.id in groups and i.level >= access_level:
return d
# No access
self.service.perf_metrics["error", ("type", "no_permission")] += 1
metrics["error", ("type", "no_permission")] += 1
raise APIError("User have no permission to access dashboard")
@executor("query")
......@@ -318,7 +319,7 @@ class BIAPI(API):
if d:
return ujson.loads(zlib.decompress(d.config))
else:
self.service.perf_metrics["error", ("type", "dashboard_not_found")] += 1
metrics["error", ("type", "dashboard_not_found")] += 1
raise APIError("Dashboard not found")
@executor("query")
......@@ -332,12 +333,12 @@ class BIAPI(API):
if "id" in config:
d = self._get_dashboard(config["id"], access_level=1)
if not d:
self.service.perf_metrics["error", ("type", "dashboard_not_found")] += 1
metrics["error", ("type", "dashboard_not_found")] += 1
raise APIError("Dashboard not found")
else:
d = Dashboard.objects.filter(title=config.get("title")).first()
if d:
self.service.perf_metrics["error", ("type", "bad_dashboard_name")] += 1
metrics["error", ("type", "bad_dashboard_name")] += 1
raise APIError("Dashboard name exists")
d = Dashboard(id=str(bson.ObjectId()), owner=self.handler.current_user)
d.format = config.get("format", 1)
......@@ -363,7 +364,7 @@ class BIAPI(API):
d.delete()
return True
else:
self.service.perf_metrics["error", ("type", "dashboard_not_found")] += 1
metrics["error", ("type", "dashboard_not_found")] += 1
raise APIError("Dashboard not found")
@executor("query")
......@@ -398,17 +399,17 @@ class BIAPI(API):
sort_children(n)
if "datasource" not in params:
self.service.perf_metrics["error", ("type", "get_hierarchy_no_datasource")] += 1
metrics["error", ("type", "get_hierarchy_no_datasource")] += 1
raise APIError("No datasource")
if "dic_name" not in params:
self.service.perf_metrics["error", ("type", "get_hierarchy_no_dict_name")] += 1
metrics["error", ("type", "get_hierarchy_no_dict_name")] += 1
raise APIError("No dictionary name")
if "field_name" not in params:
self.service.perf_metrics["error", ("type", "get_hierarchy_no_field_name")] += 1
metrics["error", ("type", "get_hierarchy_no_field_name")] += 1
raise APIError("No field name")
model = Model.get_model_class(params["datasource"])
if not model:
self.service.perf_metrics["error", ("type", "get_hierarchy_invalid_datasource")] += 1
metrics["error", ("type", "get_hierarchy_invalid_datasource")] += 1
raise APIError("Invalid datasource")
query = {
"fields": [
......@@ -554,11 +555,11 @@ class BIAPI(API):
d = self._get_dashboard(id)
if not d:
self.logger.error("Dashboards not find %s", id)
self.service.perf_metrics["error", ("type", "dashboard_not_found")] += 1
metrics["error", ("type", "dashboard_not_found")] += 1
raise APIError("Dashboard not found")
if d.get_user_access(self.handler.current_user) < DAL_ADMIN:
self.logger.error("Access for user Dashboards %s", self.handler.current_user)
self.service.perf_metrics["error", ("type", "no_permissions_to_set_permissions")] += 1
metrics["error", ("type", "no_permissions_to_set_permissions")] += 1
raise APIError("User have no permission to set permissions")
access = []
if acc_limit == "user":
......@@ -572,7 +573,7 @@ class BIAPI(API):
items = I_VALID.clean(items)
except ValueError as e:
self.logger.error("Validation items with rights", e)
self.service.perf_metrics["error", ("type", "validation")] += 1
metrics["error", ("type", "validation")] += 1
raise APIError("Validation error %s" % e)
for i in items:
da = DashboardAccess(level=i.get("level", -1))
......@@ -595,7 +596,7 @@ class BIAPI(API):
:return:
"""
if not id.get("id"):
self.service.perf_metrics["error", ("type", "wrong_json")] += 1
metrics["error", ("type", "wrong_json")] += 1
raise APIError("Not id field in JSON")
return self._set_dashboard_access(id.get("id"), items.get("items"))
......
......@@ -36,6 +36,7 @@ from noc.core.version import version
from noc.core.debug import format_frames, get_traceback_frames, error_report
from services.correlator import utils
from noc.lib.dateutils import total_seconds
from noc.core.perf import metrics
class CorrelatorService(Service):
......@@ -181,7 +182,7 @@ class CorrelatorService(Service):
# Root cause found
self.logger.info("%s is root cause for %s (Rule: %s)",
root.id, a.id, rc.name)
self.perf_metrics["alarm_correlated_rule"] += 1
metrics["alarm_correlated_rule"] += 1
a.set_root(root)
return True
return False
......@@ -209,7 +210,7 @@ class CorrelatorService(Service):
"%s is root cause for %s (Reverse rule: %s)",
a.id, ca.id, rc.name
)
self.perf_metrics["alarm_correlated_rule"] += 1
metrics["alarm_correlated_rule"] += 1
ca.set_root(a)
found = True
return found
......@@ -224,7 +225,7 @@ class CorrelatorService(Service):
self.logger.info("Managed object is not managed. Do not raise alarm")
return
if e.managed_object.id != managed_object.id:
self.perf_metrics["alarm_change_mo"] += 1
metrics["alarm_change_mo"] += 1
self.logger.info(
"Changing managed object to %s",
managed_object.name
......@@ -251,7 +252,7 @@ class CorrelatorService(Service):
a.alarm_class.name, a.id
)
a = a.reopen("Reopened by disposition rule '%s'" % r.u_name)
self.perf_metrics["alarm_reopen"] += 1
metrics["alarm_reopen"] += 1
if a:
# Active alarm found, refresh
self.logger.info(
......@@ -270,7 +271,7 @@ class CorrelatorService(Service):
# Refresh last update
a.last_update = e.timestamp
a.save()
self.perf_metrics["alarm_contribute"] += 1
metrics["alarm_contribute"] += 1
return
# Calculate alarm coverage
summary = ServiceSummary.get_object_summary(managed_object)
......@@ -317,7 +318,7 @@ class CorrelatorService(Service):
e.event_class.name,
a.alarm_class.name, a.id, a.vars
)
self.perf_metrics["alarm_raise"] += 1
metrics["alarm_raise"] += 1
self.correlate(r, a)
# Notify about new alarm
if not a.root:
......@@ -361,7 +362,7 @@ class CorrelatorService(Service):
)
except: # noqa. Can probable happens anything from handler
error_report()
self.perf_metrics["error", ("type", "alarm_handler")] += 1
metrics["error", ("type", "alarm_handler")] += 1
# Call triggers if necessary
if r.alarm_class.id in self.triggers:
for t in self.triggers[r.alarm_class.id]:
......@@ -375,7 +376,7 @@ class CorrelatorService(Service):
# Silently drop alarm
self.logger.debug("Alarm severity is 0, dropping")
a.delete()
self.perf_metrics["alarm_drop"] += 1
metrics["alarm_drop"] += 1
return
def clear_alarm(self, r, e):
......@@ -385,7 +386,7 @@ class CorrelatorService(Service):
"[%s|Unknown|Unknown] Referred to unknown managed object, ignoring",
e.id
)
self.perf_metrics["unknown_object"] += 1
metrics["unknown_object"] += 1
return
if r.unique:
discriminator, vars = r.get_vars(e)
......@@ -407,7 +408,7 @@ class CorrelatorService(Service):
"Cleared by disposition rule '%s'" % r.u_name,
ts=e.timestamp
)
self.perf_metrics["alarm_clear"] += 1
metrics["alarm_clear"] += 1
def get_delayed_event(self, r, e):
"""
......@@ -473,7 +474,7 @@ class CorrelatorService(Service):
self.get_executor("max").submit(self.dispose_worker, message, event_id, event)
def dispose_worker(self, message, event_id, event_hint=None):
self.perf_metrics["alarm_dispose"] += 1
metrics["alarm_dispose"] += 1
try:
if event_hint:
event = self.get_event_from_hint(event_hint)
......@@ -482,7 +483,7 @@ class CorrelatorService(Service):
if event:
self.dispose_event(event)
except Exception:
self.perf_metrics["alarm_dispose_error"] += 1
metrics["alarm_dispose_error"] += 1
error_report()
self.ioloop.add_callback(message.finish)
......@@ -497,8 +498,8 @@ class CorrelatorService(Service):
e = ActiveEvent.get_by_id(event_id)
if not e:
self.logger.info("[%s] Event not found, skipping", event_id)
self.perf_metrics["event_lookup_failed"] += 1
self.perf_metrics["event_lookups"] += 1
metrics["event_lookup_failed"] += 1
metrics["event_lookups"] += 1
return e
def get_event_from_hint(self, hint):
......@@ -507,7 +508,7 @@ class CorrelatorService(Service):
:param hint:
:return:
"""
self.perf_metrics["event_hints"] += 1
metrics["event_hints"] += 1
e = ActiveEvent.from_json(hint)
# Prevent TypeError: can't compare offset-naive and offset-aware datetimes
# when calculating alarm timestamp
......@@ -597,7 +598,7 @@ class CorrelatorService(Service):
if can_correlate(alarm, a):
self.logger.info("[%s] Set root to %s", alarm.id, a.id)
alarm.set_root(a)
self.perf_metrics["alarm_correlated_topology"] += 1
metrics["alarm_correlated_topology"] += 1
break
# Correlate neighbors' alarms
for d in na:
......
......@@ -3,7 +3,7 @@
# ---------------------------------------------------------------------
# Login service
# ---------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2018 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -18,6 +18,7 @@ from noc.services.login.logout import LogoutRequestHandler
from noc.services.login.api.login import LoginAPI
from noc.services.login.backends.base import BaseAuthBackend
from noc.main.models.apikey import APIKey
from noc.core.perf import metrics
from noc.config import config
......@@ -73,14 +74,14 @@ class LoginService(UIService):
)
try:
user = backend.authenticate(**credentials)
self.perf_metrics['auth_try', ('method', method)] += 1
metrics['auth_try', ('method', method)] += 1
except backend.LoginError as e:
self.logger.info("[%s] Login Error: %s", method, e)
self.perf_metrics['auth_fail', ('method', method)] += 1
metrics['auth_fail', ('method', method)] += 1
le = str(e)
continue
self.logger.info("Authorized credentials %s as user %s", c, user)
self.perf_metrics['auth_success', ('method', method)] += 1
metrics['auth_success', ('method', method)] += 1
# Set cookie
handler.set_secure_cookie(
"noc_user",
......
......@@ -3,7 +3,7 @@
# ----------------------------------------------------------------------
# mailsender service
# ----------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2018 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
......@@ -20,6 +20,7 @@ import pytz
# NOC modules
from noc.config import config
from noc.core.service.base import Service
from noc.core.perf import metrics
class MailSenderService(Service):
......@@ -120,7 +121,7 @@ class MailSenderService(Service):
"[%s] SMTP Authentication error: %s",
message_id, e
)
self.perf_metrics['smtp_response', ('code', e.smtp_code)] += 1
metrics['smtp_response', ('code', e.smtp_code)] += 1
return False
# Send mail
try:
......@@ -135,7 +136,7 @@ class MailSenderService(Service):
smtp.rset()
self.logger.error("[%s] MAIL FROM '%s' failed: %s %s",
message_id, from_address, code, resp)
self.perf_metrics['smtp_response', ('code', code)] += 1
metrics['smtp_response', ('code', code)] += 1
return False
# RCPT TO
code, resp = smtp.rcpt(address, [])
......@@ -143,7 +144,7 @@ class MailSenderService(Service):
smtp.rset()
self.logger.error("[%s] RCPT TO '%s' failed: %s %s",
message_id, address, code, resp)
self.perf_metrics['smtp_response', ('code', code)] += 1
metrics['smtp_response', ('code', code)] += 1
return False
# Data
code, resp = smtp.data(msg)
......@@ -151,10 +152,10 @@ class MailSenderService(Service):
smtp.rset()
self.logger.error("[%s] DATA failed: %s %s",
message_id, code, resp)
self.perf_metrics['smtp_response', ('code', code)] += 1
metrics['smtp_response', ('code', code)] += 1
return False
self.logger.info("[%s] Message sent: %s", message_id, resp)
self.perf_metrics['smtp_response', ('code', code)] += 1
metrics['smtp_response', ('code', code)] += 1
except smtplib.SMTPException as e:
self.logger.error("[%s] SMTP Error: %s", message_id, e)
smtp.rset()
......
......@@ -3,7 +3,7 @@
# ---------------------------------------------------------------------
# Ping service
# ---------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2018 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -21,6 +21,7 @@ from noc.core.error import NOCError
from noc.core.service.base import Service
from noc.core.ioloop.timers import PeriodicOffsetCallback
from noc.core.ioloop.ping import Ping
from noc.core.perf import metrics
from noc.services.ping.probesetting import ProbeSetting
from noc.services.ping.datastream import PingDataStreamClient
......@@ -68,7 +69,7 @@ class PingService(Service):
except OSError as e:
self.logger.info("Cannot set nice level to -20: %s", e)
#
self.perf_metrics["down_objects"] = 0
metrics["down_objects"] = 0
# Open ping sockets
self.ping = Ping(self.ioloop, tos=config.ping.tos)
# Send spooled messages every 250ms
......@@ -145,10 +146,10 @@ class PingService(Service):
ps.task.stop()
ps.task = None
del self.probes[id]
self.perf_metrics["ping_probe_delete"] += 1
metrics["ping_probe_delete"] += 1
if ps.status is not None and not ps.status:
self.perf_metrics["down_objects"] -= 1
self.perf_metrics["ping_objects"] = len(self.probes)
metrics["down_objects"] -= 1
metrics["ping_objects"] = len(self.probes)
def _create_probe(self, data):
"""
......@@ -163,8 +164,8 @@ class PingService(Service):
)
ps.task = pt
pt.start()
self.perf_metrics["ping_probe_create"] += 1
self.perf_metrics["ping_objects"] = len(self.probes)
metrics["ping_probe_create"] += 1
metrics["ping_objects"] = len(self.probes)
def _change_probe(self, data):
self.logger.info("Update probe: %s (%ds)", data["address"], data["interval"])
......@@ -175,8 +176,8 @@ class PingService(Service):
self.logger.info("Changing address: %s -> %s", ps.address, data["address"])
ps.address = data["address"]
ps.update(**data)
self.perf_metrics["ping_probe_update"] += 1
self.perf_metrics["ping_objects"] = len(self.probes)
metrics["ping_probe_update"] += 1
metrics["ping_objects"] = len(self.probes)
@tornado.gen.coroutine
def ping_check(self, ps):
......@@ -187,11 +188,11 @@ class PingService(Service):
t0 = time.time()
if ps.id not in self.probes:
return
self.perf_metrics["ping_check_total"] += 1
metrics["ping_check_total"] += 1
if ps.time_cond:
dt = datetime.datetime.fromtimestamp(t0)
if not eval(ps.time_cond, {"T": dt}):
self.perf_metrics["ping_check_skips"] += 1
metrics["ping_check_skips"] += 1
return
rtt, attempts = yield self.ping.ping_check_rtt(
ps.address,
......@@ -202,19 +203,19 @@ class PingService(Service):
)
s = rtt is not None
if s:
self.perf_metrics["ping_check_success"] += 1
metrics["ping_check_success"] += 1
else:
self.perf_metrics["ping_check_fail"] += 1
metrics["ping_check_fail"] += 1
if ps and s != ps.status:
if s:
self.perf_metrics["down_objects"] -= 1
metrics["down_objects"] -= 1
else:
self.perf_metrics["down_objects"] += 1
metrics["down_objects"] += 1
if config.ping.throttle_threshold:
# Process throttling
down_ratio = (
float(self.perf_metrics["down_objects"]) * 100.0 /
float(self.perf_metrics["ping_objects"])