From 3e1fd0a7decb140314c016fe32831d6424cf555a Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Thu, 15 Sep 2022 08:22:38 +0500 Subject: [PATCH 01/12] Speedup create probe on metrics service. --- config.py | 2 +- services/metrics/changelog.py | 12 +++++++++--- services/metrics/service.py | 6 ++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/config.py b/config.py index 258dce08d0..8848ddceee 100644 --- a/config.py +++ b/config.py @@ -874,7 +874,7 @@ class Config(BaseConfig): class metrics(ConfigSection): compact_on_start = BooleanParameter(default=True) compact_on_stop = BooleanParameter(default=False) - flush_interval = SecondsParameter(default="1") + flush_interval = SecondsParameter(default="1m") compact_interval = SecondsParameter(default="5m") # Metrics disable_spool = BooleanParameter(default=False, help="Disable send metrics to Clickhouse") diff --git a/services/metrics/changelog.py b/services/metrics/changelog.py index 3b0632595a..b144303229 100644 --- a/services/metrics/changelog.py +++ b/services/metrics/changelog.py @@ -9,7 +9,6 @@ # Python modules from typing import Any, Dict, List, Iterable import pickle -import lzma import logging import datetime import asyncio @@ -19,6 +18,13 @@ from pymongo import InsertOne, DeleteMany, ASCENDING, DESCENDING from pymongo.collection import Collection from bson import ObjectId +try: + # Only for Python 3.3+ + import lzma as compressor # noqa +except ImportError: + # logger.debug("lzma module is not available") + import bz2 as compressor # noqa + # NOC modules from noc.core.mongo.connection_async import get_db @@ -82,14 +88,14 @@ class ChangeLog(object): Encode state to bytes """ r = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL) - return lzma.compress(r) + return compressor.compress(r) @staticmethod def decode(data: bytes) -> Dict[str, Dict[str, Any]]: """ Decode bytes to state """ - r = lzma.decompress(data) + r = compressor.decompress(data) return pickle.loads(r) async def feed(self, state: Dict[str, Dict[str, Any]]) -> None: diff --git a/services/metrics/service.py b/services/metrics/service.py index b6eec39246..d555fc1953 100755 --- a/services/metrics/service.py +++ b/services/metrics/service.py @@ -288,7 +288,7 @@ class MetricsService(FastAPIService): self.logger.info("Waiting for mappings") await self.mappings_ready_event.wait() self.logger.info("Mappings are ready") - await self.subscribe_stream("metrics", self.slot_number, self.on_metrics) + await self.subscribe_stream("metrics", self.slot_number, self.on_metrics, async_cursor=True) async def on_deactivate(self): if self.change_log: @@ -353,6 +353,7 @@ class MetricsService(FastAPIService): async def on_metrics(self, msg: Message) -> None: data = orjson.loads(msg.value) state = {} + metrics["messages"] += 1 for item in data: scope = item.get("scope") if not scope: @@ -668,7 +669,8 @@ class MetricsService(FastAPIService): # Getting Context source = self.get_source_info(k) if not source: - self.logger.info("[%s] Unknown metric source. Skipping apply rules", k) + self.logger.debug("[%s] Unknown metric source. Skipping apply rules", k) + metrics["unknown_metric_source"] += 1 return s_labels = set(self.merge_labels(source.labels, labels)) # Appy matched rules -- GitLab From cc757156939a3f2334e9c8ec4f630f9c06fa9c61 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Thu, 15 Sep 2022 10:06:40 +0500 Subject: [PATCH 02/12] Fix detect configured metrics. --- inv/models/sensor.py | 11 ++++++++++- sa/models/managedobject.py | 6 ++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/inv/models/sensor.py b/inv/models/sensor.py index 3368989d73..afa730c3fb 100644 --- a/inv/models/sensor.py +++ b/inv/models/sensor.py @@ -63,7 +63,14 @@ class Sensor(Document): "collection": "sensors", "strict": False, "auto_create_index": False, - "indexes": ["agent", "managed_object", "object", "labels", "effective_labels"], + "indexes": [ + "agent", + "managed_object", + "object", + "labels", + "effective_labels", + ("managed_object", "object"), + ], } profile: "SensorProfile" = PlainReferenceField( @@ -241,6 +248,8 @@ class Sensor(Document): :param sensor: :return: """ + if not sensor.state.is_productive: + return {} labels = [] for ll in sensor.effective_labels: l_c = Label.get_by_name(ll) diff --git a/sa/models/managedobject.py b/sa/models/managedobject.py index 2f1ecc2f80..dc7f0f7bb7 100644 --- a/sa/models/managedobject.py +++ b/sa/models/managedobject.py @@ -2889,7 +2889,7 @@ class ManagedObject(NOCModel): metrics=tuple(metrics), labels=(f"noc::interface::{i['name']}",), hints=[f"ifindex::{ifindex}"] if ifindex else None, - service=i.get("service"), + # service=i.get("service"), ) if not i_profile.allow_subinterface_metrics: continue @@ -2973,10 +2973,12 @@ class ManagedObject(NOCModel): from noc.inv.models.object import Object from mongoengine.queryset import Q as m_Q + if not self.is_managed: + return False sla_probe = SLAProbe.objects.filter(managed_object=self.id).first() - config = self.get_metric_config(self) o = Object.get_managed(self) sensor = Sensor.objects.filter(m_Q(managed_object=self.id) | m_Q(object__in=o)).first() + config = self.get_metric_config(self) return bool(sla_probe or sensor or config.get("metrics") or config.get("items")) -- GitLab From 4e874ff03ea65d937fe687e0ee3a6c1b2219e1b7 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Thu, 15 Sep 2022 12:21:25 +0500 Subject: [PATCH 03/12] Speedup get_metric_config. --- sa/models/managedobject.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sa/models/managedobject.py b/sa/models/managedobject.py index dc7f0f7bb7..910f3a24aa 100644 --- a/sa/models/managedobject.py +++ b/sa/models/managedobject.py @@ -2917,31 +2917,35 @@ class ManagedObject(NOCModel): :return: """ from noc.inv.models.interface import Interface + from noc.inv.models.interfaceprofile import InterfaceProfile if not mo.is_managed: return {} + icoll = Interface._get_collection() s_metrics = mo.object_profile.get_object_profile_metrics(mo.object_profile.id) labels = [] for ll in mo.effective_labels: l_c = Label.get_by_name(ll) labels.append({"label": ll, "expose_metric": l_c.expose_metric if l_c else False}) items = [] - for iface in Interface.objects.filter(managed_object=mo.id): + for iface in icoll.find({"managed_object": mo.id}, {"name", "effective_labels", "profile"}): + ip = InterfaceProfile.get_by_id(iface["profile"]) metrics = [ { "name": mc.metric_type.field_name, "is_stored": mc.is_stored, "is_composed": bool(mc.metric_type.compose_expression), } - for mc in iface.profile.metrics + for mc in ip.metrics ] if not metrics: continue items.append( { - "key_labels": [f"noc::interface::{iface.name}"], + "key_labels": [f"noc::interface::{iface['name']}"], "labels": [ - {"label": ll, "expose_metric": False} for ll in iface.effective_labels + {"label": ll, "expose_metric": False} + for ll in iface.get("effective_labels", []) ], "metrics": metrics, } -- GitLab From f8ee3fffb6a4c421f4f6be30998d1ab72c4ddd83 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Thu, 15 Sep 2022 13:18:10 +0500 Subject: [PATCH 04/12] Add get_key_hash cache. --- services/metrics/service.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/services/metrics/service.py b/services/metrics/service.py index d555fc1953..93dffb3dea 100755 --- a/services/metrics/service.py +++ b/services/metrics/service.py @@ -18,6 +18,7 @@ import random # Third-party modules import orjson +import cachetools # NOC modules from noc.core.service.fastapi import FastAPIService @@ -440,6 +441,7 @@ class MetricsService(FastAPIService): ) @staticmethod + @cachetools.cached(cachetools.TTLCache(maxsize=128, ttl=60)) def get_key_hash(k: MetricKey) -> str: """ Calculate persistent hash for metric key @@ -598,7 +600,7 @@ class MetricsService(FastAPIService): if is_composed: probe_cls = ComposeProbeNode prefix = self.get_key_hash(k) - state_id = f"{self.get_key_hash(k)}::{metric_field}" + state_id = f"{prefix}::{metric_field}" # Create Probe p = probe_cls.construct( metric_field, @@ -788,6 +790,9 @@ class MetricsService(FastAPIService): Update source config. """ sc_id = int(data["id"]) + if "type" not in data: + self.logger.info("[%s] Bad Source data", sc_id) + return sc = SourceConfig( type=data["type"], bi_id=data["bi_id"], -- GitLab From 3e712c2a0cea2d62610abaf63dc417eee71576fd Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Fri, 16 Sep 2022 19:10:34 +0500 Subject: [PATCH 05/12] Disable compact on start. --- config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 8848ddceee..f14d946345 100644 --- a/config.py +++ b/config.py @@ -872,7 +872,8 @@ class Config(BaseConfig): enable_postgres_quantiles = BooleanParameter(default=False) class metrics(ConfigSection): - compact_on_start = BooleanParameter(default=True) + # Disable On Start, that compact procedure lost Consul session + compact_on_start = BooleanParameter(default=False) compact_on_stop = BooleanParameter(default=False) flush_interval = SecondsParameter(default="1m") compact_interval = SecondsParameter(default="5m") -- GitLab From cf4b40b53f44fd2dd8bd39c831d305888822eb21 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sat, 17 Sep 2022 11:59:17 +0500 Subject: [PATCH 06/12] Invalidate card from config. --- services/metrics/service.py | 64 ++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/services/metrics/service.py b/services/metrics/service.py index 93dffb3dea..c8520b63dc 100755 --- a/services/metrics/service.py +++ b/services/metrics/service.py @@ -19,6 +19,7 @@ import random # Third-party modules import orjson import cachetools +from pymongo import DESCENDING # NOC modules from noc.core.service.fastapi import FastAPIService @@ -37,6 +38,7 @@ from noc.core.cdag.factory.scope import MetricScopeCDAGFactory from noc.core.cdag.factory.config import ConfigCDAGFactory, GraphConfig from noc.services.metrics.changelog import ChangeLog from noc.services.metrics.datastream import MetricsDataStreamClient, MetricRulesDataStreamClient +from noc.services.datastream.streams.cfgmetricsources import CfgMetricSourcesDataStream from noc.config import config as global_config # MetricKey - scope, key ctx: (managed_object, ), Key Labels @@ -319,11 +321,14 @@ class MetricsService(FastAPIService): """ # Register RPC aliases client = MetricsDataStreamClient("cfgmetricsources", service=self) + coll = CfgMetricSourcesDataStream.get_collection() + r = next(coll.find({}).sort([("change_id", DESCENDING)]), None) # Track stream changes while True: self.logger.info("Starting to track object mappings") try: await client.query( + change_id=str(r["change_id"]) if "change_id" in r else None, limit=global_config.metrics.ds_limit, block=True, filter_policy="delete", @@ -446,7 +451,7 @@ class MetricsService(FastAPIService): """ Calculate persistent hash for metric key """ - d = hashlib.sha512(str(k).encode("utf-8")).digest() + d = hashlib.blake2b(str(k).encode("utf-8")).digest() return codecs.encode(d, "base_64")[:7].decode("utf-8") @staticmethod @@ -620,6 +625,13 @@ class MetricsService(FastAPIService): metrics["cdag_nodes", ("type", p.name)] += 1 return p + @cachetools.cached(cachetools.TTLCache(maxsize=128, ttl=60)) + def get_source(self, s_id): + coll = CfgMetricSourcesDataStream.get_collection() + data = coll.find_one({"_id": str(s_id)}) + sc = self.get_source_config(orjson.loads(data["data"])) + return sc + def get_source_info(self, k: MetricKey) -> Optional[SourceInfo]: """ Get source Info by Metric Key. Sources: @@ -633,13 +645,17 @@ class MetricsService(FastAPIService): key_ctx, source = dict(k[1]), None sensor, sla_probe = None, None if "sensor" in key_ctx: - sensor = self.sources_config.get(key_ctx["sensor"]) + source = self.get_source(key_ctx["sensor"]) + # sensor = self.sources_config.get(key_ctx["sensor"]) elif "sla_probe" in key_ctx: - sla_probe = self.sources_config.get(key_ctx["sla_probe"]) + source = self.get_source(key_ctx["sla_probe"]) + # sla_probe = self.sources_config.get(key_ctx["sla_probe"]) if "agent" in key_ctx: - source = self.sources_config.get(key_ctx["agent"]) + source = self.get_source(key_ctx["agent"]) + # source = self.sources_config.get(key_ctx["agent"]) elif "managed_object" in key_ctx: - source = self.sources_config.get(key_ctx["managed_object"]) + source = self.get_source(key_ctx["managed_object"]) + # source = self.sources_config.get(key_ctx["managed_object"]) if not source: return composed_metrics = [] @@ -785,18 +801,12 @@ class MetricsService(FastAPIService): sender.activate(tx, "labels", data.get("labels") or []) return tx.get_changed_state() - async def update_source_config(self, data: Dict[str, Any]) -> None: - """ - Update source config. - """ - sc_id = int(data["id"]) - if "type" not in data: - self.logger.info("[%s] Bad Source data", sc_id) - return + @staticmethod + def get_source_config(data): sc = SourceConfig( type=data["type"], bi_id=data["bi_id"], - fm_pool=sys.intern(data["fm_pool"]) if data["fm_pool"] else None, + fm_pool=data["fm_pool"] if data["fm_pool"] else None, labels=tuple(sys.intern(ll["label"]) for ll in data["labels"]), metrics=tuple( sys.intern(m["name"]) for m in data["metrics"] if not m.get("is_composed") @@ -816,12 +826,27 @@ class MetricsService(FastAPIService): ), ) ) - if sc_id not in self.sources_config: - self.sources_config[sc_id] = sc + return sc + + async def update_source_config(self, data: Dict[str, Any]) -> None: + """ + Update source config. + """ + if not self.cards: + # Initial config return - diff = self.sources_config[sc_id].is_differ(sc) - if "condition" in diff: - self.invalidate_card_config(sc) + sc_id = int(data["id"]) + if "type" not in data: + self.logger.info("[%s] Bad Source data", sc_id) + return + sc = self.get_source_config(data) + self.invalidate_card_config(sc) + # if sc_id not in self.sources_config: + # self.sources_config[sc_id] = sc + # return + # diff = self.sources_config[sc_id].is_differ(sc) + # if "condition" in diff: + # self.invalidate_card_config(sc) async def delete_source_config(self, c_id: int) -> None: """ @@ -968,6 +993,7 @@ class MetricsService(FastAPIService): elif diff.intersection({"conditions", "graph"}): # Invalidate Cards self.logger.info("[%s] %s Changed. Invalidate cards for rules", r.id, diff) + self.rules[r_id] = r invalidate_rules.add(r_id) if invalidate_rules: await self.invalidate_card_rules(invalidate_rules) -- GitLab From 1e4b9a28cc57eb75934499947b08e9cecb8bab3d Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sat, 17 Sep 2022 17:25:50 +0500 Subject: [PATCH 07/12] Fix typo in pm.metricaction collection Data. --- .../Average_CPU_Usage.json | 8 ++++---- .../Average_Memory_Usage.json | 8 ++++---- .../Interface_Bandwidth_more_than_N.json | 0 .../Interface_Errors_In.json | 8 ++++---- core/collection/base.py | 8 +++++++- pm/models/metricaction.py | 2 +- 6 files changed, 20 insertions(+), 14 deletions(-) rename collections/{pm.metricaction => pm.metricactions}/Average_CPU_Usage.json (91%) rename collections/{pm.metricaction => pm.metricactions}/Average_Memory_Usage.json (91%) rename collections/{pm.metricaction => pm.metricactions}/Interface_Bandwidth_more_than_N.json (100%) rename collections/{pm.metricaction => pm.metricactions}/Interface_Errors_In.json (91%) diff --git a/collections/pm.metricaction/Average_CPU_Usage.json b/collections/pm.metricactions/Average_CPU_Usage.json similarity index 91% rename from collections/pm.metricaction/Average_CPU_Usage.json rename to collections/pm.metricactions/Average_CPU_Usage.json index da835f98a7..1988887747 100644 --- a/collections/pm.metricaction/Average_CPU_Usage.json +++ b/collections/pm.metricactions/Average_CPU_Usage.json @@ -8,7 +8,7 @@ "name": "activation.min_window", "description": null, "default": "5", - "max_value": 3.0, + "max_value": 5.0, "min_value": 3.0, "type": "int" }, @@ -16,7 +16,7 @@ "name": "activation.max_window", "description": null, "default": "3", - "max_value": 3.0, + "max_value": 5.0, "min_value": 3.0, "type": "int" }, @@ -24,7 +24,7 @@ "name": "alarm.activation_level", "description": null, "default": "95", - "max_value": 20.0, + "max_value": 99.0, "min_value": 20.0, "type": "int" }, @@ -32,7 +32,7 @@ "name": "alarm.deactivation_level", "description": null, "default": "60", - "max_value": 20.0, + "max_value": 99.0, "min_value": 20.0, "type": "int" } diff --git a/collections/pm.metricaction/Average_Memory_Usage.json b/collections/pm.metricactions/Average_Memory_Usage.json similarity index 91% rename from collections/pm.metricaction/Average_Memory_Usage.json rename to collections/pm.metricactions/Average_Memory_Usage.json index 44adc046b9..c4cc1465c6 100644 --- a/collections/pm.metricaction/Average_Memory_Usage.json +++ b/collections/pm.metricactions/Average_Memory_Usage.json @@ -8,7 +8,7 @@ "name": "alarm.activation_level", "description": null, "default": "95", - "max_value": 20.0, + "max_value": 99.0, "min_value": 20.0, "type": "int" }, @@ -16,7 +16,7 @@ "name": "alarm.deactivation_level", "description": null, "default": "65", - "max_value": 20.0, + "max_value": 99.0, "min_value": 20.0, "type": "int" }, @@ -24,7 +24,7 @@ "name": "activation.min_window", "description": null, "default": "3", - "max_value": 3.0, + "max_value": 10.0, "min_value": 3.0, "type": "int" }, @@ -32,7 +32,7 @@ "name": "activation.max_window", "description": null, "default": "5", - "max_value": 3.0, + "max_value": 10.0, "min_value": 3.0, "type": "int" } diff --git a/collections/pm.metricaction/Interface_Bandwidth_more_than_N.json b/collections/pm.metricactions/Interface_Bandwidth_more_than_N.json similarity index 100% rename from collections/pm.metricaction/Interface_Bandwidth_more_than_N.json rename to collections/pm.metricactions/Interface_Bandwidth_more_than_N.json diff --git a/collections/pm.metricaction/Interface_Errors_In.json b/collections/pm.metricactions/Interface_Errors_In.json similarity index 91% rename from collections/pm.metricaction/Interface_Errors_In.json rename to collections/pm.metricactions/Interface_Errors_In.json index 6e4460c70b..247e21ae18 100644 --- a/collections/pm.metricaction/Interface_Errors_In.json +++ b/collections/pm.metricactions/Interface_Errors_In.json @@ -8,7 +8,7 @@ "name": "alarm.activation_level", "description": null, "default": "1", - "max_value": 1.0, + "max_value": 1000.0, "min_value": 1.0, "type": "float" }, @@ -16,7 +16,7 @@ "name": "alarm.deactivation_level", "description": null, "default": "1", - "max_value": 1.0, + "max_value": 1000.0, "min_value": 1.0, "type": "float" }, @@ -24,7 +24,7 @@ "name": "activation.min_window", "description": null, "default": "900", - "max_value": 300.0, + "max_value": 3600.0, "min_value": 300.0, "type": "int" }, @@ -32,7 +32,7 @@ "name": "activation.max_window", "description": null, "default": "1200", - "max_value": 300.0, + "max_value": 3600.0, "min_value": 300.0, "type": "int" } diff --git a/core/collection/base.py b/core/collection/base.py index 72664345d0..d981875864 100644 --- a/core/collection/base.py +++ b/core/collection/base.py @@ -240,6 +240,12 @@ class Collection(object): except ValueError as e: v = [] self.partial_errors[d["uuid"]] = str(e) + elif isinstance(field, EmbeddedDocumentField): + try: + v = field.document_type(**v) + except ValueError as e: + v = None + self.partial_errors[d["uuid"]] = str(e) # Dereference binary field if isinstance(field, BinaryField): v = b85decode(v) @@ -313,7 +319,7 @@ class Collection(object): o.save() # Try again return self.update_item(data) - self.stdout.write("Not find object by query: %s\n" % qs) + self.stdout.write("Not find object by query: %s\n" % qs) raise def delete_item(self, uuid): diff --git a/pm/models/metricaction.py b/pm/models/metricaction.py index 45bdc73df6..c63488fd26 100644 --- a/pm/models/metricaction.py +++ b/pm/models/metricaction.py @@ -72,7 +72,7 @@ class MetricActionParam(EmbeddedDocument): "type": self.type, "description": self.description, "min_value": self.min_value, - "max_value": self.min_value, + "max_value": self.max_value, "default": self.default, } -- GitLab From aa30875b705989214674174c2b4aad63b1e017ec Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sat, 17 Sep 2022 18:41:26 +0500 Subject: [PATCH 08/12] Fix alarm value type. --- commands/cdag.py | 9 +++++---- core/cdag/node/alarm.py | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/commands/cdag.py b/commands/cdag.py index 10c9d67cd8..a0cc6b8da3 100644 --- a/commands/cdag.py +++ b/commands/cdag.py @@ -91,7 +91,7 @@ class Command(BaseCommand): from noc.core.clickhouse.connect import connection now = datetime.datetime.now() - now = now - datetime.timedelta(hours=2) + now = now - datetime.timedelta(hours=4) q_args = [] if source.startswith("iface://"): source, iface = source[8:].split("::") @@ -109,11 +109,12 @@ class Command(BaseCommand): source = source[6:] source = self.get_source(source) query = SQL % ( - "cpu_usage", + "usage", "cpu", - source.managed_object.bi_id, + source.bi_id, now.date().isoformat(), now.replace(microsecond=0).isoformat(sep=" "), + "", ) else: self.die(f"Unknown source {source}") @@ -135,7 +136,7 @@ class Command(BaseCommand): def iter_metrics( self, f_input: Optional[str], metrics: Optional[List[str]] = None ) -> Iterable[Dict[str, Union[float, str]]]: - if f_input.startswith("mo://") or f_input.startswith("iface://"): + if f_input.startswith("cpu://") or f_input.startswith("iface://"): yield from self.input_from_device(f_input, metrics) else: yield from self.input_from_file(f_input) diff --git a/core/cdag/node/alarm.py b/core/cdag/node/alarm.py index d2ec14412c..3bf7377849 100644 --- a/core/cdag/node/alarm.py +++ b/core/cdag/node/alarm.py @@ -57,12 +57,12 @@ class AlarmNodeConfig(BaseModel): if config.reference: template = config.reference elif config.labels: - template = "th:{{object}}:{{alarm_class}}:{{';'.join(config.labels)}}" + template = "th:{{object or ''}}:{{alarm_class}}:{{';'.join(config.labels)}}" return Template(template).render( **{ "object": config.managed_object, "alarm_class": config.alarm_class, - "labels": config.labels, + "labels": config.labels or [], "vars": {v.name: v.value for v in config.vars or []}, } ) @@ -117,7 +117,8 @@ class AlarmNode(BaseCDAGNode): "managed_object": self.config.managed_object, "alarm_class": self.config.alarm_class, "labels": self.config.labels if self.config.labels else [], - "vars": {"ovalue": x, "tvalue": self.config.activation_level}, + # x is numpy.float64 type, ? + "vars": {"ovalue": round(float(x), 3), "tvalue": self.config.activation_level}, } # Render vars if self.config.vars: -- GitLab From 6898d63ed48d51f5d68a1da6a6c042da165e3481 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sat, 17 Sep 2022 22:19:08 +0500 Subject: [PATCH 09/12] Fix utcoffset detect. --- models.py | 2 +- services/discovery/jobs/periodic/metrics.py | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/models.py b/models.py index 80044c8016..67d0add8c3 100644 --- a/models.py +++ b/models.py @@ -249,7 +249,7 @@ _MODELS = { "pm.AgentProfile": "noc.pm.models.agent.AgentProfile", "pm.Scale": "noc.pm.models.scale.Scale", "pm.MeasurementUnits": "noc.pm.models.measurementunits.MeasurementUnits", - "pm.MetricAction": "noc.pm.models.metricrule.MetricAction", + "pm.MetricAction": "noc.pm.models.metricaction.MetricAction", "pm.MetricRule": "noc.pm.models.metricrule.MetricRule", "pm.MetricScope": "noc.pm.models.metricscope.MetricScope", "pm.MetricType": "noc.pm.models.metrictype.MetricType", diff --git a/services/discovery/jobs/periodic/metrics.py b/services/discovery/jobs/periodic/metrics.py index 8594e59d66..b951c243e9 100644 --- a/services/discovery/jobs/periodic/metrics.py +++ b/services/discovery/jobs/periodic/metrics.py @@ -9,10 +9,15 @@ import itertools import time from typing import Any, List, Dict, Iterable +from functools import partial + +import cachetools # NOC modules from noc.services.discovery.jobs.base import DiscoveryCheck from noc.core.models.cfgmetrics import MetricCollectorConfig +from noc.core.service.loader import get_dcs +from noc.core.ioloop.util import run_sync from noc.inv.models.object import Object from noc.inv.models.interfaceprofile import MetricConfig from noc.inv.models.sensor import Sensor @@ -42,6 +47,12 @@ class MetricsCheck(DiscoveryCheck): SLA_CAPS = ["Cisco | IP | SLA | Probes"] + @staticmethod + @cachetools.cached(cachetools.TTLCache(maxsize=128, ttl=60)) + def get_slot_limits(slot_name): + dcs = get_dcs() + return run_sync(partial(dcs.get_slot_limit, slot_name)) + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.id_count = itertools.count() @@ -110,8 +121,8 @@ class MetricsCheck(DiscoveryCheck): collected=metrics, streaming={ "stream": "metrics", - "partition": 0, - "utc_offset": config.timezone._utcoffset.seconds, + "partition": self.object.id % self.get_slot_limits("metrics"), + "utc_offset": config.get_utc_offset, "data": s_data, }, ) -- GitLab From 038ac98db5170715ce1530c04b1088fd39655478 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sat, 17 Sep 2022 23:54:47 +0500 Subject: [PATCH 10/12] Fix getting slot for metrics. --- core/service/base.py | 16 ++++++++++++++++ services/discovery/jobs/periodic/metrics.py | 8 +------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/service/base.py b/core/service/base.py index edc26c6b53..472c87fff9 100644 --- a/core/service/base.py +++ b/core/service/base.py @@ -15,6 +15,8 @@ import argparse import threading from time import perf_counter import asyncio +import cachetools +from functools import partial from typing import ( Optional, Dict, @@ -881,3 +883,17 @@ class BaseService(object): # Cluster election in progress or cluster is misconfigured self.logger.info("Stream '%s' has no active partitions. Waiting" % stream) await asyncio.sleep(1) + + @staticmethod + @cachetools.cached(cachetools.TTLCache(maxsize=128, ttl=60)) + def get_slot_limits(slot_name): + """ + Get slot count + :param slot_name: + :return: + """ + from noc.core.dcs.loader import get_dcs + from noc.core.ioloop.util import run_sync + + dcs = get_dcs() + return run_sync(partial(dcs.get_slot_limit, slot_name)) diff --git a/services/discovery/jobs/periodic/metrics.py b/services/discovery/jobs/periodic/metrics.py index b951c243e9..094b6984cf 100644 --- a/services/discovery/jobs/periodic/metrics.py +++ b/services/discovery/jobs/periodic/metrics.py @@ -47,12 +47,6 @@ class MetricsCheck(DiscoveryCheck): SLA_CAPS = ["Cisco | IP | SLA | Probes"] - @staticmethod - @cachetools.cached(cachetools.TTLCache(maxsize=128, ttl=60)) - def get_slot_limits(slot_name): - dcs = get_dcs() - return run_sync(partial(dcs.get_slot_limit, slot_name)) - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.id_count = itertools.count() @@ -121,7 +115,7 @@ class MetricsCheck(DiscoveryCheck): collected=metrics, streaming={ "stream": "metrics", - "partition": self.object.id % self.get_slot_limits("metrics"), + "partition": self.object.id % self.service.get_slot_limits("metrics"), "utc_offset": config.get_utc_offset, "data": s_data, }, -- GitLab From aa5213b11f1eb96a98c391b4845e78be2d3b9566 Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sun, 18 Sep 2022 08:36:10 +0500 Subject: [PATCH 11/12] Use pop when getting start_state value. --- services/discovery/jobs/periodic/metrics.py | 5 ----- services/metrics/service.py | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/services/discovery/jobs/periodic/metrics.py b/services/discovery/jobs/periodic/metrics.py index 094b6984cf..6932c9056d 100644 --- a/services/discovery/jobs/periodic/metrics.py +++ b/services/discovery/jobs/periodic/metrics.py @@ -9,15 +9,10 @@ import itertools import time from typing import Any, List, Dict, Iterable -from functools import partial - -import cachetools # NOC modules from noc.services.discovery.jobs.base import DiscoveryCheck from noc.core.models.cfgmetrics import MetricCollectorConfig -from noc.core.service.loader import get_dcs -from noc.core.ioloop.util import run_sync from noc.inv.models.object import Object from noc.inv.models.interfaceprofile import MetricConfig from noc.inv.models.sensor import Sensor diff --git a/services/metrics/service.py b/services/metrics/service.py index c8520b63dc..78049d425d 100755 --- a/services/metrics/service.py +++ b/services/metrics/service.py @@ -610,7 +610,7 @@ class MetricsService(FastAPIService): p = probe_cls.construct( metric_field, prefix=prefix, - state=self.start_state.get(state_id), + state=self.start_state.pop(state_id, None), config=self.metric_configs.get(metric_field), sticky=True, ) -- GitLab From 963811ac7375258867eb9b4ecd0e521e324de38e Mon Sep 17 00:00:00 2001 From: Andrey Vertiprahov Date: Sun, 18 Sep 2022 09:23:45 +0500 Subject: [PATCH 12/12] Fix typo. --- services/discovery/jobs/periodic/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/discovery/jobs/periodic/metrics.py b/services/discovery/jobs/periodic/metrics.py index 6932c9056d..e6ca1a9ff4 100644 --- a/services/discovery/jobs/periodic/metrics.py +++ b/services/discovery/jobs/periodic/metrics.py @@ -111,7 +111,7 @@ class MetricsCheck(DiscoveryCheck): streaming={ "stream": "metrics", "partition": self.object.id % self.service.get_slot_limits("metrics"), - "utc_offset": config.get_utc_offset, + "utc_offset": config.tz_utc_offset, "data": s_data, }, ) -- GitLab