Commit 08e94eea authored by kk's avatar kk Committed by Andrey Vertiprahov
Browse files

move lib/text to core/text. add tests

parent c1bf2db1
......@@ -25,7 +25,7 @@ import cachetools
# NOC modules
from noc.core.prettyjson import to_json
from noc.lib.text import quote_safe_path
from noc.core.text import quote_safe_path
from noc.core.model.decorator import on_delete_check
from noc.sa.interfaces.base import StringParameter, IntParameter, BooleanParameter
......
......@@ -16,7 +16,7 @@ from mongoengine.fields import StringField, UUIDField
# NOC modules
from noc.core.prettyjson import to_json
from noc.lib.text import quote_safe_path
from noc.core.text import quote_safe_path
@six.python_2_unicode_compatible
......
......@@ -10,13 +10,13 @@
import re
# Third-party modules
from pyparsing import *
from pyparsing import LineStart, Literal, Word, alphanums, Optional, restOfLine, ZeroOrMore, nums
# NOC modules
from noc.core.ip import IPv4
from noc.cm.parsers.pyparser import BasePyParser
from noc.cm.parsers.tokens import INDENT, IPv4_ADDRESS, LINE, REST, DIGITS, ALPHANUMS
from noc.lib.text import ranges_to_list
from noc.core.text import ranges_to_list
from noc.lib.validators import is_ipv4, is_int
......
......@@ -2,7 +2,7 @@
# ---------------------------------------------------------------------
# Basic IOS parser
# ---------------------------------------------------------------------
# Copyright (C) 2007-2015 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -10,13 +10,23 @@
import re
# Third-party modules
from pyparsing import *
from pyparsing import (
LineStart,
Literal,
Word,
alphanums,
Optional,
restOfLine,
Combine,
ZeroOrMore,
nums,
)
# NOC modules
from noc.core.ip import IPv4
from noc.cm.parsers.pyparser import BasePyParser
from noc.cm.parsers.tokens import INDENT, IPv4_ADDRESS, LINE, REST, DIGITS, ALPHANUMS, RD
from noc.lib.text import ranges_to_list
from noc.core.text import ranges_to_list
from noc.lib.validators import is_ipv4, is_int
......
......@@ -10,7 +10,7 @@
from collections import defaultdict
# NOC modules
from noc.lib.text import ranges_to_list
from noc.core.text import ranges_to_list
from noc.cm.parsers.base import BaseParser
from noc.core.ip import IPv4
from noc.lib.validators import is_ipv4
......
......@@ -2,7 +2,7 @@
# ---------------------------------------------------------------------
# Basic Junos parser
# ---------------------------------------------------------------------
# Copyright (C) 2007-2018 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
......@@ -12,7 +12,7 @@ from pyparsing import OneOrMore, Word, alphanums, QuotedString
# NOC modules
from noc.core.ip import IPv4
from noc.cm.parsers.base import BaseParser
from noc.lib.text import ranges_to_list
from noc.core.text import ranges_to_list
class BaseQSW2800Parser(BaseParser):
......
......@@ -25,7 +25,7 @@ from noc.ip.models.addressprofile import AddressProfile
from noc.ip.models.address import Address
from noc.lib.validators import is_int
from noc.dns.utils.rr import RR
from noc.lib.text import split_alnum
from noc.core.text import split_alnum
class Command(BaseCommand):
......
......@@ -16,7 +16,7 @@ from noc.inv.models.interface import Interface
from noc.inv.models.interfaceprofile import InterfaceProfile
from noc.inv.models.interfaceclassificationrule import InterfaceClassificationRule
from noc.sa.models.managedobjectselector import ManagedObjectSelector
from noc.lib.text import split_alnum
from noc.core.text import split_alnum
class Command(BaseCommand):
......
......@@ -16,7 +16,7 @@ from noc.core.mongo.connection import connect
from noc.inv.models.interface import Interface
from noc.sa.models.managedobjectselector import ManagedObjectSelector
from noc.core.etl.portmapper.loader import loader
from noc.lib.text import split_alnum, format_table
from noc.core.text import split_alnum, format_table
class Command(BaseCommand):
......
......@@ -24,7 +24,7 @@ from noc.fm.models.mib import MIB
from noc.sa.models.managedobject import ManagedObject
from noc.fm.models.activeevent import ActiveEvent
from noc.core.fileutils import iter_open
from noc.lib.text import format_table
from noc.core.text import format_table
from noc.core.perf import metrics
......
......@@ -2,7 +2,7 @@
# ----------------------------------------------------------------------
# Service command
# ----------------------------------------------------------------------
# Copyright (C) 2007-2015 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
......@@ -15,7 +15,7 @@ import six
# NOC modules
from noc.core.management.base import BaseCommand
from noc.core.service.loader import get_service
from noc.lib.text import format_table
from noc.core.text import format_table
class Command(BaseCommand):
......
......@@ -2,18 +2,18 @@
# ----------------------------------------------------------------------
# Managed Object Extractor
# ----------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
# Python modules
from __future__ import absolute_import
import datetime
from noc.lib.text import ch_escape
from collections import defaultdict
# NOC modules
from .base import BaseExtractor
from noc.core.text import ch_escape
from noc.sa.models.managedobject import ManagedObject, ManagedObjectAttribute
from noc.bi.models.managedobjects import ManagedObject as ManagedObjectBI
from noc.core.etl.bi.stream import Stream
......
......@@ -2,17 +2,19 @@
# ----------------------------------------------------------------------
# Expression matcher
# ----------------------------------------------------------------------
# Copyright (C) 2007-2017 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
# Python modules
import re
from collections import Iterable
# Third-party modules
import six
# NOC modules
from noc.lib.text import split_alnum
from noc.core.text import split_alnum
__all__ = ["match"]
......
......@@ -15,7 +15,7 @@ import six
# NOC modules
from noc.core.escape import json_escape
from noc.lib.text import indent
from noc.core.text import indent
class PrettyJSON(object):
......
......@@ -2,7 +2,7 @@
# ----------------------------------------------------------------------
# CLI FSM
# ----------------------------------------------------------------------
# Copyright (C) 2007-2018 The NOC Project
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
......@@ -24,7 +24,7 @@ import six
# NOC modules
from noc.core.log import PrefixLoggerAdapter
from noc.lib.text import replace_re_group
from noc.core.text import replace_re_group
from .error import (
CLIError,
CLIAuthFailed,
......
......@@ -22,7 +22,7 @@ import tornado.gen
from noc.core.error import NO_ERROR, ERR_UNKNOWN
from noc.core.perf import metrics
from noc.config import config
from noc.lib.text import ch_escape
from noc.core.text import ch_escape
from noc.core.backport.time import perf_counter
forensic_logger = logging.getLogger("noc.core.forensic")
......
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------
# Various text-processing utilities
# ---------------------------------------------------------------------
# Copyright (C) 2007-2019 The NOC Project
# See LICENSE for details
# ---------------------------------------------------------------------
# Python modules
import re
# Third-party modules
import six
from six.moves import zip_longest
from numpy import array
# Header separator line: optional leading whitespace, a run of "-" or "=",
# a gap made of whitespace and/or "+", then another run of "-" or "=".
# At least two column markers are therefore required.
rx_header_start = re.compile(r"^\s*[-=]+[\s\+]+[-=]+")
# Single column marker: leading gap (whitespace and/or "+") in group 1,
# followed by a run of "-" or a run of "=" in group 2.
rx_col = re.compile(r"^([\s\+]*)([\-]+|[=]+)")


def parse_table(
    s,
    allow_wrap=False,
    allow_extend=False,
    expand_columns=False,
    max_width=0,
    footer=None,
    n_row_delim="",
    expand_tabs=True,
):
    """
    Parse a string containing a table and return a list of table rows.
    Each row is a list of cells.

    Column boundaries are taken from a separator line made of ``----`` or
    ``====`` runs; every following line is cut at those boundaries.
    Unless `footer` is given, an empty line ends the current table and
    a new separator line is required before more rows are collected.

    Example:

        First Second Third
        ----- ------ -----
        a     b      c
        ddd   eee    fff

    is parsed to [["a", "b", "c"], ["ddd", "eee", "fff"]]

    :param s: Table to parse
    :type s: str
    :param allow_wrap: Treat a line whose first column is blank as
        a continuation of the previous row and append its cells
        to the cells of that row
    :type allow_wrap: bool
    :param allow_extend: If a cell runs past the end of its column,
        widen that column and shift the columns to the right of it.
        The adjusted boundaries are kept for the following lines
    :type allow_extend: bool
    :param expand_columns: Stretch every column but the last one up to
        the character preceding the start of the next column
    :type expand_columns: bool
    :param max_width: If non-zero and the last column ends before
        `max_width`, extend the last column up to `max_width`
    :type max_width: int
    :param footer: Regular expression; parsing stops at the first line
        it is found in
    :type footer: str
    :param n_row_delim: Delimiter inserted between a cell and its
        continuation line (used with `allow_wrap`)
    :type n_row_delim: str
    :param expand_tabs: Apply expandtabs() to each line before parsing
    :type expand_tabs: bool
    :return: List of rows, each row is a list of cell strings
    :rtype: list
    """
    r = []
    columns = []
    if footer is not None:
        rx_footer = re.compile(footer)
    for line in s.splitlines():
        if expand_tabs:
            # Replace tabs with spaces with step 8
            line = line.expandtabs()
        if not line.strip() and footer is None:
            # Blank line: forget current column layout, wait for the next header
            columns = []
            continue
        if footer is not None and rx_footer.search(line):
            break  # Footer reached, stop
        if not columns and rx_header_start.match(line):
            # Column delimiters found. Try to determine columns' widths
            columns = []
            x = 0  # Offset of the unparsed remainder within the original line
            while line:
                match = rx_col.match(line)
                if not match:
                    break
                spaces = len(match.group(1))
                dashes = len(match.group(2))
                # Column is stored as (start, end) offsets, end is exclusive
                columns += [(x + spaces, x + spaces + dashes)]
                x += match.end()
                line = line[match.end() :]
            if max_width and columns[-1][-1] < max_width:
                # Stretch last column up to max_width
                columns[-1] = (columns[-1][0], max_width)
            if expand_columns:
                # Every column ends one character before the next one starts,
                # last column is left untouched
                columns = [(cc[0], nc[0] - 1) for cc, nc in zip(columns, columns[1:])] + [
                    columns[-1]
                ]
        elif columns:  # Fetch cells
            if allow_extend:
                # Find which spaces between column not empty
                ll = len(line)
                for i, (f, t) in enumerate(columns):
                    if t < ll and line[t].strip():
                        # If spaces not empty - shift column width equal size row
                        shift = len(line[f:].split()[0]) - (t - f)
                        # Enlarge column
                        columns[i] = (f, t + shift)
                        # Shift rest
                        columns[i + 1 :] = [(v[0] + shift, v[1] + shift) for v in columns[i + 1 :]]
                        # Only the first overflowing column is handled per line
                        break
            if allow_wrap:
                # Cells are kept unstripped here, stripping is done on return
                row = [line[f:t] for f, t in columns]
                if r and not row[0].strip():
                    # first column is empty
                    for i, x in enumerate(row):
                        # With the default empty n_row_delim endswith("") is
                        # always true, so cells are simply concatenated
                        if (
                            x.strip()
                            and not r[-1][i].endswith(n_row_delim)
                            and not x.startswith(n_row_delim)
                        ):
                            r[-1][i] += "%s%s" % (n_row_delim, x)
                        else:
                            r[-1][i] += x
                else:
                    r += [row]
            else:
                r += [[line[f:t].strip() for f, t in columns]]
    if allow_wrap:
        return [[x.strip() for x in rr] for rr in r]
    else:
        return r
#
# Convert HTML to plain text
#
# Any opening or closing tag: "<" or "</" followed by everything up to
# the nearest ">". DOTALL is kept from the original flags; the negated
# class already spans newlines, so multi-line tags are matched as well.
rx_html_tags = re.compile(r"</?[^>]+>", re.MULTILINE | re.DOTALL)


def strip_html_tags(s):
    """
    Convert HTML to plain text: remove tags and decode basic entities.

    :param s: HTML text
    :type s: str
    :return: Text with all tags removed and the ``&nbsp;``, ``&lt;``,
        ``&gt;`` and ``&amp;`` entities replaced with the characters
        they stand for
    :rtype: str

    >>> strip_html_tags("<b>bold</b> &amp; <i>italic</i>")
    'bold & italic'
    >>> strip_html_tags('<a href="a+b">link</a>')
    'link'
    """
    text = rx_html_tags.sub("", s)
    # "&amp;" must be decoded last, otherwise an escaped entity
    # like "&amp;lt;" would be decoded twice
    for entity, char in (("&nbsp;", " "), ("&lt;", "<"), ("&gt;", ">"), ("&amp;", "&")):
        text = text.replace(entity, char)
    return text
#
# Convert XML to list of elements
#
def xml_to_table(s, root, row):
    # pylint: disable=line-too-long
    """
    Convert simple XML to a list of dicts, one dict per row element.

    The text is processed with regular expressions, not with an XML parser:
    `root` and `row` must be plain tags without attributes, and both are
    inserted into the patterns as-is. Matching is case-insensitive.

    :param s: XML text
    :type s: str
    :param root: Name of the element which contains the rows
    :type root: str
    :param row: Name of the row element
    :type row: str
    :return: List of {element name: element text} dicts, one per row.
        Empty list if the root element is not found
    :rtype: list

    >>> xml_to_table('<?xml version="1.0" encoding="UTF-8" ?><response><action><row><a>1</a><b>2</b></row><row><a>3</a><b>4</b></row></action></response>','action','row') # noqa
    [{'a': '1', 'b': '2'}, {'a': '3', 'b': '4'}]
    >>> xml_to_table('<list><item><name>eth0</name><mtu>1500</mtu></item></list>', 'list', 'item')
    [{'name': 'eth0', 'mtu': '1500'}]
    """
    # Detect root element
    match = re.search(r"<%s>(.*)</%s>" % (root, root), s, re.DOTALL | re.IGNORECASE)
    if not match:
        return []
    body = match.group(1)
    row_re = re.compile(r"<%s>(.*?)</%s>" % (row, row), re.DOTALL | re.IGNORECASE)
    # Element name of any length (the previous pattern accepted
    # single-character names only), closed by the matching end tag
    item_re = re.compile(r"<([^<>]+)>(.*?)</\1>", re.DOTALL | re.IGNORECASE)
    result = []
    # split() on a pattern with a group yields row bodies interleaved
    # with the text between the rows; empty pieces are dropped
    for chunk in row_re.split(body):
        if not chunk:
            continue
        items = item_re.findall(chunk)
        if items:
            result.append(dict(items))
    return result
#
# Convert list of values to string of ranges
#
def list_to_ranges(s):
    """
    Convert an iterable of integers to a string of comma-separated ranges.

    Input may be unsorted and may contain duplicates; every value is
    reported exactly once.

    :param s: Iterable of integers
    :return: Ranges string, empty string for empty input
    :rtype: str

    >>> list_to_ranges([])
    ''
    >>> list_to_ranges([1])
    '1'
    >>> list_to_ranges([1,2])
    '1-2'
    >>> list_to_ranges([1,2,3])
    '1-3'
    >>> list_to_ranges([1,2,3,5])
    '1-3,5'
    >>> list_to_ranges([1,2,3,5,6,7])
    '1-3,5-7'
    >>> list_to_ranges(range(1,4001))
    '1-4000'
    >>> list_to_ranges([1,1,2])
    '1-2'
    """

    def fmt(first, last):
        # Single value is shown as is, longer run as "first-last"
        if first == last:
            return str(first)
        return "%d-%d" % (first, last)

    ranges = []
    run_start = None
    run_end = None
    # set() drops duplicates, which would otherwise break a run
    # and yield overlapping output like "1,1-2"
    for value in sorted(set(s)):
        if run_end is not None and value == run_end + 1:
            # Value continues current run
            run_end = value
            continue
        if run_start is not None:
            ranges.append(fmt(run_start, run_end))
        run_start = run_end = value
    if run_start is not None:
        # Flush last run
        ranges.append(fmt(run_start, run_end))
    return ",".join(ranges)
#
# Convert range string to a list of integers
#
# "<from>-<to>" with optional spaces around the dash
rx_range = re.compile(r"^(\d+)\s*-\s*(\d+)$")


def ranges_to_list(s, splitter=","):
    """
    Convert a string of ranges to a sorted list of integers.

    Items are separated by `splitter`. Each item is either a single
    integer or a ``from-to`` range, ``from to to`` form is accepted too.

    :param s: Ranges string
    :type s: str
    :param splitter: Items separator
    :type splitter: str
    :return: Sorted list of integers
    :rtype: list
    :raises SyntaxError: If an item is neither an integer nor a range,
        or if the range start is not less than the range end

    >>> ranges_to_list("1")
    [1]
    >>> ranges_to_list("1, 2")
    [1, 2]
    >>> ranges_to_list("1, 10-12")
    [1, 10, 11, 12]
    >>> ranges_to_list("1, 10-12, 15, 17-19")
    [1, 10, 11, 12, 15, 17, 18, 19]
    """
    if "to" in s:
        # Normalize "10 to 12" to "10-12"
        s = s.replace(" to ", "-")
    values = []
    for chunk in s.split(splitter):
        token = chunk.strip()
        try:
            number = int(token)
        except ValueError:
            number = None
        if number is not None:
            # Plain integer
            values.append(number)
            continue
        bounds = rx_range.match(token)
        if bounds is None:
            raise SyntaxError
        low = int(bounds.group(1))
        high = int(bounds.group(2))
        if low >= high:
            raise SyntaxError
        values.extend(range(low, high + 1))
    values.sort()
    return values
#
# Replace regular expression group with pattern
#
def replace_re_group(expr, group, pattern):
    """
    Replace every regular expression group starting with `group`
    with `pattern`.

    The group body is skipped up to the closing parenthesis, nested
    parentheses and backslash-escaped characters are taken into account.
    A group left unclosed is dropped together with the rest of expression.

    :param expr: Regular expression source
    :param group: Group opening, i.e. "(?P<name>"
    :param pattern: Replacement text
    :return: Expression with the groups replaced

    >>> replace_re_group("nothing","(?P<groupname>","groupvalue")
    'nothing'
    >>> replace_re_group("the (?P<groupname>simple) test","(?P<groupname>","groupvalue")
    'the groupvalue test'
    >>> replace_re_group("the (?P<groupname> nested (test)>)","(?P<groupname>","groupvalue")
    'the groupvalue'
    """
    pieces = []
    group_len = len(group)
    total = len(expr)
    pos = 0  # Start of the part not processed yet
    while pos < total:
        start = expr.find(group, pos)
        if start == -1:
            break  # No more groups found
        pieces.append(expr[pos:start])
        pos = start + group_len
        depth = 1  # Level of parenthesis nesting
        while pos < total:
            char = expr[pos]
            pos += 1
            if char == "\\":
                # Skip quoted character
                pos += 1
            elif char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
                if depth == 0:
                    # Group is closed, replace it and search for the next one
                    pieces.append(pattern)
                    break
    return "".join(pieces) + expr[pos:]
def indent(text, n=4):
    """
    Prepend every line of text with `n` spaces

    :param text: Text to indent
    :param n: Amount of spaces to indent with
    :return: Indented text, empty string for empty input

    >>> indent("")
    ''
    >>> indent("the quick brown fox\\njumped over an lazy dog\\nend")
    '    the quick brown fox\\n    jumped over an lazy dog\\n    end'
    """
    if text:
        prefix = " " * n
        return "\n".join(prefix + line for line in text.split("\n"))
    return ""
def split_alnum(s):
    """
    Split a line into alternating runs of digit and non-digit characters.
    Digit runs are returned as integers where int() accepts them,
    everything else is returned as strings.

    :param s: Line to split
    :type s: str
    :return: List of strings and integers
    :rtype: list

    >>> split_alnum("Fa 0/1")
    ['Fa ', 0, '/', 1]
    >>> split_alnum("Fa 0/1.15")
    ['Fa ', 0, '/', 1, '.', 15]
    >>> split_alnum("ge-1/0/1")
    ['ge-', 1, '/', 0, '/', 1]
    >>> split_alnum("ge-1/0/1.15")
    ['ge-', 1, '/', 0, '/', 1, '.', 15]
    """

    def to_number(token):
        # Tokens which are not valid integers are left as is
        try:
            return int(token)
        except ValueError:
            return token

    tokens = []
    prev_is_digit = None  # Kind of the previous character, None at the start
    for char in s:
        is_digit = char.isdigit()
        if is_digit == prev_is_digit:
            # Same kind of character, continue current run
            tokens[-1] += char
        else:
            # Kind has changed, start new run
            tokens.append(char)
            prev_is_digit = is_digit
    return list(map(to_number, tokens))
# Line which starts with a non-space character, i.e. a section header
rx_notspace = re.compile(r"^\S+")


def find_indented(s):
    """
    Split text to sections. A section is a header line, starting at
    the first column, with the indented lines which follow it:

    section 1 header
        line 1
        line 2
    section 2 header
        line 1
        line 2

    Empty lines are skipped. Headers without indented lines are dropped.

    >>> find_indented("section0\\nsection 1\\n  line 1-1\\n  line 1-2\\n\\n"\
                      "section 2\\n  line 2-1\\n  line 2-2")
    ['section 1\\n  line 1-1\\n  line 1-2', 'section 2\\n  line 2-1\\n  line 2-2']

    :param s: Text to split
    :return: List of sections, every section is a header and its
        lines joined with newlines
    """
    # First group has no header, it collects indented lines
    # met before the first header
    groups = [[]]
    for line in s.splitlines():
        if rx_notspace.match(line):
            # Header line opens new group
            groups.append([line])
        elif line:
            groups[-1].append(line)
    # Single-line groups are headers without a body
    return ["\n".join(group) for group in groups if len(group) > 1]
def parse_kv(kmap, data, sep=":"):
"""
:param kmap: text -> dict mapping
:param data:
:return: dict
"""
r = {}
for line in data.splitlines():