Commit 8ba1686e authored by Andrey Vertiprahov
Browse files

Merge branch 'vak-1598-fias' into 'master'

#1598 moving Street, Address, Building extractors to fias module

See merge request noc/noc!5469
parents 577c99f9 8848e435
# ----------------------------------------------------------------------
# FIAS Address Extractor
# ----------------------------------------------------------------------
# Copyright (C) 2021 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
# python modules
import dbf
import requests
import os
import re
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile, is_zipfile
# NOC modules
from .base import BaseExtractor
from ..models.address import Address
from noc.core.etl.remotesystem.base import BaseRemoteSystem
class AddressRemoteSystem(BaseRemoteSystem):
    """
    Remote system that the FIAS address extractor is registered on.

    Configuration variables
    *FIAS_URL* - URL the FIAS data archive is downloaded from
    *CACHE_PATH* - directory the downloaded files are stored in
    *REGION* - region code
    """
@AddressRemoteSystem.extractor
class AddressExtractor(BaseExtractor):
    """
    Address extractor.

    Reads the regional ``HOUSE<REGION>.DBF`` table from the FIAS DBF archive
    and yields one row per house record that has not expired and whose house
    number starts with digits.
    """

    name = "address"
    model = Address

    def __init__(self, system, *args, **kwargs):
        """
        Read the extractor settings from the remote system configuration.

        :param system: remote system instance, passed to the base extractor
        """
        super().__init__(system)
        self.fias_url = str(self.config.get("FIAS_URL"))
        self.cache_path = str(self.config.get("CACHE_PATH"))
        self.region = str(self.config.get("REGION"))
        self.check_path(self.cache_path)
        self.zip_path = os.path.join(self.cache_path, "fias_dbf.zip")
        self.dbf_file = f"HOUSE{self.region}.DBF"
        self.dbf_path = os.path.join(self.cache_path, self.dbf_file)
        # Reference date used to filter out expired records
        self.now = datetime.now().date()

    def check_path(self, path):
        """
        Make sure the cache directory exists, creating it when necessary.

        :param path: directory path
        :raises FileExistsError: if *path* exists but is not a directory
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)

    def download(self):
        """
        Refresh the cached archive and unpack the house table from it.

        The archive is downloaded again when it is missing or when its ctime
        does not fall on the current date.

        :raises Exception: if the cached file is not a valid zip archive
        """
        is_fresh = (
            os.path.isfile(self.zip_path)
            and datetime.fromtimestamp(os.path.getctime(self.zip_path)).date()
            == datetime.now().date()
        )
        if not is_fresh:
            # The context manager releases the streamed connection even when
            # the body is never read (non-200 reply)
            with requests.get(self.fias_url, stream=True) as r:
                # A non-200 reply leaves any previously cached archive untouched
                if r.status_code == 200:
                    with open(self.zip_path, "wb") as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
        if not is_zipfile(self.zip_path):
            raise Exception("zipfile not found!")
        with ZipFile(self.zip_path, "r") as f:
            # This extractor only defines and reads the HOUSE table
            f.extract(self.dbf_file, self.cache_path)

    def extract(self):
        """
        Run the base class extraction; its result is not returned.
        """
        super().extract()

    def num_letter(self, num_letter):
        """
        Split a house number into its leading digits and the remainder.

        :param num_letter: raw house number; trailing whitespace is ignored
        :return: ``(num, letter)`` tuple of strings, or ``(None, None)`` when
            the value does not start with a digit
        """
        value = num_letter.rstrip()
        found = re.search(r"^\d+", value)
        if not found:
            return None, None
        num = found.group()
        return num, value[len(num):]

    def iter_data(self):
        """
        Yield ``(HOUSEID, HOUSEGUID, AOGUID, num, letter)`` for every house
        record whose ENDDATE is not before the reference date and whose
        number starts with digits.
        """
        self.download()
        with dbf.Table(filename=self.dbf_path, codepage="cp866") as table:
            for r in table:
                num, letter = self.num_letter(r.HOUSENUM)
                if r.ENDDATE >= self.now and num:
                    yield r.HOUSEID, r.HOUSEGUID, r.AOGUID, num, letter
# ----------------------------------------------------------------------
# FIAS Building Extractor
# ----------------------------------------------------------------------
# Copyright (C) 2021 The NOC Project
# See LICENSE for details
# ----------------------------------------------------------------------
# python modules
import dbf
import requests
import os
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile, is_zipfile
# NOC modules
from .base import BaseExtractor
from ..models.building import Building
from noc.core.etl.remotesystem.base import BaseRemoteSystem
class BuildingRemoteSystem(BaseRemoteSystem):
    """
    Remote system that the FIAS building extractor is registered on.

    Configuration variables
    *FIAS_URL* - URL the FIAS data archive is downloaded from
    *CACHE_PATH* - directory the downloaded files are stored in
    *REGION* - region code
    """
@BuildingRemoteSystem.extractor
class BuildingExtractor(BaseExtractor):
    """
    Building extractor.

    Joins the regional ``HOUSE<REGION>.DBF`` table with OKTMO codes taken
    from ``ADDROB<REGION>.DBF`` and yields one row per house record that has
    not expired and whose address object has an OKTMO code.
    """

    name = "building"
    model = Building

    def __init__(self, system, *args, **kwargs):
        """
        Read the extractor settings from the remote system configuration.

        :param system: remote system instance, passed to the base extractor
        """
        super().__init__(system)
        self.fias_url = str(self.config.get("FIAS_URL"))
        self.cache_path = str(self.config.get("CACHE_PATH"))
        self.region = str(self.config.get("REGION"))
        self.check_path(self.cache_path)
        self.zip_path = os.path.join(self.cache_path, "fias_dbf.zip")
        self.dbf_file_house = f"HOUSE{self.region}.DBF"
        self.dbf_file_address = f"ADDROB{self.region}.DBF"
        self.dbf_path_house = os.path.join(self.cache_path, self.dbf_file_house)
        self.dbf_path_address = os.path.join(self.cache_path, self.dbf_file_address)
        # Reference date used to filter out expired records
        self.now = datetime.now().date()

    def check_path(self, path):
        """
        Make sure the cache directory exists, creating it when necessary.

        :param path: directory path
        :raises FileExistsError: if *path* exists but is not a directory
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)

    def download(self):
        """
        Refresh the cached archive and unpack both DBF tables from it.

        The archive is downloaded again when it is missing or when its ctime
        does not fall on the current date.

        :raises Exception: if the cached file is not a valid zip archive
        """
        is_fresh = (
            os.path.isfile(self.zip_path)
            and datetime.fromtimestamp(os.path.getctime(self.zip_path)).date()
            == datetime.now().date()
        )
        if not is_fresh:
            # The context manager releases the streamed connection even when
            # the body is never read (non-200 reply)
            with requests.get(self.fias_url, stream=True) as r:
                # A non-200 reply leaves any previously cached archive untouched
                if r.status_code == 200:
                    with open(self.zip_path, "wb") as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
        if not is_zipfile(self.zip_path):
            raise Exception("zipfile not found!")
        with ZipFile(self.zip_path, "r") as f:
            for member in (self.dbf_file_house, self.dbf_file_address):
                f.extract(member, self.cache_path)

    def extract(self):
        """
        Run the base class extraction; its result is not returned.
        """
        super().extract()

    def get_oktmo_data(self):
        """
        Build a lookup of OKTMO codes from the address objects table.

        :return: dict mapping AOGUID to the OKTMO code, left-padded with
            zeros to 11 characters
        """
        blank_next_id = " " * 36
        blank_oktmo = " " * 11
        oktmo_data = {}
        with dbf.Table(filename=self.dbf_path_address, codepage="cp866") as table:
            for r in table:
                # NOTE(review): a blank NEXTID presumably marks the latest
                # version of the record - confirm against the FIAS schema
                if (
                    r.AOLEVEL in (7, 4, 35)
                    and r.NEXTID == blank_next_id
                    and r.OKTMO != blank_oktmo
                ):
                    oktmo_data[r.AOGUID] = r.OKTMO.rstrip().zfill(11)
        return oktmo_data

    def iter_data(self):
        """
        Yield ``(HOUSEGUID, oktmo, POSTALCODE, STARTDATE, ENDDATE)`` for every
        house record whose ENDDATE is not before the reference date and whose
        AOGUID has an OKTMO code.
        """
        self.download()
        oktmo_data = self.get_oktmo_data()
        with dbf.Table(filename=self.dbf_path_house, codepage="cp866") as table:
            for r in table:
                oktmo = oktmo_data.get(r.AOGUID)
                if r.ENDDATE >= self.now and oktmo:
                    yield r.HOUSEGUID, oktmo, r.POSTALCODE, r.STARTDATE, r.ENDDATE
# ----------------------------------------------------------------------
# FIAS Street Extractor
# FIAS Extractor
# ----------------------------------------------------------------------
# Copyright (C) 2021 The NOC Project
# See LICENSE for details
......@@ -9,6 +9,7 @@
import dbf
import requests
import os
import re
from datetime import datetime
from pathlib import Path
from zipfile import ZipFile, is_zipfile
......@@ -16,10 +17,12 @@ from zipfile import ZipFile, is_zipfile
# NOC modules
from .base import BaseExtractor
from ..models.street import Street
from ..models.address import Address
from ..models.building import Building
from noc.core.etl.remotesystem.base import BaseRemoteSystem
class StreetRemoteSystem(BaseRemoteSystem):
class FiasRemoteSystem(BaseRemoteSystem):
"""
base class
......@@ -30,7 +33,7 @@ class StreetRemoteSystem(BaseRemoteSystem):
"""
@StreetRemoteSystem.extractor
@FiasRemoteSystem.extractor
class StreetExtractor(BaseExtractor):
"""
Street extractor.
......@@ -137,3 +140,139 @@ class StreetExtractor(BaseExtractor):
yield r.AOGUID, parent.OKTMO, r.FORMALNAME.rstrip(), r.SHORTNAME.rstrip(), r.STARTDATE, r.ENDDATE
cities.close()
streets.close()
@FiasRemoteSystem.extractor
class AddressExtractor(BaseExtractor):
    """
    Address extractor.

    Reads the regional ``HOUSE<REGION>.DBF`` table from the FIAS DBF archive
    and yields one row per house record that has not expired and whose house
    number starts with digits.
    """

    name = "address"
    model = Address

    def __init__(self, system, *args, **kwargs):
        """
        Read the extractor settings from the remote system configuration.

        :param system: remote system instance, passed to the base extractor
        """
        super().__init__(system)
        self.fias_url = str(self.config.get("FIAS_URL"))
        self.cache_path = str(self.config.get("CACHE_PATH"))
        self.region = str(self.config.get("REGION"))
        self.check_path(self.cache_path)
        self.zip_path = os.path.join(self.cache_path, "fias_dbf.zip")
        self.dbf_file = f"HOUSE{self.region}.DBF"
        self.dbf_path = os.path.join(self.cache_path, self.dbf_file)
        # Reference date used to filter out expired records
        self.now = datetime.now().date()

    def check_path(self, path):
        """
        Make sure the cache directory exists, creating it when necessary.

        :param path: directory path
        :raises FileExistsError: if *path* exists but is not a directory
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)

    def download(self):
        """
        Refresh the cached archive and unpack the house table from it.

        The archive is downloaded again when it is missing or when its ctime
        does not fall on the current date.

        :raises Exception: if the cached file is not a valid zip archive
        """
        is_fresh = (
            os.path.isfile(self.zip_path)
            and datetime.fromtimestamp(os.path.getctime(self.zip_path)).date()
            == datetime.now().date()
        )
        if not is_fresh:
            # The context manager releases the streamed connection even when
            # the body is never read (non-200 reply)
            with requests.get(self.fias_url, stream=True) as r:
                # A non-200 reply leaves any previously cached archive untouched
                if r.status_code == 200:
                    with open(self.zip_path, "wb") as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
        if not is_zipfile(self.zip_path):
            raise Exception("zipfile not found!")
        with ZipFile(self.zip_path, "r") as f:
            # This extractor only defines and reads the HOUSE table
            f.extract(self.dbf_file, self.cache_path)

    def extract(self):
        """
        Run the base class extraction; its result is not returned.
        """
        super().extract()

    def num_letter(self, num_letter):
        """
        Split a house number into its leading digits and the remainder.

        :param num_letter: raw house number; trailing whitespace is ignored
        :return: ``(num, letter)`` tuple of strings, or ``(None, None)`` when
            the value does not start with a digit
        """
        value = num_letter.rstrip()
        found = re.search(r"^\d+", value)
        if not found:
            return None, None
        num = found.group()
        return num, value[len(num):]

    def iter_data(self):
        """
        Yield ``(HOUSEID, HOUSEGUID, AOGUID, num, letter)`` for every house
        record whose ENDDATE is not before the reference date and whose
        number starts with digits.
        """
        self.download()
        with dbf.Table(filename=self.dbf_path, codepage="cp866") as table:
            for r in table:
                num, letter = self.num_letter(r.HOUSENUM)
                if r.ENDDATE >= self.now and num:
                    yield r.HOUSEID, r.HOUSEGUID, r.AOGUID, num, letter
@FiasRemoteSystem.extractor
class BuildingExtractor(BaseExtractor):
    """
    Building extractor.

    Joins the regional ``HOUSE<REGION>.DBF`` table with OKTMO codes taken
    from ``ADDROB<REGION>.DBF`` and yields one row per house record that has
    not expired and whose address object has an OKTMO code.
    """

    name = "building"
    model = Building

    def __init__(self, system, *args, **kwargs):
        """
        Read the extractor settings from the remote system configuration.

        :param system: remote system instance, passed to the base extractor
        """
        super().__init__(system)
        self.fias_url = str(self.config.get("FIAS_URL"))
        self.cache_path = str(self.config.get("CACHE_PATH"))
        self.region = str(self.config.get("REGION"))
        self.check_path(self.cache_path)
        self.zip_path = os.path.join(self.cache_path, "fias_dbf.zip")
        self.dbf_file_house = f"HOUSE{self.region}.DBF"
        self.dbf_file_address = f"ADDROB{self.region}.DBF"
        self.dbf_path_house = os.path.join(self.cache_path, self.dbf_file_house)
        self.dbf_path_address = os.path.join(self.cache_path, self.dbf_file_address)
        # Reference date used to filter out expired records
        self.now = datetime.now().date()

    def check_path(self, path):
        """
        Make sure the cache directory exists, creating it when necessary.

        :param path: directory path
        :raises FileExistsError: if *path* exists but is not a directory
        """
        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(path, exist_ok=True)

    def download(self):
        """
        Refresh the cached archive and unpack both DBF tables from it.

        The archive is downloaded again when it is missing or when its ctime
        does not fall on the current date.

        :raises Exception: if the cached file is not a valid zip archive
        """
        is_fresh = (
            os.path.isfile(self.zip_path)
            and datetime.fromtimestamp(os.path.getctime(self.zip_path)).date()
            == datetime.now().date()
        )
        if not is_fresh:
            # The context manager releases the streamed connection even when
            # the body is never read (non-200 reply)
            with requests.get(self.fias_url, stream=True) as r:
                # A non-200 reply leaves any previously cached archive untouched
                if r.status_code == 200:
                    with open(self.zip_path, "wb") as f:
                        for chunk in r.iter_content(1024):
                            f.write(chunk)
        if not is_zipfile(self.zip_path):
            raise Exception("zipfile not found!")
        with ZipFile(self.zip_path, "r") as f:
            for member in (self.dbf_file_house, self.dbf_file_address):
                f.extract(member, self.cache_path)

    def extract(self):
        """
        Run the base class extraction; its result is not returned.
        """
        super().extract()

    def get_oktmo_data(self):
        """
        Build a lookup of OKTMO codes from the address objects table.

        :return: dict mapping AOGUID to the OKTMO code, left-padded with
            zeros to 11 characters
        """
        blank_next_id = " " * 36
        blank_oktmo = " " * 11
        oktmo_data = {}
        with dbf.Table(filename=self.dbf_path_address, codepage="cp866") as table:
            for r in table:
                # NOTE(review): a blank NEXTID presumably marks the latest
                # version of the record - confirm against the FIAS schema
                if (
                    r.AOLEVEL in (7, 4, 35)
                    and r.NEXTID == blank_next_id
                    and r.OKTMO != blank_oktmo
                ):
                    oktmo_data[r.AOGUID] = r.OKTMO.rstrip().zfill(11)
        return oktmo_data

    def iter_data(self):
        """
        Yield ``(HOUSEGUID, oktmo, POSTALCODE, STARTDATE, ENDDATE)`` for every
        house record whose ENDDATE is not before the reference date and whose
        AOGUID has an OKTMO code.
        """
        self.download()
        oktmo_data = self.get_oktmo_data()
        with dbf.Table(filename=self.dbf_path_house, codepage="cp866") as table:
            for r in table:
                oktmo = oktmo_data.get(r.AOGUID)
                if r.ENDDATE >= self.now and oktmo:
                    yield r.HOUSEGUID, oktmo, r.POSTALCODE, r.STARTDATE, r.ENDDATE
......@@ -21,3 +21,5 @@ class Building(BaseModel):
postal_code: Optional[str]
start_date: Optional[date]
end_date: Optional[date]
_csv_fields = ["id", "adm_division", "postal_code", "short_name", "start_date", "end_date"]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment