Source code for isogeotodocx.isogeo2docx

# -*- coding: UTF-8 -*-
#! python3  # noqa: E265

"""
    Get metadatas from Isogeo and dump each into a Word document.

"""

# ##############################################################################
# ########## Libraries #############
# ##################################

# Standard library
import logging
from datetime import datetime
from pathlib import Path

# 3rd party library
from docxtpl import DocxTemplate, InlineImage, etree
from isogeo_pysdk import Event, IsogeoTranslator, IsogeoUtils, Metadata, Share

# custom submodules
from isogeotodocx.utils import Formatter

# ##############################################################################
# ############ Globals ############
# #################################

logger = logging.getLogger("isogeo2office")
utils = IsogeoUtils()

# ##############################################################################
# ########## Classes ###############
# ##################################


[docs]class Isogeo2docx(object): """IsogeoToDocx class. :param str lang: selected language for output :param dict thumbnails: dictionary of metadatas associated to an image path :param str url_base_edit: base url to format edit links (basically app.isogeo.com) :param str url_base_view: base url to format view links (basically open.isogeo.com) """ def __init__( self, lang="FR", thumbnails: dict = None, url_base_edit: str = "https://app.isogeo.com", url_base_view: str = "https://open.isogeo.com", ): """Processing matching between Isogeo metadata and a Miscrosoft Word template.""" super(Isogeo2docx, self).__init__() # ------------ VARIABLES --------------------- # LOCALE if lang.lower() == "fr": self.dates_fmt = "%d/%m/%Y" self.datetimes_fmt = "%A %d %B %Y (%Hh%M)" self.locale_fmt = "fr_FR" else: self.dates_fmt = "%d/%m/%Y" self.datetimes_fmt = "%a %d %B %Y (%Hh%M)" self.locale_fmt = "uk_UK" # TRANSLATIONS self.isogeo_tr = IsogeoTranslator(lang).tr # FORMATTER self.fmt = Formatter() # THUMBNAILS if thumbnails is not None and isinstance(thumbnails, dict): self.thumbnails = thumbnails else: self.thumbnails = {} logger.debug("No valid thumbnails matching table passed.") # URLS utils.app_url = url_base_edit # APP utils.oc_url = url_base_view # OpenCatalog url
[docs] def md2docx(self, docx_template: DocxTemplate, md: Metadata, share: Share = None): """Dump Isogeo metadata into a docx template. :param DocxTemplate docx_template: Word template to fill :param Metadata metadata: metadata to dumpinto the template :param Share share: share in which the metadata is. Used to build the view URL. """ logger.debug( "Starting the export into Word .docx of {} ({})".format( md.title_or_name(slugged=1), md._id ) ) # template context starting with metadata attributes which do not require any special formatting context = { # IDENTIFICATION "varType": self.isogeo_tr("formatTypes", md.type), "varTitle": self.fmt.clean_xml(md.title), "varAbstract": self.fmt.clean_xml(md.abstract), "varNameTech": self.fmt.clean_xml(md.name), "varOwner": md.groupName, "varPath": self.fmt.clean_xml(md.path), # QUALITY "varTopologyInfo": self.fmt.clean_xml(md.topologicalConsistency), # HISTORY "varCollectContext": self.fmt.clean_xml(md.collectionContext), "varCollectMethod": self.fmt.clean_xml(md.collectionMethod), "varValidityComment": self.fmt.clean_xml(md.validityComment), # GEOGRAPHY "varEncoding": self.fmt.clean_xml(md.encoding), "varScale": self.fmt.clean_xml(md.scale), "varGeometry": self.fmt.clean_xml(md.geometry), "varObjectsCount": self.fmt.clean_xml(md.features), # METADATA "varMdDtCrea": utils.hlpr_datetimes(md._created).strftime( self.datetimes_fmt ), "varMdDtUpda": utils.hlpr_datetimes(md._modified).strftime( self.datetimes_fmt ), "varMdDtExp": datetime.now().strftime(self.datetimes_fmt), } # TAGS # # extracting & parsing tags li_motscles = [] li_theminspire = [] # default values context["varInspireConformity"] = self.isogeo_tr("quality", "isNotConform") # looping on tags for tag in md.tags.keys(): # free keywords if tag.startswith("keyword:isogeo"): li_motscles.append(md.tags.get(tag)) continue # INSPIRE themes if tag.startswith("keyword:inspire-theme"): li_theminspire.append(md.tags.get(tag)) continue # coordinate system if tag.startswith("coordinate-system"): context["varSRS"] = md.tags.get(tag) continue # format if tag.startswith("format"): context["varFormat"] = md.tags.get(tag) if md.formatVersion: context["varFormat"] += " " + md.formatVersion continue # INSPIRE conformity if tag.startswith("conformity:inspire"): context["varInspireConformity"] = self.isogeo_tr("quality", "isConform") continue # add tags to the template context context["varKeywords"] = " ; ".join(li_motscles) context["varKeywordsCount"] = len(li_motscles) context["varInspireTheme"] = " ; ".join(li_theminspire) # formatting links to visualize on OpenCatalog and edit on APP if share is not None: context["varViewOC"] = utils.get_view_url( md_id=md._id, share_id=share._id, share_token=share.urlToken ) else: logger.debug( "No OpenCatalog URL for metadata: {} ({})".format( md.title_or_name(), md._id ) ) # link to APP context["varEditAPP"] = utils.get_edit_url(md) # ---- CONTACTS # ---------------------------------------------------- contacts_out = [] if md.contacts: # formatting contacts for ct_in in md.contacts: ct = {} # translate contact role ct["role"] = self.isogeo_tr("roles", ct_in.get("role")) # ensure other contacts fields ct["name"] = ct_in.get("contact").get("name", "NR") ct["organization"] = ct_in.get("contact").get("organization", "") ct["email"] = ct_in.get("contact").get("email", "") ct["phone"] = ct_in.get("contact").get("phone", "") ct["fax"] = ct_in.get("contact").get("fax", "") ct["addressLine1"] = ct_in.get("contact").get("addressLine1", "") ct["addressLine2"] = ct_in.get("contact").get("addressLine2", "") ct["zipCode"] = ct_in.get("contact").get("zipCode", "") ct["city"] = ct_in.get("contact").get("city", "") ct["countryCode"] = ct_in.get("contact").get("countryCode", "") # store into the final list contacts_out.append(ct) # add it to final context context["varContactsCount"] = len(contacts_out) context["varContactsDetails"] = contacts_out # ---- ATTRIBUTES -------------------------------------------------- fields_out = [] if md.type == "vectorDataset" and isinstance(md.featureAttributes, list): for f_in in md.featureAttributes: field = {} # ensure other fields field["name"] = self.fmt.clean_xml(f_in.get("name", "")) field["alias"] = self.fmt.clean_xml(f_in.get("alias", "")) field["description"] = self.fmt.clean_xml(f_in.get("description", "")) field["dataType"] = f_in.get("dataType", "") field["language"] = f_in.get("language", "") # store into the final list fields_out.append(field) # add to the final context context["varFieldsCount"] = len(fields_out) context["varFields"] = fields_out # ---- EVENTS ------------------------------------------------------ events_out = [] if md.events: for e in md.events: evt = Event(**e) # pop creation events (already in the export document) if evt.kind == "creation": continue # prevent invalid character for XML formatting in description evt.description = self.fmt.clean_xml(evt.description) # make data human readable evt.date = utils.hlpr_datetimes(evt.date).strftime(self.dates_fmt) # translate event kind # evt.kind = self.isogeo_tr("events", evt.kind) # append events_out.append(evt.to_dict()) # add to the final context context["varEventsCount"] = len(events_out) context["varEvents"] = events_out # ---- HISTORY # ----------------------------------------------------- # data events if md.created: context["varDataDtCrea"] = utils.hlpr_datetimes(md.created).strftime( self.dates_fmt ) if md.modified: context["varDataDtUpda"] = utils.hlpr_datetimes(md.modified).strftime( self.dates_fmt ) if md.published: context["varDataDtPubl"] = utils.hlpr_datetimes(md.published).strftime( self.dates_fmt ) # validity if md.validFrom: context["varValidityStart"] = utils.hlpr_datetimes(md.validFrom).strftime( self.dates_fmt ) # end validity date if md.validTo: context["varValidityEnd"] = utils.hlpr_datetimes(md.validTo).strftime( self.dates_fmt ) # ---- SPECIFICATIONS # ----------------------------------------------- if md.specifications: context["varSpecifications"] = self.fmt.specifications( md_specifications=md.specifications ) # ---- CGUs # -------------------------------------------------------- if md.conditions: context["varConditions"] = self.fmt.conditions(md_conditions=md.conditions) # ---- LIMITATIONS # ------------------------------------------------- if md.limitations: context["varLimitations"] = self.fmt.limitations( md_limitations=md.limitations ) # -- THUMBNAIL ----------------------------------------------------------------- if md._id in self.thumbnails and Path(self.thumbnails.get(md._id)).is_file(): thumbnail = str(Path(self.thumbnails.get(md._id)).resolve()) context["varThumbnail"] = InlineImage(docx_template, thumbnail) logger.info( "Thumbnail found for {}: {}".format(md.title_or_name(1), thumbnail) ) # fillfull file try: docx_template.render(context, autoescape=True) logger.info( "Vector metadata stored: {} ({})".format( md.title_or_name(slugged=1), md._id ) ) except etree.XMLSyntaxError as e: logger.error( "Invalid character in XML: {}. " "Any special character (<, <, &...)? Check: {}".format( e, context.get("varEditAPP") ) ) except (UnicodeEncodeError, UnicodeDecodeError) as e: logger.error( "Encoding error: {}. " "Any special character (<, <, &...)? Check: {}".format( e, context.get("varEditAPP") ) ) except Exception as e: logger.error( "Unexpected error: {}. Check: {}".format(e, context.get("varEditAPP")) ) # end of function return
# ############################################################################### # ###### Stand alone program ######## # ################################### if __name__ == "__main__": """ Standalone execution and basic tests """ # ------------ Specific imports ---------------- from csv import DictReader from dotenv import load_dotenv from logging.handlers import RotatingFileHandler from os import environ import urllib3 from isogeo_pysdk import Isogeo # ------------ Log & debug ---------------- logger = logging.getLogger() logging.captureWarnings(True) logger.setLevel(logging.DEBUG) # logger.setLevel(logging.INFO) log_format = logging.Formatter( "%(asctime)s || %(levelname)s " "|| %(module)s - %(lineno)d ||" " %(funcName)s || %(message)s" ) # debug to the file log_file_handler = RotatingFileHandler("dev_debug.log", "a", 3000000, 1) log_file_handler.setLevel(logging.DEBUG) log_file_handler.setFormatter(log_format) # info to the console log_console_handler = logging.StreamHandler() log_console_handler.setLevel(logging.INFO) log_console_handler.setFormatter(log_format) logger.addHandler(log_file_handler) logger.addHandler(log_console_handler) # ------------ Real start ---------------- # get user ID as environment variables load_dotenv("dev.env") # misc METADATA_TEST_FIXTURE_UUID = environ.get("ISOGEO_FIXTURES_METADATA_COMPLETE") WORKGROUP_TEST_FIXTURE_UUID = environ.get("ISOGEO_WORKGROUP_TEST_UUID") # ignore warnings related to the QA self-signed cert if environ.get("ISOGEO_PLATFORM").lower() == "qa": urllib3.disable_warnings() # for oAuth2 Backend (Client Credentials Grant) Flow isogeo = Isogeo( auth_mode="group", client_id=environ.get("ISOGEO_API_GROUP_CLIENT_ID"), client_secret=environ.get("ISOGEO_API_GROUP_CLIENT_SECRET"), auto_refresh_url="{}/oauth/token".format(environ.get("ISOGEO_ID_URL")), platform=environ.get("ISOGEO_PLATFORM", "qa"), ) # getting a token isogeo.connect() # ------------ Isogeo search -------------------------- search_results = isogeo.search( include="all", specific_md=( "70f1192f67ac43e5987800ead18effb2", "b140d9a92c20416d97c3cdc12dc12607", ), ) isogeo.close() # close session # ------------ REAL START ---------------------------- # output folder Path("_output/").mkdir(exist_ok=True) # template template_path = Path("tests/fixtures/template_Isogeo.docx") assert template_path.is_file() # thumbnails table thumbnails_table_csv_path = Path("tests/fixtures/thumbnails.csv") assert thumbnails_table_csv_path.is_file() # CSV structure csv_headers = ["isogeo_uuid", "isogeo_title_slugged", "img_abs_path"] thumbnails_dict = {} with thumbnails_table_csv_path.open("r", newline="") as csv_thumbnails: reader = DictReader(csv_thumbnails, fieldnames=csv_headers) next(reader, None) # skip header line for row in reader: thumbnails_dict[row.get("isogeo_uuid")] = row.get("img_abs_path") # instanciate toDocx = Isogeo2docx(thumbnails=thumbnails_dict) # parse results and export it for md in search_results.results: # load metadata as object metadata = Metadata.clean_attributes(md) # prepare the template tpl = DocxTemplate(template_path.resolve()) # fill the template toDocx.md2docx(docx_template=tpl, md=metadata) # filename md_name = metadata.title_or_name(slugged=1) uuid = "{}".format(metadata._id[:5]) out_docx_filename = "_output/{}_{}.docx".format(md_name, uuid) # save it tpl.save(out_docx_filename) # delete template object del tpl