# Source code for fogdb.raw.smb

# src/fogdb/raw/smb.py
"""Handler class for mapping raw data stored on a samba share."""
import os
import tempfile
import urllib
import urllib.request

from smb.SMBHandler import SMBHandler

from . import BaseHandler
from .reader import parser

# pragma: no cover
# on pylint:
# pylint: disable=too-many-arguments
# 3 arguments passed on to parent class. 7-3 = 4 ;)
# honestly, this is an api interface so imho, 7 args is ok.
# on pytest:
# this module was tested successfully on my local machine.
# Until now, however, I have not figured out how to set up a samba network
# drive in a CI environment like GitHub.
# Help would be greatly appreciated.


class Handler(BaseHandler):  # pragma: no cover
    """Handle data mapping for raw data stored on a samba share.

    Parameters
    ----------
    connection
        `pysmb ConnectionClass instance
        <https://pysmb.readthedocs.io/en/latest/api/smb_SMBConnection.html>`_
        providing the samba shared network connection under the label of
        :paramref:`sharename`.

        This is usually something like::

            from smb.SMBConnection import SMBConnection
            conn = SMBConnection(
                username="MyUserName",
                password="MyPassword",
                my_name="",
                remote_name="192.168.178.1",
            )

    sharename: str
        String labeling the samba shared network service, provided by the
        :paramref:`connection`.

        For a ``Fritz!Box``, this usually is::

            "fritz.nas"

    top_level_folder: str
        String specifying the name of the toplevel folder where the raw data
        is found.

    categories: str, list, default="all"
        String or list of strings specifying which categories (i.e. sublevel
        folders) are used for reading in the data. If ``"all"`` is used, all
        sublevel folders are traversed.

        Can be something like ``"crawford"``, ``"jacke"``,
        ``"myRand0mSUBf0lder"``, ...

    dtype: str, default="txt"
        String specifying the data type of the raw datafiles. If ``"all"`` is
        used, data type is not filtered.

        Can be something like ``"rst"``, ``"cfg"``, ...

    excl_dirs: ~collections.abc.Container
        Container of strings specifying folder names to exclude during the
        mapping.
    """

    def __init__(
        self,
        connection,
        sharename,
        top_level_folder,
        categories="all",
        dtype="txt",
        excl_dirs=("FRITZ",),
    ):
        self.connection = connection
        self.sharename = sharename
        self.tlf = top_level_folder

        super().__init__(
            categories=categories,
            dtype=dtype,
            excl_dirs=excl_dirs,
        )

    # pylint: disable=too-many-locals
    # imho constructing the smb-string is way better this way
    # so im gonna be a little liberal with the amount of locals in this one
    def map_source_file_data(self, relative_file_path):
        """Return source file data mappings.

        The file is fetched through an ``smb://`` URL built from the
        connection's credentials and host, the share name, the top-level
        folder and :paramref:`relative_file_path`. Its content is copied as
        UTF-8 text into a temporary file, which is then handed to the parser
        registered for the inferred file type.

        Parameters
        ----------
        relative_file_path: str
            Path of the file, relative to the top-level folder.

        Returns
        -------
        object
            Whatever the parser selected for the inferred file type returns.

        Raises
        ------
        UnicodeDecodeError
            If the fetched file content is not valid UTF-8.
        """
        uname = self.connection.username
        pwd = self.connection.password
        host = self.connection.remote_name
        sharename = self.sharename
        top = self.tlf
        path = relative_file_path

        # NOTE(review): credentials and path are interpolated unescaped, so
        # characters such as "@", "#" or ";" may break the URL. Confirm how
        # SMBHandler decodes the URL before adding any quoting here.
        conn_string = f"smb://{uname}:{pwd}@{host}/{sharename}/{top}/{path}"

        opener = urllib.request.build_opener(SMBHandler)

        # The context manager closes the remote handle even if read() raises.
        with opener.open(conn_string) as file_handler:
            data = file_handler.read()

        # A file inside a temporary directory can be reopened by name on
        # every platform; a still-open NamedTemporaryFile cannot be reopened
        # on Windows.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, "raw_data")

            with open(tmp_path, "w", encoding="utf-8") as fle:
                fle.write(data.decode("utf-8"))

            # infer file type
            dtype = self._infer_file_type(relative_file_path)
            parsed_data = parser[dtype](tmp_path)

        return parsed_data

    # pylint: enable=too-many-locals

    def _list_shared_path(self, relative_file_path):
        """Return the share's listing of the path below the top-level folder."""
        return self.connection.listPath(
            self.sharename, os.path.join(self.tlf, relative_file_path)
        )

    def _list_top_level_folders(self, relative_file_path):
        """Return alphabetically sorted list of folder names found in path."""
        # "." and ".." are skipped in addition to the configured exclusions.
        _excl = (".", "..", *self.exclude)

        folders = [
            entry.filename
            for entry in self._list_shared_path(relative_file_path)
            if entry.isDirectory and entry.filename not in _excl
        ]

        return sorted(folders)

    def _list_files(self, relative_file_path):
        """Return alphabetically sorted list of file names found in path."""
        found_files = [
            found.filename
            for found in self._list_shared_path(relative_file_path)
            if not found.isDirectory
        ]

        # Sorting and filtering are delegated to the inherited helper.
        return self._sort_and_filter_file_list(found_files)
# pylint: enable=too-many-arguments