Source code for sdt.io.fs

# SPDX-FileCopyrightText: 2020 Lukas Schrangl <lukas.schrangl@tuwien.ac.at>
#
# SPDX-License-Identifier: BSD-3-Clause

"""Some functions related to files and the file system"""
import contextlib
from pathlib import Path
import os
import re
from typing import Dict, List, Tuple, Union


[docs]@contextlib.contextmanager def chdir(path): """Context manager to temporarily change the working directory Parameters ---------- path : str Path of the directory to change to. :py:func:`os.path.expanduser` is called on this. Examples -------- >>> with chdir("subdir"): ... # here the working directory is "subdir" >>> # here we are back """ old_wd = os.getcwd() os.chdir(os.path.expanduser(str(path))) try: yield finally: os.chdir(old_wd)
def _conv_value(v: str) -> Union[int, float, str]: """Trey converting value to int, then float, otherwise return unchanged Helper function for :py:func:`get_files` Parameters ---------- v Value for attempted type conversion Returns ------- Value as int, if possible, otherwise as float, if possible, otherwise unchanged as string. """ for conv in int, float: try: v = conv(v) except ValueError: continue else: break return v
[docs]def get_files(pattern: str, subdir: Union[str, Path] = Path(), id_dict: bool = False ) -> Tuple[List[str], Union[List[Tuple], List[Dict]]]: r"""Get all files matching a regular expression Parameters ---------- pattern Regular expression to search in the file name. Search is performed on the path relative to `subdir`. One can also define groups (using parenthesis), which will be returned in addition to the matching file names. **A note to Windows users: Use a forward slash (/) for subdirectories.** subdir Any regular expression matching will be performed relative to `subdir`. id_dict If `True`, return IDs as a dict. Only works for named groups in `pattern`. Returns ------- Sorted list of file where `pattern` could be matched. as well as values of the groups defined in the `pattern`. Values are converted to int if possible, otherwise conversion to float is attempted. If that fails as well, the string is used. Examples -------- >>> names, ids = get_files(r"^image_(.*)_(\d{3}).tif$", "subdir") >>> names ['image_xxx_001.tif', 'image_xxx_002.tif', 'image_yyy_003.tif'] >>> ids [('xxx', 1), ('xxx', 2), ('yyy', 3)] """ r = re.compile(pattern) flist = [] idlist = [] for dp, dn, fn in os.walk(subdir): reldir = Path(dp).relative_to(subdir) for f in fn: relp = (reldir / f).as_posix() m = r.search(relp) if m is None: continue # For compatibility, append path as string. # However, one could simply append reldir / f flist.append(relp) if id_dict: ids = {k: _conv_value(v) for k, v in m.groupdict().items()} else: ids = tuple(_conv_value(v) for v in m.groups()) idlist.append(ids) slist = sorted(zip(flist, idlist), key=lambda x: x[0]) return [s[0] for s in slist], [s[1] for s in slist]