Split the functionality file across new modules, as it was getting too long on its own. All functions remain the same, but imports will need to be updated.
This commit is contained in:
@@ -1,3 +0,0 @@
|
||||
|
||||
from core.correctness.validation import *
|
||||
from core.correctness.vars import *
|
||||
@@ -8,7 +8,7 @@ Author(s): David Marchant
|
||||
from datetime import datetime
|
||||
from inspect import signature
|
||||
from os.path import sep, exists, isfile, isdir, dirname
|
||||
from typing import Any, _SpecialForm, Union, Tuple, Type, Dict, List, \
|
||||
from typing import Any, _SpecialForm, Union, Type, Dict, List, \
|
||||
get_origin, get_args
|
||||
|
||||
from core.correctness.vars import VALID_PATH_CHARS, get_not_imp_msg, \
|
||||
@@ -248,22 +248,6 @@ def valid_non_existing_path(variable:str, allow_base:bool=False):
|
||||
raise ValueError(
|
||||
f"Route to requested path '{variable}' does not exist.")
|
||||
|
||||
def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
    """Create a place for debug messages to be sent. Always returns a place,
    along with a logging level.

    :param print: (any) [optional] The object debug messages will be written
    to. It must expose a callable 'write' attribute. Default is None, in which
    case no target is set up.

    :param logging: (int) [optional] The logging level to pair with the
    target. Validated with check_type. Default is 0.

    :return: (Tuple[Any,int]) The given target and logging level, or
    (None, 0) if no target was given.

    :raises TypeError: If the target does not have a callable 'write'
    attribute.
    """
    check_type(logging, int)
    if print is None:
        return None, 0

    # Every Python value is an instance of object, so an isinstance check
    # against object can never fail. The only meaningful requirement is that
    # the target can be written to.
    writeable = getattr(print, "write", None)
    if not writeable or not callable(writeable):
        raise TypeError("Print object does not implement required "
            "'write' function")

    return print, logging
|
||||
|
||||
def valid_meow_dict(meow_dict:Dict[str,Any], msg:str,
|
||||
keys:Dict[str,Type])->None:
|
||||
"""Check given dictionary expresses a meow construct. This won't do much
|
||||
|
||||
@@ -1,402 +0,0 @@
|
||||
# TODO comments
|
||||
import copy
|
||||
import hashlib
|
||||
import json
|
||||
import nbformat
|
||||
import os
|
||||
import yaml
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from multiprocessing.connection import Connection, wait as multi_wait
|
||||
# Need to import additional Connection type for Windows machines
|
||||
if os.name == 'nt':
|
||||
from multiprocessing.connection import PipeConnection
|
||||
from multiprocessing.queues import Queue
|
||||
from papermill.translators import papermill_translators
|
||||
from typing import Any, Dict
|
||||
from random import SystemRandom
|
||||
|
||||
from core.correctness.validation import check_type, valid_existing_file_path, \
|
||||
valid_path, check_script
|
||||
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
|
||||
VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_INFO, \
|
||||
EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
|
||||
JOB_RECIPE, JOB_RULE, EVENT_RULE, JOB_STATUS, STATUS_QUEUED, \
|
||||
JOB_CREATE_TIME, JOB_REQUIREMENTS, WATCHDOG_BASE, WATCHDOG_HASH, \
|
||||
EVENT_TYPE_WATCHDOG
|
||||
|
||||
# mig trigger keyword replacements
|
||||
KEYWORD_PATH = "{PATH}"
|
||||
KEYWORD_REL_PATH = "{REL_PATH}"
|
||||
KEYWORD_DIR = "{DIR}"
|
||||
KEYWORD_REL_DIR = "{REL_DIR}"
|
||||
KEYWORD_FILENAME = "{FILENAME}"
|
||||
KEYWORD_PREFIX = "{PREFIX}"
|
||||
KEYWORD_BASE = "{VGRID}"
|
||||
KEYWORD_EXTENSION = "{EXTENSION}"
|
||||
KEYWORD_JOB = "{JOB}"
|
||||
|
||||
|
||||
#TODO Make this guaranteed unique
|
||||
def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[],
        charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
    """
    Generates a random identifier that is not already in use.

    :param prefix: (str) [optional] Text placed at the start of the
    identifier. It counts towards the total length. Default is ''.

    :param length: (int) [optional] Total length of the identifier. If the
    prefix is at least this long then no random characters are added.
    Default is 16.

    :param existing_ids: (List[str]) [optional] Identifiers that must not be
    returned. It is only read, never modified. Default is an empty list.

    :param charset: (str) [optional] The characters that random characters
    are drawn from.

    :param attempts: (int) [optional] How many candidates are tried before
    giving up. Default is 24.

    :return: (str) The generated identifier.

    :raises ValueError: If every attempt produced an identifier already in
    existing_ids.
    """
    random_length = max(length - len(prefix), 0)
    # A single generator serves the whole call, there is no need to build a
    # new one for each character.
    rng = SystemRandom()
    for _ in range(attempts):
        candidate = prefix + ''.join(
            rng.choice(charset) for _ in range(random_length))
        if candidate not in existing_ids:
            return candidate
    raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
        f"using values '{charset}' and length of '{length}'.")
|
||||
|
||||
def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Waits on the given channels using the implementation for the current
    operating system.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on.

    :return: (List[VALID_CHANNELS]) Whatever the platform specific wait
    function returns for the given channels.
    """
    # os.name is 'nt' on Windows, everything else uses the linux variant
    platform_wait = wait_windows if os.name == 'nt' else wait_linux
    return platform_wait(inputs)
|
||||
|
||||
def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Windows variant of wait. Handles PipeConnection objects in addition to
    Connection and Queue objects.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on. Entries
    whose exact type is not Connection, PipeConnection or Queue are ignored.

    :return: (List[VALID_CHANNELS]) The entries of inputs reported as ready,
    in their original order.
    """
    # Collect the waitable objects. Queues cannot be waited on directly so
    # their reader connection is used instead.
    waitables = []
    for channel_type in (Connection, PipeConnection):
        waitables.extend(c for c in inputs if type(c) is channel_type)
    waitables.extend(c._reader for c in inputs if type(c) is Queue)

    ready = multi_wait(waitables)

    # Map the ready objects back onto the channels that were passed in
    ready_inputs = []
    for channel in inputs:
        kind = type(channel)
        if kind is Connection or kind is PipeConnection:
            if channel in ready:
                ready_inputs.append(channel)
        elif kind is Queue:
            if channel._reader in ready:
                ready_inputs.append(channel)
    return ready_inputs
|
||||
|
||||
def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Non-Windows variant of wait, handling Connection and Queue objects.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on. Entries
    whose exact type is not Connection or Queue are ignored.

    :return: (List[VALID_CHANNELS]) The entries of inputs reported as ready,
    in their original order.
    """
    # Queues cannot be waited on directly so their reader connection is used
    waitables = [c for c in inputs if type(c) is Connection]
    waitables.extend(c._reader for c in inputs if type(c) is Queue)

    ready = multi_wait(waitables)

    # Map the ready objects back onto the channels that were passed in
    ready_inputs = []
    for channel in inputs:
        kind = type(channel)
        if kind is Connection:
            if channel in ready:
                ready_inputs.append(channel)
        elif kind is Queue:
            if channel._reader in ready:
                ready_inputs.append(channel)
    return ready_inputs
|
||||
|
||||
def _get_file_sha256(file_path):
    """
    Calculates the SHA256 hash of a file, reading it in chunks of
    HASH_BUFFER_SIZE so the whole file is never held in memory at once.

    :param file_path: The path of the file to hash.

    :return: (str) The hexadecimal digest of the file contents.
    """
    digest = hashlib.sha256()

    with open(file_path, 'rb') as source:
        # An empty read marks the end of the file
        for chunk in iter(lambda: source.read(HASH_BUFFER_SIZE), b''):
            digest.update(chunk)

    return digest.hexdigest()
|
||||
|
||||
def get_file_hash(file_path:str, hash:str, hint:str=""):
    """
    Calculates the hash of a file using the named hashing algorithm.

    :param file_path: (str) The file to hash. Checked with
    valid_existing_file_path before hashing.

    :param hash: (str) The name of the hashing algorithm. Only SHA256 is
    currently available.

    :param hint: (str) [optional] Passed on to check_type when validating
    hash. Default is ''.

    :return: The result of the selected hashing function.

    :raises KeyError: If the requested algorithm is not available.
    """
    check_type(hash, str, hint=hint)

    valid_existing_file_path(file_path)

    # Dispatch table of supported algorithms
    valid_hashes = {
        SHA256: _get_file_sha256
    }
    if hash not in valid_hashes:
        # The closing quote after the list was missing from this message
        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
            f"'{list(valid_hashes.keys())}'")

    return valid_hashes[hash](file_path)
|
||||
|
||||
def rmtree(directory:str):
    """
    Remove a directory and all its contents.
    Should be faster than shutil.rmtree

    Symbolic links are removed as links. Whatever they point to is left
    untouched.

    :param: (str) The directory to empty and remove

    :return: No return
    """
    if not os.path.exists(directory):
        return

    # A link to a directory must only be unlinked. Walking it would delete
    # the contents of its target instead.
    if os.path.islink(directory):
        os.remove(directory)
        return

    # Walking bottom-up means that every sub-directory has already been
    # emptied by the time its parent is visited, so it can be removed
    # directly without recursing.
    for root, dirs, files in os.walk(directory, topdown=False):
        for file in files:
            os.remove(os.path.join(root, file))
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            # os.walk lists links to directories amongst the directories,
            # but does not descend into them by default.
            if os.path.islink(dir_path):
                os.remove(dir_path)
            else:
                os.rmdir(dir_path)
    os.rmdir(directory)
|
||||
|
||||
def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
    """
    Creates a new directory at the given path, along with any missing parent
    directories.

    :param path: (str) The directory path.

    :param can_exist: (boolean) [optional] Passed to os.makedirs as exist_ok,
    so decides whether a directory already present at the path is accepted or
    causes an error. Default is True (e.g. no error is thrown if the path
    already exists)

    :param ensure_clean: (boolean) [optional] If True, anything already
    present at the path is removed first so that the resulting directory is
    empty. Default is False.

    :return: No return

    :raises ValueError: If the path already exists and is a file.
    """
    already_present = os.path.exists(path)

    if already_present and os.path.isfile(path):
        raise ValueError(
            f"Cannot make directory in {path} as it already exists and is "
            "a file")

    if already_present and ensure_clean:
        rmtree(path)

    os.makedirs(path, exist_ok=can_exist)
|
||||
|
||||
def read_file(filepath:str):
    """
    Reads the entire contents of a file in text mode.

    :param filepath: (str) The file to read.

    :return: (str) The contents of the file.
    """
    with open(filepath, 'r') as handle:
        contents = handle.read()
    return contents
|
||||
|
||||
def read_file_lines(filepath:str):
    """
    Reads a file in text mode as a list of lines.

    :param filepath: (str) The file to read.

    :return: (List[str]) The lines of the file, as given by readlines.
    """
    with open(filepath, 'r') as handle:
        lines = handle.readlines()
    return lines
|
||||
|
||||
def write_file(source:str, filename:str):
    """
    Writes a string to a file in text mode, replacing any previous contents.

    :param source: (str) The text to write.

    :param filename: (str) The filename to be written to.

    :return: No return
    """
    with open(filename, 'w') as handle:
        handle.write(source)
|
||||
|
||||
def read_yaml(filepath:str):
    """
    Reads a file path as a yaml object.

    :param filepath: (str) The file to read.

    :return: (object) An object read from the file.
    """
    # NOTE(review): yaml.Loader is the full loader and can construct
    # arbitrary Python objects. Only use this on trusted files, or consider
    # yaml.SafeLoader.
    with open(filepath, 'r') as stream:
        loaded = yaml.load(stream, Loader=yaml.Loader)
    return loaded
|
||||
|
||||
def write_yaml(source:Any, filename:str):
    """
    Writes a given object to a yaml file, using block style rather than
    flow style.

    :param source: (any) A python object to be written.

    :param filename: (str) The filename to be written to.

    :return: No return
    """
    with open(filename, 'w') as stream:
        yaml.dump(source, stream, default_flow_style=False)
|
||||
|
||||
def read_notebook(filepath:str):
    """
    Reads a jupyter notebook file as a parsed json object.

    :param filepath: (str) The notebook to read. Checked with valid_path
    using the 'ipynb' extension before it is opened.

    :return: The object parsed from the json contents of the file.
    """
    valid_path(filepath, extension="ipynb")
    with open(filepath, 'r') as notebook_file:
        notebook = json.load(notebook_file)
    return notebook
|
||||
|
||||
def write_notebook(source:Dict[str,Any], filename:str):
    """
    Writes the given notebook source code to a given filename, as json.

    :param source: (dict) The notebook source dictionary.

    :param filename: (str) The filename to write to.

    :return: No return
    """
    with open(filename, 'w') as notebook_file:
        json.dump(source, notebook_file)
|
||||
|
||||
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
|
||||
def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any],
        parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
    """
    Creates a copy of a notebook in which assignments to the given parameter
    names have their values replaced. The input notebook is not modified.

    Only lines of code cells that contain exactly one '=' and whose left hand
    side, with spaces removed, is a key of parameters are replaced.

    :param jupyter_notebook: (dict) The notebook to parameterize.

    :param parameters: (dict) Mapping of variable names to their new values.

    :param expand_env_values: (bool) [optional] If True, string values
    starting with 'ENV_' are replaced by the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (dict) The parameterized copy of the notebook.

    :raises Warning: If the notebook is not nbformat version 4.

    :raises AttributeError: If the notebook metadata has neither a
    'language_info' nor a 'kernelspec' entry.
    """
    nbformat.validate(jupyter_notebook)
    check_type(parameters, Dict,
        hint="parameterize_jupyter_notebook.parameters")

    if jupyter_notebook["nbformat"] != 4:
        raise Warning(
            "Parameterization designed to work with nbformat version 4. "
            f"Differing version of '{jupyter_notebook['nbformat']}' may "
            "produce unexpeted results.")

    # Load input notebook. 'language_info' takes precedence when both
    # entries are present, but 'kernelspec' alone is also sufficient.
    metadata = jupyter_notebook["metadata"]
    if "language_info" in metadata:
        kernel_name = metadata["language_info"]["name"]
        language = metadata["language_info"]["name"]
    elif "kernelspec" in metadata:
        kernel_name = metadata["kernelspec"]["name"]
        language = metadata["kernelspec"]["language"]
    else:
        raise AttributeError(
            f"Notebook lacks key language and/or kernel_name attributes "
            "within metadata")

    translator = papermill_translators.find_translator(kernel_name, language)

    output_notebook = copy.deepcopy(jupyter_notebook)

    # Find each code cell, as only those can contain assignments
    cells = output_notebook["cells"]
    code_cells = [
        (idx, cell) for idx, cell in enumerate(cells) \
        if cell["cell_type"] == "code"
    ]
    env_prefix = "ENV_"
    for idx, cell in code_cells:
        cell_updated = False
        source = cell["source"]
        # Either single string or a list of strings
        if isinstance(source, str):
            lines = source.split("\n")
        else:
            lines = source

        for idy, line in enumerate(lines):
            if "=" not in line:
                continue
            d_line = [part.replace(" ", "") for part in line.split("=")]
            # Matching parameter name
            if len(d_line) != 2 or d_line[0] not in parameters:
                continue
            value = parameters[d_line[0]]
            # Whether to expand value from os env
            if (
                expand_env_values
                and isinstance(value, str)
                and value.startswith(env_prefix)
            ):
                # Only the leading marker is dropped, so that variable
                # names which themselves contain 'ENV_' stay intact.
                env_var = value[len(env_prefix):]
                value = os.getenv(
                    env_var,
                    "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
                )
            lines[idy] = translator.assign(
                d_line[0], translator.translate(value)
            )

            cell_updated = True
        if cell_updated:
            cells[idx]["source"] = "\n".join(lines)

    # Validate that the parameterized notebook is still valid
    nbformat.validate(output_notebook, version=4)

    return output_notebook
|
||||
|
||||
def parameterize_python_script(script:List[str], parameters:Dict[str,Any],
        expand_env_values:bool=False)->List[str]:
    """
    Creates a copy of a script in which assignments to the given parameter
    names have their values replaced. The input script is not modified.

    Only lines that contain exactly one '=' and whose left hand side, with
    spaces removed, is a key of parameters are replaced. A replaced line is
    rewritten as '<name> = <repr of value>'.

    :param script: (List[str]) The lines of the script. Checked with
    check_script.

    :param parameters: (dict) Mapping of variable names to their new values.

    :param expand_env_values: (bool) [optional] If True, string values
    starting with 'ENV_' are replaced by the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (List[str]) The lines of the parameterized script.
    """
    check_script(script)
    check_type(parameters, Dict
        ,hint="parameterize_python_script.parameters")

    output_script = copy.deepcopy(script)

    env_prefix = "ENV_"
    for i, line in enumerate(output_script):
        if "=" not in line:
            continue
        d_line = [part.replace(" ", "") for part in line.split("=")]
        # Matching parameter name
        if len(d_line) != 2 or d_line[0] not in parameters:
            continue
        value = parameters[d_line[0]]
        # Whether to expand value from os env
        if (
            expand_env_values
            and isinstance(value, str)
            and value.startswith(env_prefix)
        ):
            # Only the leading marker is dropped, so that variable names
            # which themselves contain 'ENV_' stay intact.
            env_var = value[len(env_prefix):]
            value = os.getenv(
                env_var,
                "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
            )
        output_script[i] = f"{d_line[0]} = {repr(value)}"

    # Validate that the parameterized script is still valid
    check_script(output_script)

    return output_script
|
||||
|
||||
def print_debug(print_target, debug_level, msg, level)->None:
    """Function to print a message to the debug target. Nothing is printed
    if there is no target, or if the level of the message is greater than
    the debug level of the target.

    :param print_target: The file-like object to print to, or None.

    :param debug_level: The highest message level that will be printed.

    :param msg: The message to print.

    :param level: The level of the message. DEBUG_INFO and DEBUG_WARNING are
    labelled as such, any other level is labelled as an error.

    :return: No return
    """
    if print_target is None:
        return
    if not level <= debug_level:
        return

    if level == DEBUG_INFO:
        status = "INFO"
    elif level == DEBUG_WARNING:
        status = "WARNING"
    else:
        status = "ERROR"
    print(f"{status}: {msg}", file=print_target)
|
||||
|
||||
def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str,
        monitor_base:str)->Dict[str,str]:
    """Function to replace all MEOW magic words in a dictionary with dynamic
    values.

    :param old_dict: (dict) The dictionary to process. It is not modified.

    :param job_id: (str) Replacement for the job keyword.

    :param src_path: (str) The path that the path, directory, filename,
    prefix and extension replacements are derived from.

    :param monitor_base: (str) Replacement for the base keyword, and the
    directory that relative paths are calculated from.

    :return: (dict) A new dictionary with the same keys. String values have
    had their keywords replaced, all other values are carried over as is.
    """
    filename = os.path.basename(src_path)
    dirname = os.path.dirname(src_path)
    relpath = os.path.relpath(src_path, monitor_base)
    reldirname = os.path.dirname(relpath)
    (prefix, extension) = os.path.splitext(filename)

    # Replacements are applied one after another, in this order
    substitutions = (
        (KEYWORD_PATH, src_path),
        (KEYWORD_REL_PATH, relpath),
        (KEYWORD_DIR, dirname),
        (KEYWORD_REL_DIR, reldirname),
        (KEYWORD_FILENAME, filename),
        (KEYWORD_PREFIX, prefix),
        (KEYWORD_BASE, monitor_base),
        (KEYWORD_EXTENSION, extension),
        (KEYWORD_JOB, job_id),
    )

    new_dict = {}
    for var, val in old_dict.items():
        if isinstance(val, str):
            for keyword, replacement in substitutions:
                val = val.replace(keyword, replacement)
        new_dict[var] = val

    return new_dict
|
||||
|
||||
def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """Function to create a MEOW event dictionary.

    :param event_type: (str) Stored under EVENT_TYPE.

    :param path: (str) Stored under EVENT_PATH.

    :param rule: (any) Stored under EVENT_RULE.

    :param extras: (dict) [optional] Additional entries for the event. It is
    not modified, and the three entries above take priority over any entry
    in extras with the same key. Default is an empty dictionary.

    :return: (dict) The new event dictionary.
    """
    # Start from a copy of the extras so the required keys always win
    event = dict(extras)
    event[EVENT_PATH] = path
    event[EVENT_TYPE] = event_type
    event[EVENT_RULE] = rule
    return event
|
||||
|
||||
def create_watchdog_event(path:str, rule:Any, base:str, hash:str,
        extras:Dict[Any,Any]={})->Dict[Any,Any]:
    """Function to create a MEOW event dictionary of type
    EVENT_TYPE_WATCHDOG.

    :param path: (str) Passed to create_event as the event path.

    :param rule: (any) Passed to create_event as the event rule.

    :param base: (str) Stored in the event extras under WATCHDOG_BASE.

    :param hash: (str) Stored in the event extras under WATCHDOG_HASH.

    :param extras: (dict) [optional] Additional entries for the event. It is
    not modified, and the hash and base entries take priority over any entry
    in extras with the same key. Default is an empty dictionary.

    :return: (dict) The dictionary returned by create_event.
    """
    watchdog_extras = {
        **extras,
        WATCHDOG_HASH: hash,
        WATCHDOG_BASE: base
    }
    return create_event(
        EVENT_TYPE_WATCHDOG, path, rule, extras=watchdog_extras)
|
||||
|
||||
def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """Function to create a MEOW job dictionary.

    :param job_type: (str) Stored under JOB_TYPE.

    :param event: (dict) The event that the job is created for. Its
    EVENT_RULE entry is read for the pattern name, recipe name, rule name
    and recipe requirements.

    :param extras: (dict) [optional] Additional entries for the job. It is
    not modified, and the generated job entries take priority over any entry
    in extras with the same key. Default is an empty dictionary.

    :return: (dict) The new job dictionary, created with a newly generated
    id, the STATUS_QUEUED status and the current time as its creation time.
    """
    #TODO compress event?
    job_dict = {
        JOB_ID: generate_id(prefix="job_"),
        JOB_EVENT: event,
        JOB_TYPE: job_type,
    }

    # The remaining entries are all derived from the rule of the event
    rule = event[EVENT_RULE]
    job_dict[JOB_PATTERN] = rule.pattern.name
    job_dict[JOB_RECIPE] = rule.recipe.name
    job_dict[JOB_RULE] = rule.name
    job_dict[JOB_STATUS] = STATUS_QUEUED
    job_dict[JOB_CREATE_TIME] = datetime.now()
    job_dict[JOB_REQUIREMENTS] = rule.recipe.requirements

    merged = dict(extras)
    merged.update(job_dict)
    return merged
|
||||
|
||||
def lines_to_string(lines:List[str])->str:
    """Function to join a list of str lines into a single string, placing a
    newline character between each pair of lines.

    :param lines: (List[str]) The lines to join.

    :return: (str) The joined string. No newline is added after the last
    line.
    """
    newline = "\n"
    return newline.join(lines)
|
||||
+1
-1
@@ -19,7 +19,7 @@ from core.correctness.vars import VALID_RECIPE_NAME_CHARS, \
|
||||
SWEEP_JUMP, SWEEP_START, SWEEP_STOP, get_drt_imp_msg
|
||||
from core.correctness.validation import valid_string, check_type, \
|
||||
check_implementation, valid_list, valid_dict
|
||||
from core.functionality import generate_id
|
||||
from functionality.naming import generate_id
|
||||
|
||||
|
||||
class BaseRecipe:
|
||||
|
||||
+5
-5
@@ -15,12 +15,12 @@ from random import randrange
|
||||
from typing import Any, Union, Dict, List
|
||||
|
||||
from core.correctness.vars import DEBUG_WARNING, DEBUG_INFO, EVENT_TYPE, \
|
||||
VALID_CHANNELS, JOB_ID, META_FILE, DEFAULT_JOB_OUTPUT_DIR, \
|
||||
DEFAULT_JOB_QUEUE_DIR
|
||||
from core.correctness.validation import setup_debugging, check_type, \
|
||||
valid_list, valid_dir_path
|
||||
from core.functionality import print_debug, wait, read_yaml, make_dir
|
||||
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR
|
||||
from core.correctness.validation import check_type, valid_list, valid_dir_path
|
||||
from core.meow import BaseHandler, BaseMonitor, BaseConductor
|
||||
from functionality.debug import setup_debugging, print_debug
|
||||
from functionality.file_io import make_dir, read_yaml
|
||||
from functionality.process_io import wait
|
||||
|
||||
|
||||
class MeowRunner:
|
||||
|
||||
Reference in New Issue
Block a user