From 4013b7e964e034f2ecf39175ce176d96f2fdc52c Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Wed, 18 Sep 2024 10:48:06 +0100 Subject: [PATCH 01/17] First go at adding the preprocess functionality --- pyproject.toml | 1 + src/yamlprocessor/datapreprocessor.py | 75 +++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100755 src/yamlprocessor/datapreprocessor.py diff --git a/pyproject.toml b/pyproject.toml index 2c2bb8d..650424d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ test = ["flake8", "pytest", "pytest-cov"] [project.scripts] yp-data = "yamlprocessor.dataprocess:main" +yp-preprocessor = "yamlprocessor.datapreprocess:main" yp-schema = "yamlprocessor.schemaprocess:main" [tool.pytest.ini_options] diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py new file mode 100755 index 0000000..d658903 --- /dev/null +++ b/src/yamlprocessor/datapreprocessor.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +"""The pre-process looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates + the associated file at this point in the input file. The result is written to the + output file. + +Example usage: + python datapreprocessor.py --define JOPA_AUX=/path/to/my/file +""" + +import argparse +import re + +class DataPreProcessor: + + def __init__(self, replacements): + self.replacements = replacements + + def process_yaml(self, in_yaml, out_yaml): + # read yaml file + src_file = open(in_yaml, 'r') + lines = src_file.readlines() + src_file.close() + + # process yaml file + new_line = [] + for iline in lines: + # look for specific pattern in each line + if 'DIRECT_INCLUDE=' in iline: + # retrieve header file + yaml_header_File = iline.split('=')[1].rstrip() + # Replace variables in the string + print("yaml_header_File = ", yaml_header_File) + for key, value in self.replacements.items(): + yaml_header_File = re.sub(rf'\${key}', value, yaml_header_File) + print("yaml_header_File = ", yaml_header_File) + # open header file + with open(yaml_header_File, 'r') as file: + auxFileData = file.read() + # update lies for new file + new_line.append(auxFileData) + else: + new_line.append(iline) + # same the outcome + with open(out_yaml, "w") as file: + file.writelines(new_line) + +def main(): + parser = argparse.ArgumentParser(description="Process input and output files with multiple --define options.") + + # Positional arguments for input and output files + parser.add_argument('input_file', type=str, help='Input file') + parser.add_argument('output_file', type=str, help='Output file') + + # Optional --define arguments + parser.add_argument('--define', action='append', help='Key-value pairs in the format key=value', default=[]) + + # Parse arguments and print for sanity checking + args = parser.parse_args() + print(f"Input file: {args.input_file}") + print(f"Output file: {args.output_file}") + print(f"Defines: {args.define}") + + # process define arguments into a dictionary for passing to the class + key_value_pairs = {} + if args.define: + for item in args.define: + key, value = item.split('=') + key_value_pairs[key] = value + + # Run preprocessor + preprocessor = DataPreProcessor(key_value_pairs) + preprocessor.process_yaml(args.input_file, args.output_file) + +if __name__ == "__main__": + main() From 25f310134e7d45ada0ec4f0402373267df190e6b Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Wed, 18 Sep 2024 10:56:42 +0100 Subject: [PATCH 02/17] Remove pointless print statements --- src/yamlprocessor/datapreprocessor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index d658903..f2b25d1 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -29,10 +29,8 @@ def process_yaml(self, in_yaml, out_yaml): # retrieve header file yaml_header_File = iline.split('=')[1].rstrip() # Replace variables in the string - print("yaml_header_File = ", yaml_header_File) for key, value in self.replacements.items(): yaml_header_File = re.sub(rf'\${key}', value, yaml_header_File) - print("yaml_header_File = ", yaml_header_File) # open header file with open(yaml_header_File, 'r') as file: auxFileData = file.read() From fb2dd5e2b4115a36e4aaafb4f1892e93cd7e76f2 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Wed, 18 Sep 2024 14:19:33 +0100 Subject: [PATCH 03/17] Fix following Matt S comments and remove spelling mistakes --- src/yamlprocessor/datapreprocessor.py | 44 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index f2b25d1..ac89f18 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -"""The pre-process looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates - the associated file at this point in the input file. The result is written to the - output file. +"""The datapreprocessor looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates + the associated file at this point in the input file. The result is written to the + output file or standard out if - is specified. Example usage: - python datapreprocessor.py --define JOPA_AUX=/path/to/my/file + python datapreprocessor.py -o --define JOPA_AUX=/path/to/my/file + python datapreprocessor.py -o- --define JOPA_AUX=/path/to/my/file """ import argparse -import re +import sys class DataPreProcessor: @@ -28,37 +29,46 @@ def process_yaml(self, in_yaml, out_yaml): if 'DIRECT_INCLUDE=' in iline: # retrieve header file yaml_header_File = iline.split('=')[1].rstrip() - # Replace variables in the string + # replace variables in the string for key, value in self.replacements.items(): - yaml_header_File = re.sub(rf'\${key}', value, yaml_header_File) + yaml_header_File = yaml_header_File.replace(f'${key}', value) # open header file with open(yaml_header_File, 'r') as file: auxFileData = file.read() - # update lies for new file + # update lines for new file new_line.append(auxFileData) else: new_line.append(iline) - # same the outcome - with open(out_yaml, "w") as file: - file.writelines(new_line) + # save the result + if out_yaml == '-': + out_file = sys.stdout + else: + out_file = open(out_yaml, 'w') + out_file.writelines(new_line) def main(): parser = argparse.ArgumentParser(description="Process input and output files with multiple --define options.") - # Positional arguments for input and output files + # Positional argument for input parser.add_argument('input_file', type=str, help='Input file') - parser.add_argument('output_file', type=str, help='Output file') + + # Output file specified + parser.add_argument( + '--output-file', '-o', + metavar='FILENAME', + action="store", + help='Name of output file, "-" for STDOUT') # Optional --define arguments parser.add_argument('--define', action='append', help='Key-value pairs in the format key=value', default=[]) # Parse arguments and print for sanity checking args = parser.parse_args() - print(f"Input file: {args.input_file}") - print(f"Output file: {args.output_file}") - print(f"Defines: {args.define}") + print(f"Input file: {args.input_file}", file=sys.stderr) + print(f"Output file: {args.output_file}", file=sys.stderr) + print(f"Defines: {args.define}", file=sys.stderr) - # process define arguments into a dictionary for passing to the class + # Process define arguments into a dictionary for passing to the class key_value_pairs = {} if args.define: for item in args.define: From 08d217ce171f0157fcc330e73892c98da80bdf24 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Wed, 18 Sep 2024 15:01:32 +0100 Subject: [PATCH 04/17] Documentation added for the preprocessor --- docs/data-preprocessor.rst | 71 +++++++++++++++++++++++++++ docs/index.rst | 1 + src/yamlprocessor/datapreprocessor.py | 8 ++- 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 docs/data-preprocessor.rst diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst new file mode 100644 index 0000000..8a48ec3 --- /dev/null +++ b/docs/data-preprocessor.rst @@ -0,0 +1,71 @@ +Data Pre-Processor +================== + +The preprocessor looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates +the associated file at this point in the input file. The result is written to the +output file or standard out if - is specified. + +It is expected that the keyword in the input yaml file will take the following format: + +.. code-block:: yaml + + DIRECT_INCLUDE: /path/to/file/to/be/included + +Command line +------------ + +.. code-block:: bash + + yp-preprocessor [options] -o output-file-name input-file-name + +Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for detail. + +Python +------ + +.. code-block:: python + + from yamlprocessor.datapreprocess import DataPreProcessor + preprocessor = DataPreProcessor() + preprocessor.add_replacements_map(keymap) # optional line + preprocessor.process_yaml(input_file, output_file) + +Examples +------------------------ + +Consider an input YAML file containing the following data: + +.. code-block:: yaml + + DIRECT_INCLUDE: a.yaml + + hello: + - location: *planet + targets: + - human + - cat + - dog + +If ``a.yaml`` contains: + +.. code-block:: yaml + + _: + - &planet earth + +Running the preprocessor on the input YAML file will yield the following output: + +.. code-block:: yaml + + _: + - &planet earth + + hello: + - location: *planet + targets: + - human + - cat + - dog + +The preprocessor simply concatenates the contents of `a.yaml` at the correct place in the +input yaml file. This file can then be passed to `yp-data` for parsing. diff --git a/docs/index.rst b/docs/index.rst index cb9425c..32acd1b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,7 @@ User Guide And Reference install basic-usage + data-preprocessor data-process schema-process cli diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index ac89f18..8e0c635 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -13,7 +13,10 @@ class DataPreProcessor: - def __init__(self, replacements): + def __init__(self): + self.replacements = {} + + def add_replacements_map(self, replacements): self.replacements = replacements def process_yaml(self, in_yaml, out_yaml): @@ -76,7 +79,8 @@ def main(): key_value_pairs[key] = value # Run preprocessor - preprocessor = DataPreProcessor(key_value_pairs) + preprocessor = DataPreProcessor() + preprocessor.add_replacements_map(key_value_pairs) preprocessor.process_yaml(args.input_file, args.output_file) if __name__ == "__main__": From 0ca9e03a1b24cd5f3402cf5848c1b5d1ebf3c094 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Fri, 20 Sep 2024 17:43:39 +0100 Subject: [PATCH 05/17] Add testing --- docs/data-preprocessor.rst | 6 +- src/yamlprocessor/datapreprocessor.py | 2 +- .../tests/test_datapreprocess.py | 58 +++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 src/yamlprocessor/tests/test_datapreprocess.py diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst index 8a48ec3..93dfaf8 100644 --- a/docs/data-preprocessor.rst +++ b/docs/data-preprocessor.rst @@ -9,7 +9,7 @@ It is expected that the keyword in the input yaml file will take the following f .. code-block:: yaml - DIRECT_INCLUDE: /path/to/file/to/be/included + DIRECT_INCLUDE=/path/to/file/to/be/included Command line ------------ @@ -25,7 +25,7 @@ Python .. code-block:: python - from yamlprocessor.datapreprocess import DataPreProcessor + from yamlprocessor.datapreprocessor import DataPreProcessor preprocessor = DataPreProcessor() preprocessor.add_replacements_map(keymap) # optional line preprocessor.process_yaml(input_file, output_file) @@ -37,7 +37,7 @@ Consider an input YAML file containing the following data: .. code-block:: yaml - DIRECT_INCLUDE: a.yaml + DIRECT_INCLUDE=a.yaml hello: - location: *planet diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index 8e0c635..7b0d878 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -29,7 +29,7 @@ def process_yaml(self, in_yaml, out_yaml): new_line = [] for iline in lines: # look for specific pattern in each line - if 'DIRECT_INCLUDE=' in iline: + if 'DIRECT_INCLUDE' in iline: # retrieve header file yaml_header_File = iline.split('=')[1].rstrip() # replace variables in the string diff --git a/src/yamlprocessor/tests/test_datapreprocess.py b/src/yamlprocessor/tests/test_datapreprocess.py new file mode 100644 index 0000000..6e62732 --- /dev/null +++ b/src/yamlprocessor/tests/test_datapreprocess.py @@ -0,0 +1,58 @@ +import json + +from dateutil.parser import parse as datetimeparse +import filecmp +import pytest +from ruamel.yaml import YAML + +from ..datapreprocess import ( + DataPreProcessor, main) + + +@pytest.fixture +def yaml(): + return YAML(typ='safe', pure=True) + +def create_input_files(path): + # Write the content to a file named "a.yaml" + with open(path+"/a.yaml", 'w') as file: + file.write("DIRECT_INCLUDE=$FILE_PATH/b.yaml\n") + file.write("\n") + file.write("data:\n") + file.write(" brain: *banana\n") + file.write(" tel: *groups\n") + # Write the content to a file named "b.yaml" + with open(path+"/b.yaml", 'w') as file: + file.write("_:\n") + file.write("- &banana 1\n") + file.write("- &groups [4, 5, 6]\n") + +def create_comparison(path): + with open(path+"/reference_0.yaml", 'w') as file: + file.write("_:\n") + file.write("- &banana 1\n") + file.write("- &groups [4, 5, 6]\n") + file.write("\n") + file.write("data:\n") + file.write(" brain: *banana\n") + file.write(" tel: *groups\n") + +def compare_files(file1, file2): + with open(file1, 'r') as f1, open(file2, 'r') as f2: + content1 = f1.read() + content2 = f2.read() + + assert content1 == content2, f"Files {file1} and {file2} do not match." + +def test_main_0(tmp_path, yaml): + """Test main, basic.""" + # Create test files + create_input_files(tmp_path) + create_comparison(tmp_path) + # Run preprocessor + preprocessor = DataPreProcessor() + keymap = {"$FILE_PATH": tmp_path} + preprocessor.add_replacements_map(keymap) + preprocessor.process_yaml(tmp_path + "/a.yaml", tmp_path + "/test_0.yaml") + # Check output + compare_files(tmp_path + "/test_0.yaml", tmp_path + "/reference_0.yaml") From 59116b505319b57dca97b1549896225e21191c13 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Fri, 20 Sep 2024 21:39:41 +0100 Subject: [PATCH 06/17] Make the test work --- .../tests/test_datapreprocess.py | 73 +++++++++---------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/src/yamlprocessor/tests/test_datapreprocess.py b/src/yamlprocessor/tests/test_datapreprocess.py index 6e62732..e0e56ac 100644 --- a/src/yamlprocessor/tests/test_datapreprocess.py +++ b/src/yamlprocessor/tests/test_datapreprocess.py @@ -5,7 +5,7 @@ import pytest from ruamel.yaml import YAML -from ..datapreprocess import ( +from ..datapreprocessor import ( DataPreProcessor, main) @@ -13,46 +13,43 @@ def yaml(): return YAML(typ='safe', pure=True) -def create_input_files(path): - # Write the content to a file named "a.yaml" - with open(path+"/a.yaml", 'w') as file: - file.write("DIRECT_INCLUDE=$FILE_PATH/b.yaml\n") - file.write("\n") - file.write("data:\n") - file.write(" brain: *banana\n") - file.write(" tel: *groups\n") - # Write the content to a file named "b.yaml" - with open(path+"/b.yaml", 'w') as file: - file.write("_:\n") - file.write("- &banana 1\n") - file.write("- &groups [4, 5, 6]\n") - -def create_comparison(path): - with open(path+"/reference_0.yaml", 'w') as file: - file.write("_:\n") - file.write("- &banana 1\n") - file.write("- &groups [4, 5, 6]\n") - file.write("\n") - file.write("data:\n") - file.write(" brain: *banana\n") - file.write(" tel: *groups\n") - -def compare_files(file1, file2): - with open(file1, 'r') as f1, open(file2, 'r') as f2: - content1 = f1.read() - content2 = f2.read() - - assert content1 == content2, f"Files {file1} and {file2} do not match." - def test_main_0(tmp_path, yaml): """Test main, basic.""" - # Create test files - create_input_files(tmp_path) - create_comparison(tmp_path) + yaml_0 = """ +DIRECT_INCLUDE=$FILE_PATH/aux.yaml + +data: + brain: *banana + tel: *groups +""" + yaml_1 = """ +_: +- &banana 1 +- &groups [4, 5, 6] +""" + reference = """ +_: +- &banana 1 +- &groups [4, 5, 6] + +data: + brain: *banana + tel: *groups +""" + infilename = tmp_path / 'in_0.yaml' + with infilename.open('w') as infile: + infile.write(yaml_0) + + auxfilename = tmp_path / 'aux.yaml' + with auxfilename.open('w') as auxfile: + auxfile.write(yaml_1) + # Run preprocessor preprocessor = DataPreProcessor() - keymap = {"$FILE_PATH": tmp_path} + keymap = {"FILE_PATH": str(tmp_path)} preprocessor.add_replacements_map(keymap) - preprocessor.process_yaml(tmp_path + "/a.yaml", tmp_path + "/test_0.yaml") + outfilename = tmp_path / 'test_0.yaml' + preprocessor.process_yaml(tmp_path / 'in_0.yaml', outfilename) # Check output - compare_files(tmp_path + "/test_0.yaml", tmp_path + "/reference_0.yaml") + ref_yaml = yaml.load(reference) + assert yaml.load(outfilename.open()) == ref_yaml From d9a26e2e71d33efe6d00f575e882486bf8760e6b Mon Sep 17 00:00:00 2001 From: Michael Cooke <48374999+mikecooke77@users.noreply.github.com> Date: Sun, 22 Sep 2024 18:03:08 +0100 Subject: [PATCH 07/17] Add equals back in to distinguish from yaml entry --- src/yamlprocessor/datapreprocessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index 7b0d878..5019eae 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""The datapreprocessor looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates +"""The datapreprocessor looks for the DIRECT_INCLUDE= keyword in the input yaml and concatenates the associated file at this point in the input file. The result is written to the output file or standard out if - is specified. @@ -29,7 +29,7 @@ def process_yaml(self, in_yaml, out_yaml): new_line = [] for iline in lines: # look for specific pattern in each line - if 'DIRECT_INCLUDE' in iline: + if 'DIRECT_INCLUDE=' in iline: # retrieve header file yaml_header_File = iline.split('=')[1].rstrip() # replace variables in the string From 4cba0925594c6085f64beac05b0999da943ef28f Mon Sep 17 00:00:00 2001 From: Michael Cooke <48374999+mikecooke77@users.noreply.github.com> Date: Sun, 22 Sep 2024 18:09:59 +0100 Subject: [PATCH 08/17] Update documentation to include = --- docs/data-preprocessor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst index 93dfaf8..9051988 100644 --- a/docs/data-preprocessor.rst +++ b/docs/data-preprocessor.rst @@ -1,7 +1,7 @@ Data Pre-Processor ================== -The preprocessor looks for the DIRECT_INCLUDE keyword in the input yaml and concatenates +The preprocessor looks for the DIRECT_INCLUDE= keyword in the input yaml and concatenates the associated file at this point in the input file. The result is written to the output file or standard out if - is specified. From 6882f679eb1b236c47212e0219b139e8269b6f1b Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 07:43:01 +0100 Subject: [PATCH 09/17] Add a comment about testing --- docs/install.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/install.rst b/docs/install.rst index 85e1bc3..63d8c22 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -18,3 +18,10 @@ run, for example: conda env create -n yamlprocessor conda activate yamlprocessor python3 -m pip install . + +To check that the code has installed correctly the testing can be run from +the main directory like this: + +.. code-block:: bash + + pytest From 6dbe18ab47a6d2357a4b75a7d182140056a2be14 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 07:51:40 +0100 Subject: [PATCH 10/17] Add cli description --- docs/cli.rst | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 8f378f1..78597f6 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -69,6 +69,30 @@ See :doc:`data-process` for detail. Reference value for date-time substitutions. See also :envvar:`YP_TIME_REF_VALUE`. +yp-preprocesor +-------------- + +Usage: + +.. code-block:: bash + + yp-preprocesor [options] -o output-file-name input-file-name + +See :doc:`data-process` for detail. + +.. program:: yp-data + +.. option:: file-names + + Names of input or input files. Use ``-`` for STDIN/STDOUT. + +.. option:: --out-filename=FILENAME, -o FILENAME + + Name of output file. Use ``-`` for STDOUT. + +.. option:: --define=KEY=VALUE, -D KEY=VALUE + + Map KEY to VALUE for variable substitutions. yp-schema --------- @@ -94,8 +118,8 @@ See :doc:`schema-process` for detail. Common Options -------------- -The following options apply to both :program:`yp-data` and :program:`yp-schema` -commands. +The following options apply to both :program:`yp-data`, :program:`yp-preprocessor` +and :program:`yp-schema` commands. .. program:: yp-* From 111f8c2aa9fc4408a967a129ba16cad016649423 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 07:56:15 +0100 Subject: [PATCH 11/17] Fix toml path and docs --- docs/cli.rst | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 78597f6..2464bd6 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -78,9 +78,9 @@ Usage: yp-preprocesor [options] -o output-file-name input-file-name -See :doc:`data-process` for detail. +See :doc:`data-preprocessor` for detail. -.. program:: yp-data +.. program:: yp-preprocessor .. option:: file-names diff --git a/pyproject.toml b/pyproject.toml index 650424d..8ca2e1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ test = ["flake8", "pytest", "pytest-cov"] [project.scripts] yp-data = "yamlprocessor.dataprocess:main" -yp-preprocessor = "yamlprocessor.datapreprocess:main" +yp-preprocessor = "yamlprocessor.datapreprocessor:main" yp-schema = "yamlprocessor.schemaprocess:main" [tool.pytest.ini_options] From 2adaceee9a6f6149cbf8aa4292cb67f921997d88 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 08:50:50 +0100 Subject: [PATCH 12/17] Fixes for linter --- src/yamlprocessor/datapreprocessor.py | 10 ++++++++-- src/yamlprocessor/tests/test_datapreprocess.py | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index 5019eae..ee2125b 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -49,8 +49,11 @@ def process_yaml(self, in_yaml, out_yaml): out_file = open(out_yaml, 'w') out_file.writelines(new_line) + def main(): - parser = argparse.ArgumentParser(description="Process input and output files with multiple --define options.") + parser = argparse.ArgumentParser( + description="Process input and output files with multiple --define options." + ) # Positional argument for input parser.add_argument('input_file', type=str, help='Input file') @@ -63,7 +66,9 @@ def main(): help='Name of output file, "-" for STDOUT') # Optional --define arguments - parser.add_argument('--define', action='append', help='Key-value pairs in the format key=value', default=[]) + parser.add_argument('--define', action='append', + help='Key-value pairs in the format key=value', default=[] + ) # Parse arguments and print for sanity checking args = parser.parse_args() @@ -83,5 +88,6 @@ def main(): preprocessor.add_replacements_map(key_value_pairs) preprocessor.process_yaml(args.input_file, args.output_file) + if __name__ == "__main__": main() diff --git a/src/yamlprocessor/tests/test_datapreprocess.py b/src/yamlprocessor/tests/test_datapreprocess.py index e0e56ac..615fdbc 100644 --- a/src/yamlprocessor/tests/test_datapreprocess.py +++ b/src/yamlprocessor/tests/test_datapreprocess.py @@ -13,6 +13,7 @@ def yaml(): return YAML(typ='safe', pure=True) + def test_main_0(tmp_path, yaml): """Test main, basic.""" yaml_0 = """ From d3f4f346bc92d2e5955e52bbbd988cef07acd60b Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 09:08:39 +0100 Subject: [PATCH 13/17] Make flake8 pass --- src/yamlprocessor/datapreprocessor.py | 27 ++++++++++++------- .../tests/test_datapreprocess.py | 6 +---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index ee2125b..47db75f 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -1,16 +1,20 @@ #!/usr/bin/env python3 -"""The datapreprocessor looks for the DIRECT_INCLUDE= keyword in the input yaml and concatenates - the associated file at this point in the input file. The result is written to the - output file or standard out if - is specified. +"""The datapreprocessor looks for the DIRECT_INCLUDE= keyword in the input + yaml and concatenates the associated file at this point in the input + file. The result is written to the output file or standard out if - is + specified. Example usage: - python datapreprocessor.py -o --define JOPA_AUX=/path/to/my/file - python datapreprocessor.py -o- --define JOPA_AUX=/path/to/my/file + python datapreprocessor.py -o \ + --define JOPA_AUX=/path/to/my/file + python datapreprocessor.py -o- \ + --define JOPA_AUX=/path/to/my/file """ import argparse import sys + class DataPreProcessor: def __init__(self): @@ -34,7 +38,8 @@ def process_yaml(self, in_yaml, out_yaml): yaml_header_File = iline.split('=')[1].rstrip() # replace variables in the string for key, value in self.replacements.items(): - yaml_header_File = yaml_header_File.replace(f'${key}', value) + yaml_header_File = \ + yaml_header_File.replace(f'${key}', value) # open header file with open(yaml_header_File, 'r') as file: auxFileData = file.read() @@ -52,7 +57,8 @@ def process_yaml(self, in_yaml, out_yaml): def main(): parser = argparse.ArgumentParser( - description="Process input and output files with multiple --define options." + description="Process input and output " + "files with multiple --define options." ) # Positional argument for input @@ -63,10 +69,13 @@ def main(): '--output-file', '-o', metavar='FILENAME', action="store", - help='Name of output file, "-" for STDOUT') + help='Name of output file, "-" for STDOUT' + ) # Optional --define arguments - parser.add_argument('--define', action='append', + parser.add_argument( + '--define', + action='append', help='Key-value pairs in the format key=value', default=[] ) diff --git a/src/yamlprocessor/tests/test_datapreprocess.py b/src/yamlprocessor/tests/test_datapreprocess.py index 615fdbc..838ada4 100644 --- a/src/yamlprocessor/tests/test_datapreprocess.py +++ b/src/yamlprocessor/tests/test_datapreprocess.py @@ -1,12 +1,8 @@ -import json - -from dateutil.parser import parse as datetimeparse -import filecmp import pytest from ruamel.yaml import YAML from ..datapreprocessor import ( - DataPreProcessor, main) + DataPreProcessor) @pytest.fixture From c3b444d0af6fe3e80163c27d8d0d23a407bdbb6a Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 09:18:17 +0100 Subject: [PATCH 14/17] Make doc8 pass --- docs/cli.rst | 4 ++-- docs/data-preprocessor.rst | 20 ++++++++++++-------- docs/install.rst | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 2464bd6..6131509 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -118,8 +118,8 @@ See :doc:`schema-process` for detail. Common Options -------------- -The following options apply to both :program:`yp-data`, :program:`yp-preprocessor` -and :program:`yp-schema` commands. +The following options apply to both :program:`yp-data`, +:program:`yp-preprocessor` and :program:`yp-schema` commands. .. program:: yp-* diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst index 9051988..8385b72 100644 --- a/docs/data-preprocessor.rst +++ b/docs/data-preprocessor.rst @@ -1,11 +1,12 @@ Data Pre-Processor ================== -The preprocessor looks for the DIRECT_INCLUDE= keyword in the input yaml and concatenates -the associated file at this point in the input file. The result is written to the -output file or standard out if - is specified. +The preprocessor looks for the DIRECT_INCLUDE= keyword in the input yaml and +concatenates the associated file at this point in the input file. The result +is written to the output file or standard out if - is specified. -It is expected that the keyword in the input yaml file will take the following format: +It is expected that the keyword in the input yaml file will take the following +format: .. code-block:: yaml @@ -18,7 +19,8 @@ Command line yp-preprocessor [options] -o output-file-name input-file-name -Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for detail. +Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for +detail. Python ------ @@ -53,7 +55,8 @@ If ``a.yaml`` contains: _: - &planet earth -Running the preprocessor on the input YAML file will yield the following output: +Running the preprocessor on the input YAML file will yield the following +output: .. code-block:: yaml @@ -67,5 +70,6 @@ Running the preprocessor on the input YAML file will yield the following output: - cat - dog -The preprocessor simply concatenates the contents of `a.yaml` at the correct place in the -input yaml file. This file can then be passed to `yp-data` for parsing. +The preprocessor simply concatenates the contents of `a.yaml` at the correct +place in the input yaml file. This file can then be passed to `yp-data` for +parsing. diff --git a/docs/install.rst b/docs/install.rst index 63d8c22..f09d114 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -19,7 +19,7 @@ run, for example: conda activate yamlprocessor python3 -m pip install . -To check that the code has installed correctly the testing can be run from +To check that the code has installed correctly the testing can be run from the main directory like this: .. code-block:: bash From cc383b3cb0a0fda28734a06adbf2296383693e8e Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Mon, 23 Sep 2024 10:25:33 +0100 Subject: [PATCH 15/17] Update the docs --- docs/data-preprocessor.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst index 8385b72..09f6af7 100644 --- a/docs/data-preprocessor.rst +++ b/docs/data-preprocessor.rst @@ -12,8 +12,8 @@ format: DIRECT_INCLUDE=/path/to/file/to/be/included -Command line ------------- +Command line useage +------------------- .. code-block:: bash @@ -22,8 +22,8 @@ Command line Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for detail. -Python ------- +Python useage +------------- .. code-block:: python @@ -33,7 +33,7 @@ Python preprocessor.process_yaml(input_file, output_file) Examples ------------------------- +-------- Consider an input YAML file containing the following data: From fd9d7ec0a70d3e7ece5821ae281a52f6d5055856 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Thu, 26 Sep 2024 10:31:02 +0100 Subject: [PATCH 16/17] Changes as suggested by review --- docs/cli.rst | 2 +- docs/data-preprocessor.rst | 2 +- src/yamlprocessor/datapreprocessor.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 6131509..14047a0 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -76,7 +76,7 @@ Usage: .. code-block:: bash - yp-preprocesor [options] -o output-file-name input-file-name + yp-preprocesor input-file-name -o output-file-name [options] See :doc:`data-preprocessor` for detail. diff --git a/docs/data-preprocessor.rst b/docs/data-preprocessor.rst index 09f6af7..b98b9cb 100644 --- a/docs/data-preprocessor.rst +++ b/docs/data-preprocessor.rst @@ -17,7 +17,7 @@ Command line useage .. code-block:: bash - yp-preprocessor [options] -o output-file-name input-file-name + yp-preprocessor input-file-name -o output-file-name [options] Type ``yp-preprocessor --help`` for a list of options. See :doc:`cli` for detail. diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index 47db75f..f1a3dc0 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -5,9 +5,9 @@ specified. Example usage: - python datapreprocessor.py -o \ + python datapreprocessor.py -o \ --define JOPA_AUX=/path/to/my/file - python datapreprocessor.py -o- \ + python datapreprocessor.py -o- \ --define JOPA_AUX=/path/to/my/file """ @@ -74,7 +74,7 @@ def main(): # Optional --define arguments parser.add_argument( - '--define', + '--define', '-d', action='append', help='Key-value pairs in the format key=value', default=[] ) From 06fc4fe58789f872929cde8587c30aa272f24ed9 Mon Sep 17 00:00:00 2001 From: mikecooke77 Date: Fri, 27 Sep 2024 09:57:51 +0100 Subject: [PATCH 17/17] Update the method for variable substiution to include environment variables --- docs/cli.rst | 8 +++- src/yamlprocessor/datapreprocessor.py | 37 +++++++++++++++---- .../tests/test_datapreprocess.py | 26 ++++++++++--- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 14047a0..fa469c1 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -92,7 +92,13 @@ See :doc:`data-preprocessor` for detail. .. option:: --define=KEY=VALUE, -D KEY=VALUE - Map KEY to VALUE for variable substitutions. + Map KEY to VALUE for variable substitutions. These override + environment variables which are used by default in the variable + substitution. + +.. option:: --no-environment, -i + + Do not use environment variables in variable substitutions. yp-schema --------- diff --git a/src/yamlprocessor/datapreprocessor.py b/src/yamlprocessor/datapreprocessor.py index f1a3dc0..0de05af 100755 --- a/src/yamlprocessor/datapreprocessor.py +++ b/src/yamlprocessor/datapreprocessor.py @@ -12,16 +12,30 @@ """ import argparse +import os +import re import sys class DataPreProcessor: def __init__(self): - self.replacements = {} + self.replacements = os.environ.copy() + + def __replace_placeholders(self, text): + # Create a regex pattern that matches $VAR or ${VAR} + pattern = re.compile(r'\$\{(\w+)\}|\$(\w+)') + + # Function to get the replacement value from env_vars + def replacer(match): + var_name = match.group(1) or match.group(2) + return self.replacements.get(var_name, match.group(0)) + + # Substitute the placeholders with actual values + return pattern.sub(replacer, text) def add_replacements_map(self, replacements): - self.replacements = replacements + self.replacements.update(replacements) def process_yaml(self, in_yaml, out_yaml): # read yaml file @@ -37,9 +51,8 @@ def process_yaml(self, in_yaml, out_yaml): # retrieve header file yaml_header_File = iline.split('=')[1].rstrip() # replace variables in the string - for key, value in self.replacements.items(): - yaml_header_File = \ - yaml_header_File.replace(f'${key}', value) + yaml_header_File = self.__replace_placeholders( + yaml_header_File) # open header file with open(yaml_header_File, 'r') as file: auxFileData = file.read() @@ -72,12 +85,17 @@ def main(): help='Name of output file, "-" for STDOUT' ) - # Optional --define arguments + # Optional parser.add_argument( - '--define', '-d', + '--define', '-D', action='append', help='Key-value pairs in the format key=value', default=[] ) + parser.add_argument( + '--no-environment', '-i', + action='store_true', + default=False, + help='Do not use environment variables in variable substitutions') # Parse arguments and print for sanity checking args = parser.parse_args() @@ -85,7 +103,8 @@ def main(): print(f"Output file: {args.output_file}", file=sys.stderr) print(f"Defines: {args.define}", file=sys.stderr) - # Process define arguments into a dictionary for passing to the class + # Process define arguments into a dictionary for adding to the + # environment variable dictionary key_value_pairs = {} if args.define: for item in args.define: @@ -94,6 +113,8 @@ def main(): # Run preprocessor preprocessor = DataPreProcessor() + if args.no_environment: + preprocessor.replacements.clear() preprocessor.add_replacements_map(key_value_pairs) preprocessor.process_yaml(args.input_file, args.output_file) diff --git a/src/yamlprocessor/tests/test_datapreprocess.py b/src/yamlprocessor/tests/test_datapreprocess.py index 838ada4..9bbd6c5 100644 --- a/src/yamlprocessor/tests/test_datapreprocess.py +++ b/src/yamlprocessor/tests/test_datapreprocess.py @@ -20,6 +20,13 @@ def test_main_0(tmp_path, yaml): tel: *groups """ yaml_1 = """ +DIRECT_INCLUDE=${FILE_PATH}/aux.yaml + +data: + brain: *banana + tel: *groups +""" + yaml_2 = """ _: - &banana 1 - &groups [4, 5, 6] @@ -37,16 +44,25 @@ def test_main_0(tmp_path, yaml): with infilename.open('w') as infile: infile.write(yaml_0) + infilename = tmp_path / 'in_1.yaml' + with infilename.open('w') as infile: + infile.write(yaml_1) + auxfilename = tmp_path / 'aux.yaml' with auxfilename.open('w') as auxfile: - auxfile.write(yaml_1) + auxfile.write(yaml_2) # Run preprocessor preprocessor = DataPreProcessor() keymap = {"FILE_PATH": str(tmp_path)} preprocessor.add_replacements_map(keymap) - outfilename = tmp_path / 'test_0.yaml' - preprocessor.process_yaml(tmp_path / 'in_0.yaml', outfilename) - # Check output + # Setup reference ref_yaml = yaml.load(reference) - assert yaml.load(outfilename.open()) == ref_yaml + # Test first style input + outfilename0 = tmp_path / 'test_0.yaml' + preprocessor.process_yaml(tmp_path / 'in_0.yaml', outfilename0) + assert yaml.load(outfilename0.open()) == ref_yaml + # Test second style input + outfilename1 = tmp_path / 'test_1.yaml' + preprocessor.process_yaml(tmp_path / 'in_1.yaml', outfilename1) + assert yaml.load(outfilename1.open()) == ref_yaml