Skip to content

Commit

Permalink
Converting workshop checker to use Reporter class.
Browse files Browse the repository at this point in the history
1.  Converting error reporting.
2.  Simplifying checking logic.
3.  Refactoring metadata loading.
4.  Modifying lesson checker to reflect refactoring.

Closes #47.
  • Loading branch information
Greg Wilson committed Jun 26, 2016
1 parent 519cd35 commit ac67ebc
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 199 deletions.
43 changes: 12 additions & 31 deletions bin/lesson_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
# This list must include all the Markdown files listed in the 'bin/initialize' script.
REQUIRED_FILES = {
'%/CONDUCT.md': True,
'%/CONTRIBUTING.md': False,
'%/LICENSE.md': True,
'%/README.md': False,
'%/_extras/discuss.md': True,
Expand All @@ -38,12 +37,6 @@
'%/setup.md': True,
}

# Required non-Markdown files: every lesson must ship these alongside
# the Markdown sources listed in REQUIRED_FILES above.
NON_MARKDOWN_FILES = {
    "AUTHORS",
    "CITATION"
}

# Episode filename pattern.
P_EPISODE_FILENAME = re.compile(r'/_episodes/(\d\d)-[-\w]+.md$')

Expand Down Expand Up @@ -97,10 +90,9 @@ def main():
"""Main driver."""

args = parse_args()
args.reporter = Reporter(args)
check_config(args)
check_non_markdown_files(args.source_dir, args.reporter)
docs = read_all_markdown(args, args.source_dir)
args.reporter = Reporter()
check_config(args.reporter, args.source_dir)
docs = read_all_markdown(args.source_dir, args.parser)
check_fileset(args.source_dir, args.reporter, docs.keys())
for filename in docs.keys():
checker = create_checker(args, filename, docs[filename])
Expand Down Expand Up @@ -134,27 +126,16 @@ def parse_args():
return args


def check_config(args):
def check_config(reporter, source_dir):
"""Check configuration file."""

config_file = os.path.join(args.source_dir, '_config.yml')
config_file = os.path.join(source_dir, '_config.yml')
with open(config_file, 'r') as reader:
config = yaml.load(reader)
reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')

args.reporter.check_field(config_file, 'configuration', config, 'kind', 'lesson')


def check_non_markdown_files(source_dir, reporter):
    """Report every required non-Markdown file that is missing.

    Checks each entry of NON_MARKDOWN_FILES relative to source_dir and
    records a "File not found" error on the reporter for any that are absent.
    """

    for required in NON_MARKDOWN_FILES:
        location = os.path.join(source_dir, required)
        present = os.path.exists(location)
        reporter.check(present, required, "File not found")


def read_all_markdown(args, source_dir):
def read_all_markdown(source_dir, parser):
"""Read source files, returning
{path : {'metadata':yaml, 'metadata_len':N, 'text':text, 'lines':[(i, line, len)], 'doc':doc}}
"""
Expand All @@ -164,7 +145,7 @@ def read_all_markdown(args, source_dir):
result = {}
for pat in all_patterns:
for filename in glob.glob(pat):
data = read_markdown(args.parser, filename)
data = read_markdown(parser, filename)
if data:
result[filename] = data
return result
Expand Down Expand Up @@ -192,9 +173,9 @@ def check_fileset(source_dir, reporter, filenames_present):

# Check for duplicate episode numbers.
reporter.check(len(seen) == len(set(seen)),
None,
'Duplicate episode numbers {0} vs {1}',
sorted(seen), sorted(set(seen)))
None,
'Duplicate episode numbers {0} vs {1}',
sorted(seen), sorted(set(seen)))

# Check that numbers are consecutive.
seen = [int(s) for s in seen]
Expand Down Expand Up @@ -239,6 +220,7 @@ def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
self.text = text
self.lines = lines
self.doc = doc

self.layout = None


Expand Down Expand Up @@ -371,7 +353,6 @@ class CheckEpisode(CheckBase):
def __init__(self, args, filename, metadata, metadata_len, text, lines, doc):
super(CheckEpisode, self).__init__(args, filename, metadata, metadata_len, text, lines, doc)


def check_metadata(self):
super(CheckEpisode, self).check_metadata()
if self.metadata:
Expand Down
43 changes: 28 additions & 15 deletions bin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class Reporter(object):
"""Collect and report errors."""

def __init__(self, args):
def __init__(self):
"""Constructor."""

super(Reporter, self).__init__()
Expand Down Expand Up @@ -62,23 +62,13 @@ def read_markdown(parser, path):
"""

# Split and extract YAML (if present).
metadata = None
metadata_len = None
with open(path, 'r') as reader:
body = reader.read()
pieces = body.split('---', 2)
if len(pieces) == 3:
try:
metadata = yaml.load(pieces[1])
except yaml.YAMLError as e:
print('Unable to parse YAML header in {0}:\n{1}'.format(path, e))
sys.exit(1)
metadata_len = pieces[1].count('\n')
body = pieces[2]
metadata_raw, metadata_yaml, body = split_metadata(path, body)

# Split into lines.
offset = 0 if metadata_len is None else metadata_len
lines = [(offset+i+1, l, len(l)) for (i, l) in enumerate(body.split('\n'))]
metadata_len = 0 if metadata_raw is None else metadata_raw.count('\n')
lines = [(metadata_len+i+1, line, len(line)) for (i, line) in enumerate(body.split('\n'))]

# Parse Markdown.
cmd = 'ruby {0}'.format(parser)
Expand All @@ -87,9 +77,32 @@ def read_markdown(parser, path):
doc = json.loads(stdout_data)

return {
'metadata': metadata,
'metadata': metadata_yaml,
'metadata_len': metadata_len,
'text': body,
'lines': lines,
'doc': doc
}


def split_metadata(path, text):
    """Split a Markdown document into its YAML front matter and body.

    Parameters:
        path: filename of the document, used only in the error message.
        text: full document text, possibly beginning with a YAML header
            delimited by '---' lines.

    Returns a (metadata_raw, metadata_yaml, body) triple:
        metadata_raw: raw text of the YAML header, or None if absent.
        metadata_yaml: parsed YAML header, or None if absent.
        body: document text with the header stripped (the original text
            unchanged when no header is present).

    Exits the program with status 1 if a header is present but is not
    valid YAML.
    """

    metadata_raw = None
    metadata_yaml = None

    # A document with front matter splits into ['', header, body];
    # anything else (fewer than two '---' separators) has no header.
    pieces = text.split('---', 2)
    if len(pieces) == 3:
        metadata_raw = pieces[1]
        text = pieces[2]
        try:
            # NOTE(review): yaml.load is unsafe on untrusted input; if lesson
            # sources can come from third parties, switch to yaml.safe_load.
            metadata_yaml = yaml.load(metadata_raw)
        except yaml.YAMLError as e:
            print('Unable to parse YAML header in {0}:\n{1}'.format(path, e), file=sys.stderr)
            sys.exit(1)

    return metadata_raw, metadata_yaml, text
Loading

0 comments on commit ac67ebc

Please sign in to comment.