From 5520175060add65af31ac11a5d13a90c3b3f0d07 Mon Sep 17 00:00:00 2001 From: Greg Wilson Date: Fri, 8 Jul 2016 07:07:18 -0400 Subject: [PATCH] Handling anchors by ignoring them --- bin/extract_figures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/extract_figures.py b/bin/extract_figures.py index 5199122..7eb55f0 100755 --- a/bin/extract_figures.py +++ b/bin/extract_figures.py @@ -69,7 +69,7 @@ def find_image_links(doc, result): if ((doc['type'] == 'a') and ('attr' in doc) and ('href' in doc['attr'])) \ or \ - ((doc['type'] == 'html_element') and (doc['value'] == 'a')): + ((doc['type'] == 'html_element') and (doc['value'] == 'a') and ('href' in doc['attr'])): path = doc['attr']['href'] if os.path.splitext(path)[1].lower() in IMAGE_FILE_SUFFIX: result.append({'alt':'', 'src': doc['attr']['href']})