#!/usr/bin/env python3
"""
svg_extract.py – dump every title + id pair found in an SVG file
usage: python3 svg_extract.py input.svg [> pairs.txt]
"""

import html
import re
import sys
from typing import Iterator, Tuple

# Matches a single start tag that carries both an ``id`` and a ``title``
# attribute, in either order.  The first alternative handles "id ... title",
# the second "title ... id"; because the alternatives need distinct group
# names, the second one uses ``id2`` / ``title2``.  ``[^>]*?`` keeps every
# match inside one tag.  Case-insensitive so ``ID=`` / ``Title=`` also match.
_PAIR_RE = re.compile(
    r'<[^>]*?\b(id=["\'](?P<id>[^"\']+)["\'])[^>]*?\b(title=["\'](?P<title>[^"\']+)["\'])'
    r'|'
    r'<[^>]*?\b(title=["\'](?P<title2>[^"\']+)["\'])[^>]*?\b(id=["\'](?P<id2>[^"\']+)["\'])',
    re.I,
)


def _iter_pairs(data: str) -> Iterator[Tuple[str, str]]:
    """Yield each distinct ``(title, id)`` pair found in *data*, in document order.

    Attribute values are HTML/XML-unescaped (``&amp;`` -> ``&``) before they
    are compared, so two tags that differ only in entity encoding count as
    the same pair.
    """
    seen = set()
    for m in _PAIR_RE.finditer(data):
        # Exactly one alternative matched, so one group of each pair is None.
        id_val = html.unescape(m.group('id') or m.group('id2'))
        title_val = html.unescape(m.group('title') or m.group('title2'))
        key = (id_val, title_val)
        if key not in seen:  # avoid duplicates
            seen.add(key)
            yield title_val, id_val


def main() -> None:
    """Read the SVG file named on the command line and print its pairs.

    Expects exactly one argument (the file path); otherwise exits with a
    usage message.  Each distinct pair is written to stdout as
    ``<title>\\t<id>``, one per line.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")

    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()

    for title_val, id_val in _iter_pairs(data):
        print(f"{title_val}\t{id_val}")


if __name__ == '__main__':
    main()