#!/usr/bin/env python3
"""
svg_extract.py – dump every title + id pair found in an SVG file

usage:  python3 svg_extract.py input.svg  [> pairs.txt]

For every tag that carries both a non-empty ``id`` attribute and a non-empty
``title`` attribute, one line ``<title><TAB><id>`` is written to stdout.
Attribute order inside the tag does not matter, HTML/XML character
references in the values are decoded, and each (id, title) pair is printed
only once, in order of first appearance.
"""
import html
import re
import sys

# One complete tag: "<", then any mix of unquoted characters and whole quoted
# strings, then ">".  Quoted strings are consumed as single units so that a
# ">" inside an attribute value does not terminate the tag early.  Each
# alternative starts with a different character, so there is no ambiguous
# backtracking.
_TAG_RE = re.compile(r'''<(?:[^<>"']|"[^"]*"|'[^']*')*>''')

# One name="value" or name='value' attribute.  The name is captured whole
# (so "data-id" is never mistaken for "id") and the value must be closed by
# the same quote character that opened it (so title="Bob's" is kept intact).
_ATTR_RE = re.compile(r'''([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')''')


def _extract_pairs(data):
    """Return the unique (title, id) pairs found in *data*, in document order.

    Args:
        data: The SVG markup as a single string.

    Returns:
        A list of ``(title, id)`` tuples of unescaped attribute values.  A tag
        contributes a pair only when it has both attributes and neither value
        is empty.
    """
    pairs = []
    seen = set()
    for tag in _TAG_RE.finditer(data):
        attrs = {}
        for name, double_quoted, single_quoted in _ATTR_RE.findall(tag.group()):
            # Attribute names are compared case-insensitively; if a name is
            # repeated inside one tag, the first occurrence wins.
            attrs.setdefault(name.lower(), double_quoted or single_quoted)

        id_raw = attrs.get('id')
        title_raw = attrs.get('title')
        if not id_raw or not title_raw:
            continue  # tag lacks one of the two attributes, or it is empty

        pair = (html.unescape(title_raw), html.unescape(id_raw))
        if pair not in seen:  # avoid duplicates
            seen.add(pair)
            pairs.append(pair)
    return pairs


def main():
    """Command-line entry point.

    Reads the SVG file named by the single command-line argument (decoded as
    UTF-8) and prints one ``title<TAB>id`` line per unique pair.

    Raises:
        SystemExit: with a usage message when the argument count is not
            exactly one.
        OSError: propagated from ``open`` when the file cannot be read.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")

    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()

    for title_val, id_val in _extract_pairs(data):
        print(f"{title_val}\t{id_val}")


if __name__ == '__main__':
    main()