#!/usr/bin/env python3
"""
svg-parse.py - dump every title + id pair found in an SVG file.

usage: python3 svg-parse.py input.svg [> pairs.txt]

Every start tag that carries both a non-empty ``id`` attribute and a
non-empty ``title`` attribute produces one output line of the form
``<title><TAB><id>``.  Character references in the values (``&amp;``,
``&#244;`` ...) are decoded, and repeated (id, title) pairs are printed
only once, in order of first appearance.
"""
# Provenance: introduced by commit 93aab8a ("svg-parse", 2025-12-22).

import html
import re
import sys

# An attribute name: anything up to whitespace, '=', a quote, '/', or an
# angle bracket.
_NAME = r'''[^\s=<>"'/]+'''
# A quoted attribute value.  The closing quote must be the SAME character as
# the opening one, so title="Cote d'Ivoire" is not cut off at the apostrophe.
_VALUE = r'''(?:"[^"]*"|'[^']*')'''

# A complete start (or self-closing) tag.  Attributes are consumed one by one,
# which lets a '>' appear inside a quoted value without ending the tag early.
_START_TAG_RE = re.compile(
    rf'<[A-Za-z_][^\s<>/]*(?:\s+{_NAME}(?:\s*=\s*{_VALUE})?)*\s*/?>'
)

# One attribute inside a start tag.  Requiring leading whitespace (rather than
# a bare word boundary) keeps ``data-id`` / ``xlink:title`` from being mistaken
# for ``id`` / ``title``; tokenising *every* attribute means text inside some
# other attribute's value is never rescanned as if it were an attribute.
_ATTR_RE = re.compile(
    rf'''\s+(?P<name>{_NAME})(?:\s*=\s*(?:"(?P<dq>[^"]*)"|'(?P<sq>[^']*)'))?'''
)

_WANTED = ('id', 'title')


def extract_pairs(data):
    """Return the unique ``(id, title)`` pairs found in *data*.

    Args:
        data: The SVG document as a string.

    Returns:
        A list of ``(id, title)`` tuples, entity-decoded, de-duplicated, in
        order of first appearance.  Tags lacking either attribute, or having
        an empty value for one of them, are skipped.
    """
    pairs = []
    seen = set()
    for tag in _START_TAG_RE.finditer(data):
        found = {}
        for attr in _ATTR_RE.finditer(tag.group(0)):
            # Attribute names are compared case-insensitively, as the
            # original pattern (compiled with re.I) did.
            name = attr.group('name').lower()
            if name not in _WANTED or name in found:
                continue
            value = attr.group('dq')
            if value is None:
                value = attr.group('sq')
            if value:  # skips both "no value" (None) and the empty string
                found[name] = html.unescape(value)
        if len(found) != len(_WANTED):
            continue
        pair = (found['id'], found['title'])
        if pair not in seen:  # avoid duplicates
            seen.add(pair)
            pairs.append(pair)
    return pairs


def main():
    """Command-line entry point: print ``title<TAB>id`` for each pair.

    Reads the SVG path from ``sys.argv[1]``.  Exits with a usage message when
    the argument count is wrong, and with a short error message (instead of a
    traceback) when the file cannot be read or is not valid UTF-8.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg-parse.py file.svg")

    path = sys.argv[1]
    try:
        with open(path, encoding='utf-8') as f:
            data = f.read()
    except (OSError, UnicodeDecodeError) as err:
        sys.exit(f"svg-parse.py: cannot read {path}: {err}")

    for id_val, title_val in extract_pairs(data):
        print(f"{title_val}\t{id_val}")


if __name__ == '__main__':
    main()