34 lines
1.0 KiB
Python
Executable File
34 lines
1.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
svg_extract.py – dump every title + id pair found in an SVG file
|
||
usage: python3 svg_extract.py input.svg [> pairs.txt]
|
||
"""
|
||
|
||
import sys, re, html
|
||
|
||
# One start tag (or empty-element tag); group 1 is everything between '<' and
# '>'.  Quoted attribute values are consumed as whole units so that a '>' or
# the *other* quote character inside a value cannot end the tag early.
# Comments, processing instructions and end tags ('<!', '<?', '</') are
# rejected by the lookahead.
_TAG_RE = re.compile(r'''<(?![!?/])((?:"[^"]*"|'[^']*'|[^<>"'])+)>''')

# One name="value" or name='value' attribute inside a tag.  The value ends
# only at the same quote character that opened it, and whitespace around '='
# is accepted as XML allows.
_ATTR_RE = re.compile(r'''([^\s=/<>"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')''')


def _extract_pairs(data):
    """Yield each unique (title, id) pair found on a tag in *data*.

    A tag contributes a pair only when it carries both a non-empty ``id``
    and a non-empty ``title`` attribute, in either order.  Values are
    HTML-unescaped, and pairs are yielded in document order with repeats
    dropped.  This is a regex scan, not an XML parser: markup that sits
    inside comments or CDATA sections is not filtered out.
    """
    seen = set()
    for tag in _TAG_RE.finditer(data):
        attrs = {}
        for name, double_quoted, single_quoted in _ATTR_RE.findall(tag.group(1)):
            # Whole attribute names are compared, so 'data-id' or
            # 'xlink:title' are never mistaken for 'id' / 'title'.
            # Names are lower-cased to keep the case-insensitive matching
            # this script has always had; the first occurrence wins.
            attrs.setdefault(name.lower(), double_quoted or single_quoted)

        raw_id = attrs.get('id')
        raw_title = attrs.get('title')
        if not raw_id or not raw_title:
            continue

        pair = (html.unescape(raw_title), html.unescape(raw_id))
        if pair not in seen:  # avoid duplicates
            seen.add(pair)
            yield pair


def main():
    """Print every unique title/id attribute pair of the SVG named on the CLI.

    Expects exactly one command-line argument, the path of a UTF-8 encoded
    SVG file.  Writes one ``title<TAB>id`` line per unique pair to stdout,
    in the order the pairs appear in the file.

    Raises:
        SystemExit: with a usage message when the argument count is wrong.
        OSError: when the file cannot be opened or read.
        UnicodeDecodeError: when the file is not valid UTF-8.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")

    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()

    for title_val, id_val in _extract_pairs(data):
        print(f"{title_val}\t{id_val}")
|
||
|
||
# Script entry point: run the extractor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|