svg-parse
This commit is contained in:
parent
5dc25a0a06
commit
93aab8ab2b
33
svg-parse.py
Executable file
33
svg-parse.py
Executable file
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
svg_extract.py – dump every title + id pair found in an SVG file
|
||||
usage: python3 svg_extract.py input.svg [> pairs.txt]
|
||||
"""
|
||||
|
||||
import sys, re, html
|
||||
|
||||
# Matches one element start tag.  Quoted attribute values are consumed as a
# unit, so a literal '>' inside a value does not end the tag early.  The
# character right after '<' may not be '!', '?' or '/', so comment openers,
# processing instructions and closing tags are never treated as a tag start.
_TAG_RE = re.compile(
    r'<[^\s<>!?/"\'=]'
    r'[^<>"\']*(?:(?:"[^"]*"|\'[^\']*\')[^<>"\']*)*'
    r'>'
)

# Matches one complete `name = "value"` / `name = 'value'` attribute.  The
# closing quote must be the same character as the opening one, so a value may
# contain the other kind of quote (e.g. title="Côte d'Ivoire").
_ATTR_RE = re.compile(
    r'([^\s"\'<>/=]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')'
)


def _iter_title_id_pairs(data):
    """Yield each distinct (title, id) pair found on a tag in *data*.

    A tag contributes a pair only when it carries both an ``id`` and a
    ``title`` attribute with non-empty values; the two may appear in either
    order.  Attribute names are compared case-insensitively and must match
    exactly, so ``data-id`` or ``xlink:title`` are not mistaken for ``id`` /
    ``title``.  Values are HTML-unescaped, and a pair that was already
    yielded is skipped.
    """
    seen = set()
    for tag in _TAG_RE.finditer(data):
        attrs = {}
        # Every attribute is consumed whole, so "id=" text that merely
        # appears inside another attribute's value is never picked up.
        for name, double_quoted, single_quoted in _ATTR_RE.findall(tag.group()):
            key = name.lower()
            if key in ('id', 'title'):
                # Keep the first occurrence if an attribute is repeated.
                attrs.setdefault(key, double_quoted or single_quoted)

        raw_id = attrs.get('id')
        raw_title = attrs.get('title')
        if not raw_id or not raw_title:
            continue

        pair = (html.unescape(raw_title), html.unescape(raw_id))
        if pair not in seen:  # avoid duplicates
            seen.add(pair)
            yield pair


def main():
    """Print one ``title<TAB>id`` line per distinct pair in the given file.

    Expects exactly one command-line argument, the path of the file to scan,
    which is read as UTF-8.  With any other argument count the process exits
    with a usage message.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")

    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()

    for title_val, id_val in _iter_title_id_pairs(data):
        print(f"{title_val}\t{id_val}")
|
||||
|
||||
# Entry point: run the extraction only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue
Block a user