svg-parse
This commit is contained in:
parent
5dc25a0a06
commit
93aab8ab2b
33
svg-parse.py
Executable file
33
svg-parse.py
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
svg_extract.py – dump every title + id pair found in an SVG file
|
||||||
|
usage: python3 svg_extract.py input.svg [> pairs.txt]
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys, re, html
|
||||||
|
|
||||||
|
def _iter_id_title_pairs(data):
    """Yield ``(id, title)`` for every start tag in *data* that has both.

    Attributes are tokenized in order, each value delimited by its own
    opening quote character.  This means an apostrophe inside a
    double-quoted value (``title="Bob's house"``) is kept intact, and text
    inside one attribute's value is never mistaken for another attribute.
    Only attributes named exactly ``id`` / ``title`` (compared
    case-insensitively) are used, so ``data-id`` or ``aria-title`` are
    ignored.  Tags where either value is missing or empty are skipped.
    Character references in the values are decoded with ``html.unescape``.
    """
    # "<name" followed by as many well-formed name="value" pairs as can be
    # read; group 1 is the raw attribute list.  The closing ">" is not
    # required, so a truncated tag still contributes what was readable.
    start_tag = re.compile(
        r'<[A-Za-z_:][^\s/>]*'
        r'((?:\s+[^\s=<>/"\']+\s*=\s*(?:"[^"]*"|\'[^\']*\'))*)'
    )
    # One attribute: group 1 = name, group 2 = double-quoted value,
    # group 3 = single-quoted value (exactly one of 2/3 is not None).
    attribute = re.compile(
        r'([^\s=<>/"\']+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')'
    )

    for tag in start_tag.finditer(data):
        found = {}
        for attr in attribute.finditer(tag.group(1)):
            name = attr.group(1).lower()
            if name in ('id', 'title') and name not in found:
                value = attr.group(2)
                if value is None:
                    value = attr.group(3)
                found[name] = value
        # Empty values are treated like missing ones.
        if found.get('id') and found.get('title'):
            yield html.unescape(found['id']), html.unescape(found['title'])


def main():
    """Print each distinct ``title<TAB>id`` pair found in an SVG file.

    The file path is taken from ``sys.argv[1]`` and the file is read as
    UTF-8.  Every start tag that carries both a non-empty ``id`` and a
    non-empty ``title`` attribute (in either order) produces one output
    line on stdout; identical (id, title) pairs are printed only once, in
    order of first appearance.

    Exits via ``sys.exit`` with a usage message when the script is not
    given exactly one argument.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")

    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()

    seen = set()
    for pair in _iter_id_title_pairs(data):
        if pair in seen:  # avoid duplicates
            continue
        seen.add(pair)
        id_val, title_val = pair
        print(f"{title_val}\t{id_val}")
|
||||||
|
|
||||||
|
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||||
Loading…
Reference in New Issue
Block a user