svg-parse

This commit is contained in:
Владислав 2025-12-22 09:02:24 +03:00
parent 5dc25a0a06
commit 93aab8ab2b
2 changed files with 33 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

33
svg-parse.py Executable file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""
svg_extract.py - dump every title + id attribute pair found in an SVG file
usage: python3 svg_extract.py input.svg [> pairs.txt]
"""
import sys, re, html
# A start tag: '<' followed by a name character, then any mix of unquoted
# characters and *complete* quoted strings, up to the closing '>'.  Quoted
# strings are consumed whole so that a '>' inside an attribute value does not
# end the tag early.  Comments, closing tags and processing instructions do
# not begin with a name character and are therefore never matched.
_TAG_RE = re.compile(r'''<[A-Za-z_][^<>"']*(?:(?:"[^"]*"|'[^']*')[^<>"']*)*>''')

# One name="value" or name='value' attribute.  The closing quote must be the
# same character as the opening one, so the other quote character may appear
# inside the value (e.g. title="Côte d'Ivoire").  The full attribute name is
# captured, which lets the caller tell `id` apart from `data-id`.
_ATTR_RE = re.compile(r'''([^\s"'<>/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')''')


def _extract_pairs(data):
    """Yield each unique (title, id) pair found in the start tags of *data*.

    A tag contributes a pair only when it carries both a non-empty ``id``
    and a non-empty ``title`` attribute (names compared case-insensitively,
    in either order).  Character references in the values are decoded with
    ``html.unescape``.  Pairs are yielded in document order; a pair that was
    already yielded is skipped.
    """
    seen = set()
    for tag in _TAG_RE.finditer(data):
        attrs = {}
        for name, double_quoted, single_quoted in _ATTR_RE.findall(tag.group()):
            # findall() returns '' for the alternative that did not match,
            # so `or` picks whichever quoting style was actually used.
            value = double_quoted or single_quoted
            key = name.lower()
            # Keep the first non-empty occurrence of each attribute name.
            if value and key not in attrs:
                attrs[key] = value
        if 'id' not in attrs or 'title' not in attrs:
            continue
        pair = (html.unescape(attrs['title']), html.unescape(attrs['id']))
        if pair not in seen:  # avoid duplicates
            seen.add(pair)
            yield pair


def main():
    """Print every unique title/id attribute pair of an SVG file to stdout.

    Expects exactly one command-line argument: the path of the SVG file,
    which is read as UTF-8.  Each pair is written as one line of the form
    ``<title>TAB<id>``.

    Exits via ``sys.exit`` with a usage message when the argument count is
    wrong.  Errors raised by ``open``/``read`` (missing file, bad encoding)
    propagate to the caller unchanged.
    """
    if len(sys.argv) != 2:
        sys.exit("Usage: python3 svg_extract.py file.svg")
    with open(sys.argv[1], encoding='utf-8') as f:
        data = f.read()
    for title_val, id_val in _extract_pairs(data):
        print(f"{title_val}\t{id_val}")
# Run the extractor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()