Files
ieemoo-ai-zhanting/parseXml.py
huangtao 4bb117c407 1.3
2022-07-01 14:19:10 +08:00

192 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
# -*- author: jokker -*-
import os
import time
import numpy as np
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString
from lxml.etree import Element, SubElement, tostring
from xml.etree.ElementTree import fromstring, ElementTree
from xml.etree import ElementTree
cwd=os.getcwd()
def prettyXml(element, indent, newline, level=0):
    """Add indentation whitespace to an element tree in place.

    Args:
        element: The ``Element`` to format; it and all of its descendants
            are modified.
        indent: String used for one level of indentation (e.g. a tab).
        newline: String used as the line break.
        level: Nesting depth of ``element``; top-level callers leave it at 0.

    Returns:
        The same ``element`` object that was passed in.
    """
    # Use len() rather than truth-testing the Element: `if element:` is
    # deprecated in xml.etree and only ever meant "has children" anyway.
    child_count = len(element)
    if child_count:
        # Only elements that have children get their text rewritten;
        # the text of leaf elements is left untouched.
        child_pad = newline + indent * (level + 1)
        if element.text is None or element.text.isspace():
            element.text = child_pad
        else:
            element.text = child_pad + element.text.strip() + child_pad

    for position, child in enumerate(element):
        if position < child_count - 1:
            # A sibling follows, so the next line starts at the children's depth.
            child.tail = newline + indent * (level + 1)
        else:
            # Last child: the next line is the parent's closing tag, one level up.
            child.tail = newline + indent * level
        prettyXml(child, indent, newline, level=level + 1)
    return element
class ParseXml(object):
    """Read an annotation XML file into a dict and write such dicts back to XML.

    The expected layout is a root element containing ``folder``, ``filename``,
    ``path``, ``source``, ``size``, ``segmented`` and any number of ``object``
    elements, each with a ``bndbox``.
    """

    def __init__(self, input_xml):
        """Remember the path of the XML file to read.

        Args:
            input_xml: Path (or file object) accepted by ``ET.parse``.
        """
        # Top-level tags printed by get_info(); a tuple keeps the print order stable.
        self.__ttrs = ("folder", "filename", "path", "segmented", "size", "source", "object")
        self.__xml_info_dict = {}
        self.input_xml = input_xml

    def get_info(self):
        """Print the text of every known top-level element of the input XML."""
        root = ET.parse(self.input_xml).getroot()
        for name in self.__ttrs:
            for element in root.findall(name):
                print(element.text)

    def get_xml_info(self):
        """Return the contents of the input XML as a dict.

        Returns:
            A dict that may contain ``folder``, ``filename``, ``path``,
            ``database``, ``width``, ``height``, ``depth`` and ``segmented``
            (each only when the matching element exists), and always contains
            ``object``: a list with one dict per ``<object>`` element holding
            whichever of ``name``, ``pose``, ``truncated``, ``difficult`` and
            ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``) are present.
            Element texts are returned as read (strings or None). When an
            element occurs more than once, the last occurrence wins.
        """
        root = ET.parse(self.input_xml).getroot()
        xml_info = {}

        for tag in ("folder", "filename", "path"):
            for node in root.findall(tag):
                xml_info[tag] = node.text

        # NOTE(review): database, segmented, truncated and difficult are
        # recorded with fixed values instead of the text found in the file;
        # this is kept as-is -- confirm it is intentional.
        for source in root.findall("source"):
            for _ in source.findall("database"):
                xml_info["database"] = "Unknown"

        for size in root.findall("size"):
            for tag in ("width", "height", "depth"):
                for node in size.findall(tag):
                    xml_info[tag] = node.text

        for _ in root.findall("segmented"):
            xml_info["segmented"] = 0

        boxes = []
        for obj in root.findall("object"):
            box = {}
            for tag in ("name", "pose"):
                for node in obj.findall(tag):
                    box[tag] = node.text
            for tag in ("truncated", "difficult"):
                for _ in obj.findall(tag):
                    box[tag] = 0
            for bnd in obj.findall("bndbox"):
                # Each <bndbox> starts a fresh dict, so the last one wins.
                box["bndbox"] = {}
                for tag in ("xmin", "ymin", "xmax", "ymax"):
                    for node in bnd.findall(tag):
                        box["bndbox"][tag] = node.text
            boxes.append(box)
        xml_info["object"] = boxes
        return xml_info

    def write_to_xml(self, result, save_dir):
        """Write an annotation dict to ``<save_dir>/<image stem>.xml``.

        Args:
            result: Dict shaped like the return value of ``get_xml_info``. The
                keys ``filename``, ``folder``, ``path``, ``width``, ``height``
                and ``object`` are read; each object needs ``name``, ``pose``,
                ``truncated``, ``difficult`` and a complete ``bndbox``.
            save_dir: Directory to write into. It is not created here, so it
                must already exist.

        Returns:
            The path of the XML file that was written.

        Raises:
            KeyError: If ``result`` or one of its objects lacks a required key.
        """
        img_name = os.path.splitext(result['filename'])[0]
        xml_path = os.path.join(save_dir, img_name) + ".xml"

        root = ET.Element("annotations")
        ET.SubElement(root, "folder").text = result['folder']
        ET.SubElement(root, "filename").text = result['filename']
        ET.SubElement(root, "path").text = result['path']

        size = ET.SubElement(root, "size")
        ET.SubElement(size, "width").text = str(result['width'])
        ET.SubElement(size, "height").text = str(result['height'])
        # NOTE(review): depth is always written as "3", whatever result holds.
        ET.SubElement(size, "depth").text = "3"

        for info in result['object']:
            obj = ET.SubElement(root, "object")
            for tag in ("name", "pose", "truncated", "difficult"):
                ET.SubElement(obj, tag).text = str(info[tag])
            bbox = ET.SubElement(obj, "bndbox")
            for tag in ("xmin", "ymin", "xmax", "ymax"):
                # Coordinates may arrive as float-like strings; store whole pixels.
                ET.SubElement(bbox, tag).text = str(int(float(info['bndbox'][tag])))

        # Indent the tree in memory and write it once, instead of writing the
        # raw tree, parsing it back and writing it a second time.
        prettyXml(root, '\t', '\n')
        ET.ElementTree(root).write(xml_path)
        return xml_path

    def update_node(self, xml, save_dir, note, note_value):
        """Set the text of every ``note`` element and save a copy in ``save_dir``.

        Args:
            xml: Path of the XML file to read.
            save_dir: Directory for the updated file, which keeps the base
                name of ``xml``.
            note: Tag to update: one of ``folder``/``filename``/``path``
                (under the root), ``width``/``height``/``depth`` (under
                ``size``), ``name``/``pose``/``truncated``/``difficult``
                (under each ``object``), or ``xmin``/``xmax``/``ymin``/``ymax``
                (under each ``object/bndbox``).
            note_value: New value; it is converted to ``str`` before storing.

        The file is written even when nothing matched; in that case a message
        is printed so the caller can see that the update had no effect.
        """
        xml_path = os.path.join(save_dir, os.path.basename(xml))
        tree = ET.parse(xml)
        root = tree.getroot()

        top_level = ("folder", "filename", "path")
        if note in top_level:
            search_path = note
        elif note in ("width", "height", "depth"):
            search_path = "size/" + note
        elif note in ("name", "pose", "truncated", "difficult"):
            search_path = "object/" + note
        elif note in ("xmin", "xmax", "ymin", "ymax"):
            search_path = "object/bndbox/" + note
        else:
            search_path = None

        targets = root.findall(search_path) if search_path else []

        if note in top_level and note_value is None:
            # Keep None as "no text" for root-level tags, as before.
            new_text = None
        else:
            # Element text must be a string or serialization fails on write.
            new_text = str(note_value)
        for node in targets:
            node.text = new_text

        if not targets:
            # Unknown tag name, or a known tag that this document lacks.
            print("修改的节点不在xml")
        tree.write(xml_path)
#
if __name__ == "__main__":
    # Demo run: parse a sample annotation, export it again, then patch the
    # exported file (update_node writes back to the same path in output_dir).
    source_xml = 'C:\\Users\\Administrator\\Desktop\\1.xml'
    output_dir = 'C:\\Users\\Administrator\\Desktop\\merge1'

    parser = ParseXml(source_xml)
    parsed = parser.get_xml_info()
    print(parsed)

    exported = parser.write_to_xml(parsed, output_dir)
    print("--*--xmlPath--*--:", exported)

    # Set xmin of every bounding box in the exported file to 200.
    parser.update_node(exported, output_dir, "xmin", 200)