# -*- coding: utf-8 -*-
# -*- author: jokker -*-
"""Read, write and edit XML annotation files.

The files handled here have top-level ``folder`` / ``filename`` / ``path`` /
``source`` / ``size`` / ``segmented`` tags plus any number of ``object``
entries, each carrying a ``bndbox``.
"""

import os
import time
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString

import numpy as np

try:
    from lxml.etree import Element, SubElement, tostring
except ImportError:
    # Nothing in the visible part of this module uses these three names, so a
    # missing lxml should not make the module unimportable; the standard
    # library offers same-named equivalents.
    from xml.etree.ElementTree import Element, SubElement, tostring

# NOTE: the order of the next two imports matters. The second one rebinds the
# name ``ElementTree`` from the class to the ``xml.etree.ElementTree`` module.
from xml.etree.ElementTree import fromstring, ElementTree
from xml.etree import ElementTree

cwd = os.getcwd()


def prettyXml(element, indent, newline, level=0):
    """Indent an element tree in place so that it serializes one tag per line.

    Only elements that have children get their ``text`` rewritten; leaf
    elements keep their text untouched. Every child gets a ``tail`` that
    positions the following tag (a sibling, or the parent's closing tag).

    Args:
        element: The ``Element`` to format (modified in place).
        indent: String used for one level of indentation, e.g. ``'\\t'``.
        newline: String used as the line break, e.g. ``'\\n'``.
        level: Nesting depth of ``element``; 0 for the root.

    Returns:
        The same ``element`` object that was passed in.
    """
    children = list(element)
    child_pad = newline + indent * (level + 1)
    closing_pad = newline + indent * level

    # Explicit child count: truth-testing an Element is deprecated.
    if children:
        if element.text is None or element.text.isspace():
            element.text = child_pad
        else:
            element.text = child_pad + element.text.strip() + child_pad

    last_index = len(children) - 1
    for position, child in enumerate(children):
        # A non-final child is followed by a sibling at the same depth; the
        # final child is followed by the parent's closing tag, one level up.
        child.tail = child_pad if position < last_index else closing_pad
        prettyXml(child, indent, newline, level=level + 1)
    return element


class ParseXml(object):
    """Parse the information in an annotation XML and export it back to XML."""

    # Location (ElementPath relative to the root) of every node that
    # ``update_node`` knows how to edit.
    _NODE_PATHS = {
        "folder": "folder",
        "filename": "filename",
        "path": "path",
        "width": "size/width",
        "height": "size/height",
        "depth": "size/depth",
        "name": "object/name",
        "pose": "object/pose",
        "truncated": "object/truncated",
        "difficult": "object/difficult",
        "xmin": "object/bndbox/xmin",
        "xmax": "object/bndbox/xmax",
        "ymin": "object/bndbox/ymin",
        "ymax": "object/bndbox/ymax",
    }

    def __init__(self, input_xml):
        """Remember the path of the XML file to read.

        Args:
            input_xml: Path (or file object) accepted by ``ET.parse``.
        """
        # All top-level tags; a tuple keeps the print order of get_info stable.
        self.__ttrs = ("folder", "filename", "path", "segmented", "size",
                       "source", "object")
        self.__xml_info_dict = {}  # XML info dictionary
        self.input_xml = input_xml

    def get_info(self):
        """Print the text of every top-level element with a known tag."""
        root = ET.parse(self.input_xml).getroot()
        for tag in self.__ttrs:
            for node in root.findall(tag):
                print(node.text)

    def get_xml_info(self):
        """Collect the contents of the XML file into a dictionary.

        Keys are only present when the corresponding tag exists in the file.
        ``folder``, ``filename``, ``path``, ``width``, ``height`` and ``depth``
        hold the element text. ``object`` is always present and is a list with
        one dict per ``object`` element (``name``, ``pose``, ``truncated``,
        ``difficult`` and a ``bndbox`` dict of ``xmin``/``ymin``/``xmax``/
        ``ymax`` texts).

        NOTE(review): ``database`` is always reported as ``"Unknown"`` and
        ``segmented`` / ``truncated`` / ``difficult`` as ``0`` whenever the tag
        exists, regardless of the value stored in the file. This is kept as
        found; confirm that the normalisation is intended.

        Returns:
            dict: The collected information.
        """
        root = ET.parse(self.input_xml).getroot()
        xml_info = {}

        for tag in ("folder", "filename", "path"):
            for node in root.findall(tag):
                xml_info[tag] = node.text

        if root.find("source/database") is not None:
            xml_info["database"] = "Unknown"

        for size_node in root.findall("size"):
            for tag in ("width", "height", "depth"):
                for node in size_node.findall(tag):
                    xml_info[tag] = node.text

        if root.find("segmented") is not None:
            xml_info["segmented"] = 0

        boxes = []
        for object_node in root.findall("object"):
            box = {}
            for tag in ("name", "pose"):
                for node in object_node.findall(tag):
                    box[tag] = node.text
            for tag in ("truncated", "difficult"):
                if object_node.find(tag) is not None:
                    box[tag] = 0
            for bndbox_node in object_node.findall("bndbox"):
                box["bndbox"] = {}
                for tag in ("xmin", "ymin", "xmax", "ymax"):
                    for node in bndbox_node.findall(tag):
                        box["bndbox"][tag] = node.text
            boxes.append(box)
        xml_info["object"] = boxes
        return xml_info

    def write_to_xml(self, result, save_dir):
        """Write annotation information to ``<save_dir>/<image stem>.xml``.

        The tree is indented with tabs before being written. ``save_dir`` is
        created when it does not exist.

        NOTE(review): the root tag is ``annotations`` and ``depth`` is always
        written as ``"3"``, whatever ``result`` contains. Both are kept as
        found; confirm that they are intended.

        Args:
            result: Dict shaped like the return value of ``get_xml_info``.
                ``filename``, ``width``, ``height`` and ``object`` are
                required; ``folder`` and ``path`` are optional and produce
                empty elements when absent.
            save_dir: Directory that receives the XML file.

        Returns:
            str: Path of the written XML file.

        Raises:
            KeyError: If a required key is missing from ``result`` or from one
                of its object dicts.
        """
        image_stem = os.path.splitext(result['filename'])[0]
        xmlPath = os.path.join(save_dir, image_stem) + ".xml"

        root = ET.Element("annotations")
        # folder/path may be missing from the parsed source; an absent value
        # becomes an empty element instead of a KeyError.
        ET.SubElement(root, "folder").text = result.get('folder')
        ET.SubElement(root, "filename").text = result['filename']
        ET.SubElement(root, "path").text = result.get('path')

        size = ET.SubElement(root, "size")
        ET.SubElement(size, "width").text = str(result['width'])
        ET.SubElement(size, "height").text = str(result['height'])
        ET.SubElement(size, "depth").text = "3"

        for info in result['object']:
            obj = ET.SubElement(root, "object")
            for tag in ("name", "pose", "truncated", "difficult"):
                ET.SubElement(obj, tag).text = str(info[tag])
            bbox = ET.SubElement(obj, "bndbox")
            for tag in ("xmin", "ymin", "xmax", "ymax"):
                # Coordinates may arrive as float-like strings; truncate to int.
                ET.SubElement(bbox, tag).text = str(int(float(info['bndbox'][tag])))

        # Indent in memory and write once; no need to write, re-parse and
        # write again.
        prettyXml(root, '\t', '\n')
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        ET.ElementTree(root).write(xmlPath)
        return xmlPath

    def update_node(self, xml, save_dir, note, note_value):
        """Set the text of every ``note`` node and save a copy of the XML.

        The copy is written to ``<save_dir>/<basename of xml>`` even when no
        node matched; in that case a message is printed and the copy is
        unchanged. ``save_dir`` is created when it does not exist.

        Args:
            xml: Path of the XML file to read.
            save_dir: Directory that receives the modified copy.
            note: Tag to change; one of ``folder``, ``filename``, ``path``,
                ``width``, ``height``, ``depth``, ``name``, ``pose``,
                ``truncated``, ``difficult``, ``xmin``, ``xmax``, ``ymin``,
                ``ymax``.
            note_value: New value. It is converted with ``str``, except that
                ``None`` clears the text of ``folder`` / ``filename`` /
                ``path``.

        Returns:
            str: Path of the written XML file.
        """
        xmlPath = os.path.join(save_dir, os.path.basename(xml))
        tree = ET.parse(xml)
        root = tree.getroot()

        node_path = self._NODE_PATHS.get(note)
        targets = root.findall(node_path) if node_path else []

        if note in ("folder", "filename", "path") and note_value is None:
            new_text = None
        else:
            # Element text must be a string, otherwise serialisation fails.
            new_text = str(note_value)

        for node in targets:
            node.text = new_text

        if not targets:
            # findall never raises for a missing node, so the miss has to be
            # detected explicitly.
            print("修改的节点不在xml")

        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        tree.write(xmlPath)
        return xmlPath


if __name__ == "__main__":
    xml_path = 'C:\\Users\\Administrator\\Desktop\\1.xml'
    save_dir = 'C:\\Users\\Administrator\\Desktop\\merge1'
    xmlInfo = ParseXml(xml_path)
    result = xmlInfo.get_xml_info()
    print(result)
    xmlPath = xmlInfo.write_to_xml(result, save_dir)
    print("--*--xmlPath--*--:", xmlPath)
    xmlInfo.update_node(xmlPath, save_dir, "xmin", 200)