I managed to use xml.etree.ElementTree to parse an XML file, search for content, and then write it to another XML file. However, I only worked with the text inside a single tag.
import glob
import os
import sys
import xml.etree.ElementTree as ET

# Root folder; each sub-folder is searched for "*overall.xml" metadata files.
path = r"G:\\63D RRC GIS Data\\metadata\\general\\2010_contract"

# For every folder that holds a matching metadata file, copy the <abstract>
# text found in archive/base_metadata_overall.xml into the <abstract> child of
# each <descript> element of base_metadata_overall.xml, then rewrite that file.
for fn in os.listdir(path):
    for filepath in glob.glob(os.path.join(path, fn, "*overall.xml")):
        (pa, filename) = os.path.split(filepath)
        target_path = os.path.join(pa, "base_metadata_overall.xml")

        # findall(".//tag") replaces getiterator(), which was removed from
        # xml.etree.ElementTree in Python 3.9.
        archive_root = ET.parse(
            os.path.join(pa, "archive", "base_metadata_overall.xml")).getroot()
        abstracts = archive_root.findall(".//abstract")
        if not abstracts:
            # Without this guard the name would be unbound for the first
            # folder, or silently reuse the abstract of the previous folder.
            print("skipped --- " + filename + " (no abstract in archive copy)")
            continue
        # When several abstracts exist, the last one wins.
        correct_abstract = abstracts[-1].text

        target_tree = ET.parse(target_path)
        for descript in target_tree.getroot().findall(".//descript"):
            # The posted version tested item.tag == "abstract" on elements
            # that were selected by tag "descript", so the branch never ran.
            # The abstract child is looked up directly instead, and its text
            # is replaced in place rather than removing the element and
            # appending a <title> child to <descript>.
            old_abstract = descript.find("abstract")
            if old_abstract is None:
                continue
            old_abstract.text = correct_abstract

        target_tree.write(target_path)
        print("created --- " + filename + " metadata")
But now I need:
1) Open an XML file and take everything between the attr tags; below is an example:
<attr><attrlabl Sync="TRUE">OBJECTID</attrlabl><attalias Sync="TRUE">ObjectIdentifier</attalias><attrtype Sync="TRUE">OID</attrtype><attwidth Sync="TRUE">4</attwidth><atprecis Sync="TRUE">0</atprecis><attscale Sync="TRUE">0</attscale><attrdef Sync="TRUE">Internal feature number.</attrdef></attr>
2) Then I need to open another XML file, find everything between the same attr tags, and replace it with the content taken above.
Basically, what I did before, but ignoring the sub-elements, attributes, etc. between the attr tags and treating them as text.
Thanks!!
Please bear with me; posting on this forum works a little differently from what I am used to!
Here is what I have so far:
import copy
import glob
import os
import re
import sys
import xml.etree.ElementTree as ET

from lxml import etree

# Root folder; each sub-folder is searched for "*overall.xml" metadata files.
path = r"C:\\temp\\python\\xml"

# For every folder that holds a matching metadata file, replace the <attr>
# elements of base_metadata_overall.xml with copies of the <attr> elements of
# attributes.xml (paired in document order) and print the resulting XML.
for fn in os.listdir(path):
    for filepath in glob.glob(os.path.join(path, fn, "*overall.xml")):
        (pa, filename) = os.path.split(filepath)

        # Both documents are parsed as a whole.  The posted version cut single
        # <attr> fragments out with a regex (reading the wrong text for the
        # target and keeping only the last match) and parsed one fragment on
        # its own, which leaves the <attr> without a parent to replace it in.
        # etree.parse() also closes the files, unlike the bare open() calls.
        new = etree.parse(os.path.join(pa, "attributes.xml")).getroot()
        old = etree.parse(
            os.path.join(pa, "base_metadata_overall.xml")).getroot()

        replacement = new.xpath('//attr')
        # zip() stops at the shorter list, so surplus <attr> elements on
        # either side are left as they are.
        for attr, fresh in zip(old.xpath('//attr'), replacement):
            # replace() needs a single element, not the whole list; the copy
            # keeps the source tree intact.
            attr.getparent().replace(attr, copy.deepcopy(fresh))

        print(etree.tostring(old))
, . , , .xml
, # of attr is diff. dest, , ?
node= replacements.pop()
IndexError:
import copy
import glob
import os
import re
import sys
import xml.etree.ElementTree as ET

import lxml
from lxml import etree

# Root folder; each sub-folder is searched for "*overall.xml" metadata files.
path = r"C:\\temp\\python\\xml"

# For every folder that holds a matching metadata file, replace each <attr>
# element of base_metadata_overall.xml with a copy of the <attr> element at
# the same position in attributes.xml and save the result as
# edited_metadata.xml in the same folder.
for fn in os.listdir(path):
    for filepath in glob.glob(os.path.join(path, fn, "*overall.xml")):
        # The posted version used `pa` without ever assigning it.
        (pa, filename) = os.path.split(filepath)

        # etree.parse() reads and closes the files itself; the bare open()
        # calls in the posted version were never closed.
        source = etree.parse(os.path.join(pa, "attributes.xml")).getroot()
        dest_tree = etree.parse(os.path.join(pa, "base_metadata_overall.xml"))
        dest = dest_tree.getroot()

        # Reversed so that pop() hands the source elements out in document
        # order.
        replacements = source.xpath('//attr')
        replacements.reverse()
        for attr in dest.xpath('//attr'):
            # The pairing is strictly one-to-one: pop() raises
            # "IndexError: pop from empty list" as soon as the target holds
            # more <attr> elements than attributes.xml.
            node = replacements.pop()
            attr.getparent().replace(attr, copy.deepcopy(node))

        # Written through lxml's own tree instead of wrapping the lxml
        # element in xml.etree.ElementTree.ElementTree, which mixes two
        # different element implementations.
        dest_tree.write(os.path.join(pa, "edited_metadata.xml"))
        print(fn + "--- sucessfully edited")
5/16/2011
, "IndexError: pop from empty list", . , "attr" 1 1. . .xml 20 attr, .xml 25 attr. 1 1 .
, attr, attr. "", , attr, "" .
, .
import copy
import glob
import os
import re
import sys
import xml.etree.ElementTree as ET

import lxml
from lxml import etree

# Root folder; each sub-folder is searched for "*overall.xml" metadata files.
path = r"G:\\63D RRC GIS Data\\metadata\\general\\2010_contract"

# For every folder that holds a matching metadata file: drop all <attr>
# elements from base_metadata_overall.xml, insert the <attr> and <subtype>
# elements of attributes.xml into each <detailed> element, and save the result
# as base_metadata_overall_v2.xml.  The two files need not contain the same
# number of <attr> elements.
for fn in os.listdir(path):
    # NOTE(review): both values are computed per folder but not used anywhere
    # in this script.
    correct_title = fn.replace('_', ' ') + " various facilities"
    correct_fc_name = fn.replace('_', ' ')

    for filepath in glob.glob(os.path.join(path, fn, "*overall.xml")):
        print("-----" + fn + "-----")
        (pa, filename) = os.path.split(filepath)

        # etree.parse() reads and closes the files itself; the bare open()
        # calls in the posted version were never closed.
        source = etree.parse(os.path.join(pa, "attributes.xml")).getroot()
        dest_tree = etree.parse(os.path.join(pa, "base_metadata_overall.xml"))
        dest = dest_tree.getroot()

        replacements = source.xpath('//attr')
        replacesubtypes = source.xpath('//subtype')

        # Remove every existing <attr> from the target document.
        for n in dest.xpath('//attr'):
            n.getparent().remove(n)
            print(n.tag + " removed")

        for n2 in dest.xpath('//detailed'):
            # Insertion starts at index 1, i.e. right after the first child
            # of <detailed>.  The index now advances with every insert; the
            # posted version kept inserting at index 1, which left the
            # elements in reverse document order.
            # NOTE(review): lxml moves an element that already has a parent,
            # so with more than one <detailed> only the last one keeps the
            # inserted elements - confirm whether copies are wanted.
            pos = 1
            for realatrs in replacements:
                n2.insert(pos, realatrs)
                pos += 1
                print("attr replaced")
            # Subtypes follow directly after the inserted attrs; an empty
            # list simply skips the loop, so no separate length check is
            # needed.
            for realsubtypes in replacesubtypes:
                n2.insert(pos, realsubtypes)
                pos += 1
                print("subtype replaced")

        # Written through lxml's own tree instead of wrapping the lxml
        # element in xml.etree.ElementTree.ElementTree, which mixes two
        # different element implementations.
        dest_tree.write(os.path.join(pa, "base_metadata_overall_v2.xml"))
        print(fn + "--- sucessfully edited")