Python 批量处理 XML 文件


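以下脚本的功能是:遍历目录下的每个 XML 文件,从 Pages 节点的 pageCount 属性读出总页数,删除约三分之一的 Page 节点(代码按 pageCount / 3 取整计算,即 pageNum 大于保留数的节点,并非精确的 30%),保留其余约三分之二的节点数据,然后把修改后的内容以原文件名保存到当前工作目录。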

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/python
# -*- coding=utf-8 -*-
# author : kernelsky
# date: 2018-06-05
# version: 0.2

import os
import xml.etree.ElementTree as ET

def file_list(dir):
    """Walk *dir* recursively and trim every file found with ``delete_xml``.

    Each file's full path is passed to ``delete_xml`` as the input and its
    bare file name as the output target, so the trimmed copies are written
    relative to the current working directory. The full path is printed
    once the file has been processed.

    Args:
        dir: Root directory to walk. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)
    """
    for root, _dirs, names in os.walk(dir):
        for name in names:
            path = os.path.join(root, name)
            # NOTE: the output target is only the file name, so files that
            # share a name in different sub-directories overwrite each other.
            delete_xml(path, name)
            # Call form of print: valid on Python 3 and, for a single
            # argument, prints the same text on Python 2.
            print(path)

def delete_xml(File_list, file):
    """Remove the trailing third of the pages in an XML file and save it.

    The page total is read from the ``pageCount`` attribute of the ``Pages``
    element; if several ``Pages`` elements exist, the last one found decides
    the total for the whole document. One third of that total (rounded down)
    is dropped: every child of a ``Pages`` element whose ``pageNum``
    attribute is greater than the number of pages to keep is removed. The
    resulting tree is written to *file* as UTF-8.

    A document without any ``Pages`` element is written out unchanged.

    Args:
        File_list: Path of the XML file to read.
        file: Path (or bare file name) the trimmed document is written to.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        KeyError: If a ``Pages`` element lacks ``pageCount`` or one of its
            children lacks ``pageNum``.
        ValueError: If either attribute is not an integer.
    """
    tree = ET.parse(File_list)
    root = tree.getroot()

    # Local (not global) so a count from a previously processed file can
    # never leak into a document that has no Pages element.
    page_count = None
    for pages in root.iter('Pages'):
        page_count = int(pages.attrib["pageCount"])

    if page_count is not None:
        print(page_count)
        # Floor division keeps the integer result on both Python 2 and 3.
        keep_count = page_count - page_count // 3
        for pages in root.iter('Pages'):
            # Iterate over a snapshot: removing children from the element
            # being iterated skips siblings, and the retry loop that used to
            # compensate never terminated when no surviving page was visited.
            for page in list(pages):
                if int(page.attrib["pageNum"]) > keep_count:
                    pages.remove(page)

    tree.write(file, encoding="utf-8")

# Root directory whose files are processed when this file is run as a script.
# Named constant instead of rebinding the builtin ``dir`` at module level.
EBOOK_DIR = "E:\\week\\ebooks"

# Guarded so importing this module does not start rewriting files.
if __name__ == "__main__":
    file_list(EBOOK_DIR)

Whatever is worth doing is worth doing well.