好得很程序员自学网

<tfoot draggable='sEl'></tfoot>

使用python解析xml成对应的html示例分享

使用SAX将ddt.xml解析成html(输出文件为ddt_SAX.html)。当然,如果得到了xml对应的xsl文件,也可以直接用libxml2将其转换成html。

代码如下:


#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
# 程序:XML解析器
# 版本:01.0
# 作者:mupeng
# 日期:2013-12-18
# 语言:Python 2.7
# 功能:将xml解析成对应的html
# 注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
# 继承ContentHandler并重写其事件处理函数
# Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element named ``title`` the start event is routed to a method
    called ``startTitle`` and the end event to ``endTitle``.  When no such
    callable attribute exists, ``defaultStart`` / ``defaultEnd`` is used
    instead, if the subclass defines it.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for an element event, or the default handler.

        prefix -- event kind used to build the method name ('start' or 'end'
                  are the values passed by startElement/endElement).
        name   -- element name; it is passed through ``str.capitalize`` so
                  only the first character is upper-cased ('pubDate' maps to
                  '<prefix>Pubdate').
        attrs  -- accepted for interface compatibility; handlers are invoked
                  without arguments, so the attributes are not forwarded.

        Nothing is called (and nothing is raised) when neither the specific
        nor the default handler is a callable attribute.
        """
        method = getattr(self, prefix + name.capitalize(), None)
        if not callable(method):
            # No dedicated handler for this tag: fall back to the catch-all.
            method = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(method):
            method()

    def startElement(self, name, attrs):
        """SAX callback: route the opening of element *name*."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: route the closing of element *name*."""
        self.dispatch('end', name)

class Website(Dispatcher, ContentHandler):
# SAX content handler that writes its output to the file 'ddt_SAX.html'.
# It defines start/end handlers for the channel, title, image, link, url,
# description, guid, pubdate and item elements (presumably an RSS feed --
# confirm against the input document).
#
# Element events reach these handlers through Dispatcher.startElement and
# Dispatcher.endElement, which look up start<Tag>/end<Tag> methods by name.
# Character data is accumulated in self.temp by characters() and consumed by
# the end<Tag> handlers.
#
# NOTE(review): this copy of the script has lost its indentation and the
# markup that used to be inside the string literals. The literals opened in
# startLink, startUrl and endItem are never terminated, so the class does not
# parse as shown. Code and literals are deliberately left untouched here;
# restore them from the original script before running it.
#
# NOTE(review): the handlers between startLink and endUrl carry no comments
# because the unterminated literals make it impossible to tell code from
# literal text in that region.

# Opens the output file for writing and initialises the parsing state.
# NOTE(review): ContentHandler.__init__ is not called here.
#   imagein     -- set by startImage, cleared by endImage
#   item        -- set by startItem, cleared by endItem
#   temp        -- buffer filled by characters()
#   title, link, guid, url, pubdate, description -- last value captured by the
#                  matching end<Tag> handler
#   desflag, prx -- only assigned here; not referenced elsewhere in this class
def __init__(self):
self.fout = open('ddt_SAX.html', 'w')
self.imagein = False
self.desflag = False
self.item = False
self.title = ''
self.link = ''
self.guid = ''
self.url = ''
self.pubdate = ''
self.description = ''
self.temp = ''
self.prx = ''
# Writes a fixed literal to the output when the channel element starts.
def startChannel(self):

self.fout.write(''' \n \n RSS-''')

# Writes the closing literal (it contains the text of the GetTimeDiff and
# GetSpanText script functions) and then closes the output file.
def endChannel(self):
self.fout.write('''




function GetTimeDiff(str)
{
if(str == '')
{
return '';
}

var pubDate = new Date(str);
var nowDate = new Date();
var diffMilSeconds = nowDate.valueOf()-pubDate.valueOf();
var days = diffMilSeconds/86400000;
days = parseInt(days);

diffMilSeconds = diffMilSeconds-(days*86400000);
var hours = diffMilSeconds/3600000;
hours = parseInt(hours);

diffMilSeconds = diffMilSeconds-(hours*3600000);
var minutes = diffMilSeconds/60000;
minutes = parseInt(minutes);

diffMilSeconds = diffMilSeconds-(minutes*60000);
var seconds = diffMilSeconds/1000;
seconds = parseInt(seconds);

var returnStr = "±±¾©·¢²¼Ê±¼ä£º" + pubDate.toLocaleString();

if(days > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + days + "Ìì" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";
}
else if (hours > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + hours + "Сʱ" + minutes + "·ÖÖÓ£©";
}
else if (minutes > 0)
{
returnStr = returnStr + " £¨¾àÀëÏÖÔÚ" + minutes + "·ÖÖÓ£©";
}

return returnStr;

}

function GetSpanText()
{
var pubDate;
var pubDateArray;
var spanArray = document.getElementsByTagName("span");

for(var i = 0; i {
pubDate = spanArray[i].innerHTML;
document.getElementsByTagName("span")[i].innerHTML = GetTimeDiff(pubDate);
}
}

GetSpanText();



''')
self.fout.close()

# SAX callback for character data: appends the chunk to self.temp unless the
# chunk is empty or consists only of whitespace.
def characters(self, chars):
if chars.strip():
#chars = chars.strip()
self.temp += chars
#print self.temp


# Writes a fixed literal when a title starts while self.item is set.
def startTitle(self):

if self.item:
self.fout.write('''
\n \n
''')

# Handles the end of a title element. Outside of image and item elements the
# buffered text is stored as self.title, the buffer is reset and the title is
# written gb2312-encoded.
# NOTE(review): with the indentation lost it cannot be determined here whether
# the literal written next (containing the copyLink/subscibeLink script text)
# belongs to that first branch or is written unconditionally.
def endTitle(self):

if not self.imagein and not self.item:
self.title = self.temp
self.temp = ''
self.fout.write(self.title.encode('gb2312'))

#self.title = self.temp
self.fout.write('''
\n \n\n \n
\n

function copyLink()
{
clipboardData.setData("Text",window.location.href);
alert("RSSÁ´½ÓÒѾ­¸´ÖƵ½¼ôÌù°å");
}

function subscibeLink()
{
var str = window.location.pathname;
while(str.match(/^\//))
{
str = str.replace(/^\//,"");
}
window.open("http://rss.sina测试数据.cn/my_sina_web_rss_news.html?url=" + str,"_self");

}
\n
\n
\n
\n
''')

# Title that ends while self.item is set: store it, reset the buffer, write it
# gb2312-encoded followed by a fixed literal.
if self.item:
self.title = self.temp
self.temp = ''
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''





''')

# startImage/endImage toggle self.imagein, which the title, link and url
# handlers test.
def startImage(self):
self.imagein = True

def endImage(self):
self.imagein = False

# Writes a literal when a link starts while self.imagein is set.
# NOTE(review): the literal opened by the write call below is not terminated
# in this copy, so the text up to the next triple quote is damaged.
def startLink(self):
if self.imagein:
self.fout.write('''

def endLink(self):
self.link = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.link.encode('gb2312'))
self.fout.write('''" target="_blank">\n ''')
elif self.item:
#self.link = self.temp
pass
else:
self.fout.write(self.link)
self.fout.write(''' " target="
_blank
"> ''')
self.fout.write(self.title.encode('gb2312'))
self.fout.write('''


''')
self.fout.write(self.description.encode('gb2312'))
self.fout.write('''

¸´ÖÆ´ËÒ³Á´½Ó ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ãæ£¨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ·Ñ£©


''')

def startUrl(self):
if self.imagein:
self.fout.write(''' def endUrl(self):
self.url = self.temp
self.temp = ''
if self.imagein:
self.fout.write(self.url.encode('gb2312'))
self.fout.write('''" border="0">\n


''')
if self.item:
#self.url = self.temp
pass

# Fallback handlers that Dispatcher.dispatch uses for elements without a
# dedicated start<Tag>/end<Tag> method: defaultStart does nothing, defaultEnd
# discards whatever text was buffered.
def defaultStart(self):
pass
def defaultEnd(self):
self.temp = ''
# startDescription does nothing. endDescription stores the buffered text as
# self.description, resets the buffer and, while self.item is set, writes the
# description gb2312-encoded.
def startDescription(self):
pass
def endDescription(self):
self.description = self.temp
self.temp = ''
if self.item:
#self.fout.write('¡¡¡¡')
self.fout.write(self.description.encode('gb2312'))

# Stores the buffered text as self.guid.
# NOTE(review): unlike the other end handlers, self.temp is not reset here.
def endGuid(self):
self.guid = self.temp
# Stores the buffered text as self.pubdate and resets the buffer, unless the
# text starts with 'http'; in that case pubdate is set to the empty string and
# the buffer is left unchanged.
def endPubdate(self):
if not self.temp.startswith('http'):
self.pubdate = self.temp
self.temp = ''
else:
self.pubdate = ''
# startItem/endItem toggle self.item. endItem additionally writes the captured
# link, guid and pubdate interleaved with fixed literals.
# NOTE(review): the first literal opened in endItem is not terminated in this
# copy, so the text between it and the next triple quote is damaged.
def startItem(self):
self.item = True
def endItem(self):
self.item = False
self.fout.write('''




self.fout.write(self.link)
self.fout.write(''' " target="_blank"> ''')
self.fout.write(self.guid)
self.fout.write('''




''')
self.fout.write(self.pubdate)
self.fout.write('''

''')

# Program entry point: parse 'ddt.xml' with the Website handler, which writes
# its output to 'ddt_SAX.html'.
if __name__ == '__main__':
    parse('ddt.xml', Website())

查看更多关于使用python解析xml成对应的html示例分享的详细内容...

  阅读:50次

CopyRight:2016-{hedonghua:year}{hedonghua:sitegs} 备案ICP:湘ICP备09009000号-16 {hedonghua:sitejym}
本站资讯不构成任何建议,仅限于个人分享,参考须谨慎!
本网站对有关资料所引致的错误、不确或遗漏,概不负任何法律责任。
本网站刊载的所有内容(包括但不仅限文字、图片、LOGO、音频、视频、软件、程序等)版权归原作者所有。任何单位或个人认为本网站中的内容可能涉嫌侵犯其知识产权或存在不实内容时,请及时通知本站,予以删除。

网站内容来源于网络分享,如有侵权发邮箱到:kenbest@126.com,收到邮件我们会即时下线处理。
网站框架支持:HDHCMS   51LA统计 百度统计
Copyright © 2018-2026 「好得很程序员自学网」
[ SiteMap ]