Python HTML / XML
HTML fra en enkel tekstfil
Programmet leser inn en enkel tekstfil og plasserer innholdet i en pre-tag på en minimalistisk HTML-side. HTML-siden ligger som en tekst i programmet.
Input fil:
frej1.txt
Output fil:
frej1.html
Python koden:
"""
This is a module that reads a text file,
wraps it in a very simple HTML skeleton
and produces an HTML page
"""
# ------------------------------------------------------------
# HTML skeleton: the single %s slot receives the text that is
# displayed inside the <pre> element.
HTML_PAGE = """<html>
<head>
<title>a page</title>
</head>
<body>
<h1> Farbror Frej:</h1>
<pre>
%s
</pre>
</body>
</html>
"""

# ------------------------------------------------------------
# File names: the output path is the input path with its
# '.txt' part swapped for '.html'.
infile = 'c:\\web\\dw\\pyex\\frej1.txt'
outfile = infile.replace('.txt', '.html')
#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of the text file *filename*.

    Returns None, after printing a short message, when the file cannot
    be opened or read. Callers test the result for None instead of
    catching exceptions.
    """
    try:
        # 'with' closes the handle even when read() fails part-way.
        with open(filename, 'r') as handle:
            return handle.read()
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble reading: ' + filename)
        return None
def storeTextFile(filename, txt):
    """Write *txt* to the file *filename*, replacing any existing content.

    A failure to open or write the file is reported with a printed
    message and otherwise ignored. Nothing is returned.
    """
    try:
        # 'with' closes the handle even when write() fails.
        with open(filename, 'w') as handle:
            handle.write(txt)
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble writing to: ' + filename)
#------------------------
# do the job
def doit():
    """Wrap the text of `infile` in HTML_PAGE and save it as `outfile`.

    Does nothing when the input file could not be read (getTextFile
    has then already printed a message).
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    txt = getTextFile(infile)
    if txt is None:
        return
    # Escape &, < and > so that such characters in the text file are
    # displayed literally inside <pre> instead of being read as markup.
    storeTextFile(outfile, HTML_PAGE % escape(txt))


doit()
Transformasjon: CSV-XML
Modulen boktoxml gjør noen av de grunnleggende operasjonene som inngår i å lage en XML-fil fra en kommaseparert fil:
- åpner og leser en fil på en sivilisert måte
- splitter innholdet opp i linjer, og forkaster meningsløse linjer
- splitter hver linje i kommaseparerte deler
- bruker delene til å produsere XML-elementer
- skriver alt tilbake til fil på en sivilisert måte
Input fil: bokliste.txt
Output fil: bokliste.xml
(hvis nettleseren din tåler det)
Pythonkoden:
"""
Transform a comma-separated (CSV) file to XML
Input data as lines:
title,author,publisher,year,isbn,pages,course,category,comment
"""
# ------------------------------------------------------------
# XML skeletons

# Template for one <book> element. The nine %s slots are, in
# order: isbn, pages, title, course, category, author,
# publisher, year, comment.
XMLFragment = """
<book isbn="%s" pages="%s">
<title>%s</title>
<course>%s</course>
<category>%s</category>
<author>%s</author>
<publisher>%s</publisher>
<year>%s</year>
<comment>%s</comment>
</book>
"""

# Template for the complete XML document; the %s slot receives
# the concatenated <book> fragments.
XMLFile = """<?xml version="1.0" encoding="ISO-8859-1"?>
<booklist>
%s
</booklist>
"""
#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of the text file *filename*.

    Returns None, after printing a short message, when the file cannot
    be opened or read. Callers test the result for None instead of
    catching exceptions.
    """
    try:
        # 'with' closes the handle even when read() fails part-way.
        with open(filename, 'r') as handle:
            return handle.read()
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble reading: ' + filename)
        return None
def storeTextFile(filename, txt):
    """Write *txt* to the file *filename*, replacing any existing content.

    A failure to open or write the file is reported with a printed
    message and otherwise ignored. Nothing is returned.
    """
    try:
        # 'with' closes the handle even when write() fails.
        with open(filename, 'w') as handle:
            handle.write(txt)
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble writing to: ' + filename)
#--------------------------------
# produce and save XML
def makeXML(filename='c:\\web\\dw\\pyex\\bokliste.txt'):
    """Convert the comma-separated book list *filename* to an XML file.

    Each usable input line holds nine comma-separated fields:
    title, author, publisher, year, isbn, pages, course, category,
    comment. Lines shorter than two characters and lines starting with
    '//' are skipped silently; lines with any other field count are
    reported and skipped. The result is stored next to the input with
    '.txt' replaced by '.xml'.

    Does nothing when the input could not be read or is empty.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    text = getTextFile(filename)
    # getTextFile returns None on failure; treat that like an empty file
    # instead of crashing on None.split().
    if not text:
        return

    fragments = []
    for line in text.split('\n'):
        # str.strip() returns a new string, so the result must be kept.
        # This also removes a trailing '\r' from Windows line endings.
        line = line.strip()
        # Drop empty lines and comment lines.
        if len(line) < 2 or line.startswith('//'):
            continue
        # A book line: split it into its fields (trimmed, as makeHTML does).
        pcs = [piece.strip() for piece in line.split(',')]
        if len(pcs) != 9:
            print('ignore: ' + line)
            continue
        (title, author, publisher, year, isbn,
         pages, course, category, comment) = pcs
        # Escape markup characters so a field such as "Smith & Sons"
        # cannot make the document malformed; attribute values also
        # need the double quote escaped.
        attrs = [escape(value, {'"': '&quot;'}) for value in (isbn, pages)]
        elems = [escape(value) for value in
                 (title, course, category, author, publisher, year, comment)]
        fragments.append(XMLFragment % tuple(attrs + elems))

    storeTextFile(filename.replace('.txt', '.xml'),
                  XMLFile % ''.join(fragments))


makeXML()
Transformasjon: CSV-HTML
Modulen boktohtml tar den samme tekst-fila som i eksempelet ovenfor og transformerer den til en htmlfil som viser en liste av bøker med forfatter.
Input fil: bokliste.txt
Output fil: bokliste.html
Python koden:
# transform a commaseparated (CSV) file to HTML
"""
Transform a comma-separated (CSV) file to HTML
Input data as lines:
title,author,publisher,year,isbn,pages,course,category,comment
"""
# ------------------------------------------------------------
# HTML skeletons

# Template for the complete HTML page; the %s slot inside <ul>
# receives the concatenated list items.
HTMLFile = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta HTTP-EQUIV="Content-Type"
content="text/html;
charset=iso-8859-1">
<title>books</title>
<style>
li{margin-top:10;}
.fat{font-weight:bold; color:red}
</style>
</head>
<body>
<h1>Bokliste</h1>
<ul>
%s
</ul>
</body>
</html>
"""

# Template for one list item; it has two %s slots, the first
# rendered with the "fat" style class.
HTMLFragment = """
<li>
<div class="fat">%s</div>
<div>%s</div>
</li>
"""
#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of the text file *filename*.

    Returns None, after printing a short message, when the file cannot
    be opened or read. Callers test the result for None instead of
    catching exceptions.
    """
    try:
        # 'with' closes the handle even when read() fails part-way.
        with open(filename, 'r') as handle:
            return handle.read()
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble reading: ' + filename)
        return None
def storeTextFile(filename, txt):
    """Write *txt* to the file *filename*, replacing any existing content.

    A failure to open or write the file is reported with a printed
    message and otherwise ignored. Nothing is returned.
    """
    try:
        # 'with' closes the handle even when write() fails.
        with open(filename, 'w') as handle:
            handle.write(txt)
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble writing to: ' + filename)
#--------------------------------
# produce and save HTML
def makeHTML(filename='c:\\web\\dw\\pyex\\bokliste.txt'):
    """Convert the comma-separated book list *filename* to an HTML list.

    Each usable input line holds nine comma-separated fields, of which
    the first two (title and author) are shown. Lines shorter than two
    characters and lines starting with '//' are skipped silently; lines
    with any other field count are reported and skipped. The result is
    stored next to the input with '.txt' replaced by '.html'.

    Does nothing when the input could not be read or is empty.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    text = getTextFile(filename)
    # Covers both a failed read (None) and an empty file ('').
    if not text:
        return

    items = []
    for line in text.split('\n'):
        # str.strip() returns a new string, so the result must be kept.
        # This also removes a trailing '\r' from Windows line endings.
        line = line.strip()
        # Drop too-short lines and comment lines.
        if len(line) < 2 or line.startswith('//'):
            continue
        pcs = line.split(',')
        # Only lines with exactly nine fields are acceptable.
        if len(pcs) != 9:
            print('ignoring: ' + line)
            continue
        # Escape &, < and > so the field text is shown literally.
        items.append(HTMLFragment % (escape(pcs[0].strip()),
                                     escape(pcs[1].strip())))

    storeTextFile(filename.replace('.txt', '.html'),
                  HTMLFile % ''.join(items))


makeHTML()
Transformasjon: XML-HTML
Modulen bokxmltohtml tar xml-fila og transformerer den til html.
Input fil: bokliste.xml
(hvis nettleseren din tåler det)
Output fil: boklistefromxml.html
Pythonkode:
"""
Produce an HTML file from an XML file
Input:
<booklist>
<book isbn="txt" pages="txt">
<title>txt</title>
<course>txt</course>
<category>txt</category>
<author>txt</author>
<publisher>txt</publisher>
<year>txt</year>
<comment>txt</comment>
</book>
...
</booklist>
Usage: produceHTML(filename)
"""
# The description above is placed before the import so that it is the
# first statement of the script and therefore its module docstring.
import xml.dom.minidom

#--------------------------------------
# Skeleton for the HTML file; the %s slot in <body> receives the
# concatenated per-book fragments.
HTMLFile="""<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<META http-equiv="Content-Type" content="text/html;
charset=iso-8859-1\">
<title>bokliste</title>
<!-- produced by python -->
</head>
<body>
<h2>Litteraturliste</h2>
%s
</body>
</html>
"""
#------------------------
# Read / write text files
def getTextFile(filename):
    """Return the whole content of the text file *filename*.

    Returns None, after printing a short message, when the file cannot
    be opened or read. Callers test the result for None instead of
    catching exceptions.
    """
    try:
        # 'with' closes the handle even when read() fails part-way.
        with open(filename, 'r') as handle:
            return handle.read()
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble reading: ' + filename)
        return None
def storeTextFile(filename, txt):
    """Write *txt* to the file *filename*, replacing any existing content.

    A failure to open or write the file is reported with a printed
    message and otherwise ignored. Nothing is returned.
    """
    try:
        # 'with' closes the handle even when write() fails.
        with open(filename, 'w') as handle:
            handle.write(txt)
    except (IOError, OSError):
        # A parenthesised single-argument print gives the same output as
        # the Python 2 print statement and also parses on Python 3.
        print('Trouble writing to: ' + filename)
#------------------------------------
# collect all text in a node
def getText(nodelist):
    """Return the text of all text nodes in *nodelist*, concatenated.

    Nodes of any other type are ignored (there is no recursion into
    child elements). Each text node's data is encoded as ISO-8859-1,
    so the result is a byte string; it is empty when *nodelist* holds
    no text nodes.
    """
    # Join once instead of growing the result with '+=' per node. The
    # bytes literal matches the type produced by encode().
    return b''.join([node.data.encode('ISO-8859-1')
                     for node in nodelist
                     if node.nodeType == node.TEXT_NODE])
def getStrippedText(nodelist):
    """Return the text of all text nodes in *nodelist*, each stripped.

    Leading and trailing whitespace is removed from every text node
    before the pieces are concatenated, so whitespace-only nodes
    contribute nothing. Nodes of any other type are ignored. Each piece
    is encoded as ISO-8859-1, so the result is a byte string.
    """
    # Strip first, then encode. The original stripped the text and then
    # overwrote the result by re-encoding the unstripped node data.
    return b''.join([node.data.strip().encode('ISO-8859-1')
                     for node in nodelist
                     if node.nodeType == node.TEXT_NODE])
#----------------------------------------
# Produce a fragment for one book
def makeBook(q):
    """Return the HTML fragment for one <book> element *q*.

    The fragment is an <h3> with the title, a <div> with the isbn
    attribute, and one <div> each for course, category, author,
    publisher, year and comment, in that order. A child element that
    is missing yields an empty <h3>/<div> instead of an IndexError.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def fieldText(tag):
        # Text of the first <tag> descendant, escaped for use in HTML.
        found = q.getElementsByTagName(tag)
        if not found:
            return ''
        return escape(getText(found[0].childNodes))

    # The XML parser has already turned entities such as &amp; back
    # into plain characters, so they must be escaped again for HTML.
    isbn = escape(q.getAttribute('isbn').encode('ISO-8859-1'))
    parts = ['<h3>%s</h3>\n' % fieldText('title'),
             '<div>isbn:%s</div>\n' % isbn]
    for tag in ('course', 'category', 'author',
                'publisher', 'year', 'comment'):
        parts.append('<div>%s</div>\n' % fieldText(tag))
    return ''.join(parts)
#----------------------------------
# Produce the entire file and save it
def produceHTML(infile='c:\\web\\dw\\pyex\\bokliste.xml'):
    """Render the book list in the XML file *infile* as an HTML file.

    Every <book> element is converted with makeBook and the result is
    stored next to the input with '.xml' replaced by 'fromxml.html'.
    Does nothing when the file cannot be read (getTextFile has then
    printed a message) or cannot be parsed as XML (a message is printed
    here).
    """
    # Function-scope import keeps this block self-contained.
    from xml.parsers.expat import ExpatError

    # Load the XML file.
    document = getTextFile(infile)
    if document is None:
        return

    # Build the DOM tree. Malformed input is reported in the same way
    # as the file helpers report I/O trouble.
    try:
        dom = xml.dom.minidom.parseString(document)
    except ExpatError:
        print('Trouble parsing: ' + infile)
        return

    try:
        # One fragment per <book>, joined once at the end.
        body = ''.join([makeBook(book)
                        for book in dom.getElementsByTagName('book')])
    finally:
        # Release the DOM tree even if makeBook raises.
        dom.unlink()

    # Write the file.
    storeTextFile(infile.replace('.xml', 'fromxml.html'), HTMLFile % body)


produceHTML()
Du kan sammenligne denne Pythonkoden med en XSLT-fil som gjør samme jobben:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- Stylesheet that renders a booklist document as an HTML page:
     one heading plus a few div elements per book element. -->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Serialise as indented XML with an XHTML 1.0 Strict doctype,
     encoded as ISO-8859-1. -->
<xsl:output method="xml"
omit-xml-declaration="no"
indent="yes"
doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
encoding="ISO-8859-1"/>
<!-- Document root: emit the page skeleton and process every
     booklist/book element inside the body. -->
<xsl:template match="/">
<html>
<head>
<title>bokliste</title>
</head>
<body>
<h1>Litteratur</h1>
<xsl:apply-templates select="booklist/book"/>
</body>
</html>
</xsl:template>
<!-- One book: title as h3, then author, "publisher, year", the isbn
     attribute and the comment, each in its own div. -->
<xsl:template match="book">
<h3><xsl:value-of select="title"/></h3>
<div><xsl:value-of select="author"/></div>
<div><xsl:value-of select="publisher"/>,
<xsl:value-of select="year"/></div>
<div>Isbn: <xsl:value-of select="@isbn"/></div>
<div class="kommentar"><xsl:value-of select="comment"/></div>
</xsl:template>
</xsl:stylesheet>