svn commit: samba-web r564 - in trunk/scripts: .
deryck at samba.org
deryck at samba.org
Thu Mar 10 21:10:52 GMT 2005
Author: deryck
Date: 2005-03-10 21:10:51 +0000 (Thu, 10 Mar 2005)
New Revision: 564
WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba-web&rev=564
Log:
Fix XML parsing errors.
deryck
Modified:
trunk/scripts/updateNews.py
Changeset:
Modified: trunk/scripts/updateNews.py
===================================================================
--- trunk/scripts/updateNews.py 2005-03-10 19:40:40 UTC (rev 563)
+++ trunk/scripts/updateNews.py 2005-03-10 21:10:51 UTC (rev 564)
@@ -192,15 +192,24 @@
feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
feed.write('<link>http://news.samba.org/</link>\n\n')
+# Characters to avoid as "undefined entities" in XML
+ents = { '&mdash;' : '--', '&' : 'and' }
+
count = 10
for date in post_dates:
+ item_text = all_stories[date]
+ if '&' in item_text and ';' in item_text:
+ for ent in ents.keys():
+ item_text = item_text.replace(ent, ents[ent])
+
if count > 0:
- title = re.search('(?<=\"\>).+(?=\<\/a)', all_stories[date])
- link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', all_stories[date])
+ title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
+ link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', item_text)
- begin = all_stories[date].find('<p>')
- end = all_stories[date].find('</p>')
- descrip = all_stories[date][begin:end]
+ # Index out the HTML tags for XML
+ begin = item_text.find('<p>') + 3
+ end = item_text.find('</p>')
+ descrip = item_text[begin:end]
feed.write('<item>\n')
feed.write('<title>' + title.group(0) + '</title>\n')
More information about the samba-cvs
mailing list