svn commit: samba-web r564 - in trunk/scripts: .

deryck at samba.org deryck at samba.org
Thu Mar 10 21:10:52 GMT 2005


Author: deryck
Date: 2005-03-10 21:10:51 +0000 (Thu, 10 Mar 2005)
New Revision: 564

WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba-web&rev=564

Log:

Fix XML parsing errors.

deryck

Modified:
   trunk/scripts/updateNews.py


Changeset:
Modified: trunk/scripts/updateNews.py
===================================================================
--- trunk/scripts/updateNews.py	2005-03-10 19:40:40 UTC (rev 563)
+++ trunk/scripts/updateNews.py	2005-03-10 21:10:51 UTC (rev 564)
@@ -192,15 +192,24 @@
 feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
 feed.write('<link>http://news.samba.org/</link>\n\n')
 
+# Characters to avoid as "undefined entities" in XML
+ents = { '&mdash;' : '--', '&amp;' : 'and' }
+
 count = 10
 for date in post_dates:
+	item_text = all_stories[date]
+	if '&' in item_text and ';' in item_text:
+		for ent in ents.keys():
+			item_text = item_text.replace(ent, ents[ent])
+				
 	if count > 0:
-		title = re.search('(?<=\"\>).+(?=\<\/a)', all_stories[date])
-		link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', all_stories[date])
+		title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
+		link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', item_text)
 
-		begin = all_stories[date].find('<p>')
-		end = all_stories[date].find('</p>')
-		descrip = all_stories[date][begin:end]
+		# Index out the HTML tags for XML
+		begin = item_text.find('<p>') + 3
+		end = item_text.find('</p>') 
+		descrip = item_text[begin:end]
 
 		feed.write('<item>\n')
 		feed.write('<title>' + title.group(0) + '</title>\n')



More information about the samba-cvs mailing list