svn commit: samba-web r1001 - in trunk/scripts: .

deryck at samba.org
Tue Jun 13 22:10:37 GMT 2006


Author: deryck
Date: 2006-06-13 22:10:34 +0000 (Tue, 13 Jun 2006)
New Revision: 1001

WebSVN: http://websvn.samba.org/cgi-bin/viewcvs.cgi?view=rev&root=samba-web&rev=1001

Log:
news.samba.org is hosted on jump now, so we only need
to pull headlines for samba.org and its mirrors.
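
For reference, the reduced script now amounts to a single fetch-and-write,
sketched below. The IOError guard and the temp-file rename are illustrative
hardening added here, not part of the committed script; the URL and the
destination path are taken from the diff:

#! /usr/bin/python
# Minimal sketch of the reduced updateNews.py (Python 2, as committed).
# The error handling and atomic rename are assumptions for illustration;
# the actual commit writes headlines.html directly.
import os
from urllib import urlopen

DEST = '/data/httpd/html/samba/news/headlines.html'

try:
    html = urlopen('http://news.samba.org/headlines/').read()
except IOError:
    # On a failed fetch, keep the previous headlines.html untouched.
    raise SystemExit(1)

tmp = DEST + '.tmp'
out = open(tmp, 'w')
out.write(html)
out.close()
os.rename(tmp, DEST)   # atomic on POSIX; readers never see a partial file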

deryck

Modified:
   trunk/scripts/updateNews.py


Changeset:
Modified: trunk/scripts/updateNews.py
===================================================================
--- trunk/scripts/updateNews.py	2006-06-13 18:24:56 UTC (rev 1000)
+++ trunk/scripts/updateNews.py	2006-06-13 22:10:34 UTC (rev 1001)
@@ -1,246 +1,12 @@
 #! /usr/bin/python
 
-# Copyright (C) 2004 by Deryck Hodge <deryck at samba.org>
-# 
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
-# USA
+""" Get headlines froms news.samba.org for inclusion on samba.org """
 
-""" updateNews.py -- a script for handling files on news.samba.org."""
+from urllib import urlopen
 
-import os, time, re
-from stat import ST_MTIME
+html = urlopen('http://news.samba.org/headlines/').read()
 
-top_dir = '/data/httpd/html/samba/news'      # set to news directory path
-not_news = ['.svn', 'images', 'style', 'calendar', 'index.html', 
-                'articles', 'static', 'js']
-
-
-# Get list of news directories.  Then, pair up dir name with dir files.
-os.chdir(top_dir)
-topics = []                
-
-for file in os.listdir(os.curdir):
-    if file in not_news: continue
-    if os.path.isdir(file):
-        topics.append(file)
-topics.sort()
-
-topics_files = {}
-for topic in topics:
-    topics_files[topic] = os.listdir(topic)
-    
-
-# Write list of topics to 'sections.html'
-sections = open('sections.html', 'w')
-sections.write('<ul>')
-for topic in topics:
-    sections.write('<li><a href="/samba/news/' + topic + '/">' + topic + '/</a></li>')
-sections.write('</ul>')
-sections.close()
-
-
-# Define function for converting date tuple to string
-def date_to_str((year, mn, dy)):
-    mn_name = ('', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')
-
-    return str(dy) + ' ' + mn_name[mn] + ' ' + str(year)
-
-
-# Loop through each directory, find all stories, and create main index.html
-# Store filename/path info for search engine while we're here
-all_stories = {}
-search_index = {}
-for topic in topics:        
-    os.chdir(topic)
-    
-    for file in os.listdir(os.curdir):
-        if file in not_news: continue
-        f_lines = open(file, 'r').readlines()
-        story = "".join(f_lines) + '<div class="reference">Link: <a href="/samba/news/' + topic + '/#' + file[:-5] + '">' + topic + '/</a></div>\n\n'
-        f_date = os.stat(file)[ST_MTIME]
-        all_stories[f_date] = story
-
-        # Search engine setup.
-        search_index[file[:-5]] = '/samba/news/' + topic + '/' + file
-
-    os.chdir(top_dir)
-
-post_dates = all_stories.keys()
-post_dates.sort()
-post_dates.reverse()
-
-index = open('index.html', 'w')
-index.write('<!--#include virtual="/samba/news/header.html" -->\n')
-index.write('<title>news.samba.org</title>\n')
-index.write('<!--#include virtual="/samba/news/header2.html" -->\n\n')
-index.close()
-
-# Only list the 40 most recent stories on the main news page
-count = 40
-for date in post_dates:
-
-    if count > 0:
-        news_text = all_stories[date]
-        h2date = date_to_str(time.gmtime(date)[:3])
-        index = open('index.html', 'a')
-        if open('index.html', 'r').read().find('<h2>' + h2date + '</h2>\n\n') >= 0:
-            index.write(news_text)
-        else:    
-            index.write('<h2>' + h2date + '</h2>\n\n')
-            index.write(news_text)
-        index.close()
-        count = count - 1
-
-index = open('index.html', 'a')
-index.write('<!--#include virtual="/samba/news/footer.html" -->\n\n')
-index.close()
-
-# Create the search index table for javascript
-os.chdir(top_dir + '/js')
-js = open('searchIndex.js', 'w')
-js.write('/*\n')
-js.write('   This file is auto-generated.  Do not edit.\n')
-js.write('*/\n\n')
-js.write('sIndex = {};\n')
-for filename in search_index.keys():
-    js.write('sIndex[\'' + filename + '\'] = \'' + search_index[filename] + '\';\n')
-#js.write('}\n')
-js.close()
-
-os.chdir(top_dir)
-
-# Define function that creates index.html for each directory.
-def archive(dir, files):
-    topic = dir
-    os.chdir(topic)
-    filelist = files
-    
-    stories_by_date = {}
-    
-    for file in filelist:
-        if file in not_news: continue
-        f_lines = open(file, 'r').readlines()
-        f_date = os.stat(file)[ST_MTIME]
-        stories_by_date[f_date] = f_lines
-
-    index = open('index.html', 'w')
-    index.write('<!--#include virtual="/samba/news/header.html" -->\n')
-    index.write('<title>' + topic + '/' + ' on news.samba.org</title>\n')
-    index.write('<!--#include virtual="/samba/news/header2.html" -->\n\n')
-    index.write('<h1>' + topic + ' archive on news.samba.org</h1>\n\n')
-    index.write('<p>All stories for the ' + topic + ' topic are archived here</p>\n\n')
-    index.close()
-    
-    post_dates = stories_by_date.keys()
-    post_dates.sort()
-    post_dates.reverse()
-
-    for date in post_dates:
-        news_text = "".join(stories_by_date[date])
-        h2date = date_to_str(time.gmtime(date)[:3])
-        index = open('index.html', 'a')
-        if open('index.html', 'r').read().find('<h2>' + h2date + '</h2>\n\n') >= 0:
-            index.write(news_text)
-        else:
-            index.write('<h2>' + h2date + '</h2>\n\n')
-            index.write(news_text)
-        index.close()
-
-    index = open('index.html', 'a')
-    index.write('<!--#include virtual="/samba/news/footer.html" -->\n\n')
-    index.close
-
-    os.chdir(top_dir)
-
-
-# Loop through each subdirectory, creating an index.html file.
-for topic in topics_files.keys():
-    archive(topic, topics_files[topic])
-
-
-# Create headlines for samba.org from last ten news items
-all_news = {}
-for file in topics_files.keys():
-    os.chdir(file)
-    for this_file in topics_files[file]:
-        if this_file in not_news:
-            continue
-        else:
-            all_news[os.stat(this_file)[ST_MTIME]] = open(this_file, 'r').readlines()
-    os.chdir(top_dir)
-    
-news_dates = all_news.keys()
-news_dates.sort()
-news_dates.reverse()
-
-news_for_headlines = {}
-for date in news_dates:
-    for line in all_news[date]: 
-        if line.find('<h3>') > -1 and len(news_for_headlines) < 10:
-            # Search for text between quotes
-            link = re.search('(?<=\")\S+(?=\")', line)
-            # Search for text between > and </a
-            title = re.search('(?<=\"\>).+(?=\<\/a)', line)
-            news_for_headlines[date] = (link.group(0), title.group(0))
-
-headline_dates = news_for_headlines.keys()
-headline_dates.sort()
-headline_dates.reverse()
-
-headlines = open('headlines.html', 'w')
-headlines.write('<ul class="news">\n')
-for date in headline_dates:
-    headlines.write('<li>' + date_to_str(time.gmtime(date)[:3]) + ' <a href="/samba/news/#' + news_for_headlines[date][0] + '">' + news_for_headlines[date][1] + '</a></li>\n')
-headlines.write('</ul>\n')
+headlines = open('/data/httpd/html/samba/news/headlines.html', 'w')
+headlines.write(html)
 headlines.close()
 
-
-# Create an rss feed
-feed = open('sambanews.xml', 'w')
-feed.write('<?xml version="1.0"?>\n') 
-feed.write('<rss version="2.0">\n\n')
-feed.write('<channel>\n\n')
-feed.write('<title>news.samba.org</title>\n')
-feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
-feed.write('<link>http://news.samba.org/</link>\n\n')
-
-count = 10
-for date in post_dates:
-    item_text = all_stories[date]
-
-    if count > 0:
-        title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
-        text =  re.search('<div class=\"article\"\>(\s|.)*?</div>', item_text)
-        link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\"/samba/news/).+(?=\"\>)', item_text)
-        pub_date = time.asctime(time.gmtime(date))
-
-        # Drop end tag unless nested divs were used
-        if text.group()[21:].find('<div') > 0:
-            description = text.group()[21:]
-        else:
-            description =  text.group()[21:-6]
-
-        feed.write('<item>\n')
-        feed.write('<title><![CDATA[' + title.group() + ']]></title>\n')
-        feed.write('<description><![CDATA[' + description + ']]></description>\n')
-        feed.write('<link>http://news.samba.org/' + link.group() + '</link>\n')
-        feed.write('<pubDate>' + pub_date + '</pubDate>')
-        feed.write('</item>\n\n')
-        count = count - 1
-
-feed.write('</channel>\n')
-feed.write('</rss>\n')
-feed.close()
-


