用户:Shizhao/dykcode

#!/usr/bin/python
# -*- coding: utf-8  -*-

import os, sys, re
import wikipedia, query, datetime, BeautifulSoup

# Bot script: take the "Did you know" (DYK) entries rendered from
# Template:Dyk and prepend any entry not yet present to the RSS feed text
# stored on the wiki page Template:Dyk/rss.
mysite = wikipedia.getSite()

dyktext = u'{{Template:Dyk}}'
dyktitle = u'Template:Dyk'
rsstitle = u'Template:Dyk/rss'
rsspage = wikipedia.Page(mysite, rsstitle)
dykpage = wikipedia.Page(mysite, dyktitle)

# Ask the API to render the DYK template to HTML so it can be scraped.
params = {
    'action': 'parse',
    'text': dyktext,
}
text = query.GetData(params, encodeTitle=False)[u'parse'][u'text'][u'*']

soup = BeautifulSoup.BeautifulSoup(text)

# Captures the inner HTML of an <li> element (single line only: '.' does not
# match newlines here).  Compiled once because it is reused for every <ul>.
li_pattern = re.compile(u'<li>(?P<li>.*?)</li>', re.I)

image = u''
for i in soup:
    try:
        # Only <ul> nodes carry DYK entries.  Nodes without a ``name``
        # attribute raise AttributeError, which is handled below.
        if i.name != 'ul':
            continue

        if i.small is not None:
            # Drop the <small> element from the list, then use the rendered
            # contents of the document's first <div> as the leading markup.
            i.small.extract()
            image = unicode(soup.div.renderContents())
        else:
            image = u''

        # Keep the last match, as the original loop did.  If nothing matches,
        # skip this list instead of failing on (or reusing) an entry text
        # left over from a previous iteration.
        matches = li_pattern.findall(unicode(i.li))
        if not matches:
            continue
        li_html = matches[-1]

        # Title of the article linked from the first bold link of the list.
        title = i.b.a['title']

        # {{subst:...}} is wiki markup; presumably MediaWiki expands it to
        # the real URL / timestamp when the page is saved.
        href = u'{{subst:fullurl:%s}}' % title
        rtext = image + u'<p>你知道吗?</p>\n<p>' + li_html + u'</p><p><a href=\"' + href + u'\">阅读条目全文 >>></a></p>'
        wikitext = u'<title>新条目推荐:%s</title>\n<link>%s</link>\n<guid>%s</guid>\n<description>%s</description>\n<pubDate>{{subst:#time:r}}</pubDate>\n<dc:creator>中文维基百科编者</dc:creator></item>\n' % (title, href, href, rtext)

        rsstext = rsspage.get(force=True)
        if title in rsstext:
            print("DYK no update.")
            continue

        # Splitting on '<item>' yields [header, item 1, ..., last item];
        # the last chunk also holds whatever follows the final item.
        ritem = rsstext.split(u'<item>')
        feed_is_full = len(ritem) >= 7
        ritem.insert(1, wikitext)  # newest entry goes first
        if feed_is_full:
            # Drop the last item.  Its chunk is removed whole, so the closing
            # tags are appended again after joining.
            ritem.pop()
            s = u'<item>'.join(ritem) + u'\n</channel>\n</rss>'
        else:
            s = u'<item>'.join(ritem)
        rsspage.put(s, u'Bot:你知道吗rss更新: [[%s]]' % title)
    except (AttributeError, KeyError):
        # Best effort: skip nodes that do not have the expected structure
        # (no ``name``, no bold link, no ``title`` attribute, ...).
        # The tuple is required: ``except AttributeError, KeyError`` would
        # catch only AttributeError and bind it to the name ``KeyError``.
        pass

wikipedia.stopme()