User:The wubbot/source code

~/wn_stats2/make_stats.sh

#!/bin/bash
 
#downloads statistics
#figures out what is relevant to wikinews
 
cd ~/wn_stats2/
 
check_if_there () {
#takes a date format string that expands to the URL of a stats file, and a relative date/time.
#checks the HTTP status code
#returns 1 for a 301 redirect or a 404 (file not there), 0 for anything else (assume the file is there)
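# e.g. date -d 'now' -u '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz'
# expands to http://dammit.lt/wikistats/pagecounts-20100401-130000.gz
# for a run at 13:xx UTC on 1 April 2010 (illustrative date)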

if HEAD -H 'From: thewub.wiki@googlemail.com' "$(date -d "$2" -u "$1")" -S -d | egrep --quiet ' 301 | 404 '
then
	# got redirected or 404'd, file not there. try the next
	return 1
else
	# yay, file! (we think)
	return 0
fi
}
 
get_and_make() {

#takes a date format string that expands to the URL of a stats file, and a relative date/time

# Add times to the tempfile for the Python script to use
date -u -d '1 hour ago' +%H > temp.txt	# HOURSTART
date -u +%H >> temp.txt			# HOUREND
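# e.g. for a run at 13:05 UTC temp.txt now starts with:
# 12
# 13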

# Get the wikistats file, keep the en.wikinews lines, strip wrong-namespace and interwiki entries
# Sort by hits, take the top 40 (should be plenty) and append to the tempfile
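# Each pagecounts line is "project title hits bytes", e.g. (illustrative counts):
# en.n Some_article 1234 5678901
# so awk's $2 is the page title and $3 its hit count for the hour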
wget "$(date -d "$2" -u "$1")" -q --header='From: thewub.wiki_AT_googlemail.com' -O - \
|zgrep '^en\.n' \
|awk -F' ' '{if ($2 !~ /(^Main_Page)|(^Talk:)|(^User:)|(^User_talk:)|(^Wikinews:)|(^Wikinews_talk:)|(^Template:)|(^Template_talk:)|(^Portal:)|(^Portal_talk:)|(^Category:)|(^Category_talk:)|(^File:)|(^File_talk:)|(^Special:)|(^..:)|(^w:)|(^Http:)/) print $3, $2}' \
|sort -g -r \
|head -n 40 \
>> temp.txt

# Pass baton to the Python script, which narrows down to published articles and updates the page
python wn_stats.py

}


# try each of these URLs in turn until one exists: both filename variants, for the current hour and then the previous hour
 
if check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' now
then
	get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' now

elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' now # sometimes files are a minute late
then
	get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' now

elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' '1 hour ago'
then
	get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' '1 hour ago'

elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' '1 hour ago'
then
	get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' '1 hour ago'

else # none of them worked :(
	exit 2
fi
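(This script is presumably run hourly from cron, a few minutes past the hour so that the new pagecounts file has had time to appear; the crontab itself isn't shown here.)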

~/wn_stats2/wn_stats.py

# -*- coding: utf-8 -*-
import codecs
import sys
import os
sys.path.append('/home/the_wub/pywikipedia')
os.chdir('/home/the_wub/wn_stats2')

import wikipedia
site = wikipedia.getSite('en', 'wikinews')

popularpage = wikipedia.Page(site, 'Template:Popular articles')
infile = codecs.open('temp.txt', 'r', encoding='utf-8')

num = 15 # number of top results to get

try:
    # get the hour numbers from the start of the file ([:2] keeps the two digits and drops the newline)
    hourstart = infile.readline()[:2]
    hourend = infile.readline()[:2]

    # trim to list of published pages in form [hits, page]
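    # each remaining line was written by make_stats.sh as "hits title", so
    # after split() x looks like, e.g., ['1234', 'Some_article'] (illustrative values)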
    l = []
    while len(l) < num:
        x = infile.readline().split()
        if len(x) < 2:
            break  # ran out of stats lines before finding enough published articles
        page = wikipedia.Page(site, x[1])
        if page.exists() and not page.isRedirectPage():
            templates = page.templates()  # fetch once rather than four times
            if ('Publish' in templates or 'publish' in templates or
                'Published' in templates or 'published' in templates):
                l.append([x[0], page])

    # prepare wikitext
    wikitext =  '<noinclude>{{/top|' + hourstart + '}}</noinclude>\n'
    for n in range(len(l)):
        wikitext += "{{#ifexpr: {{{top|40}}} > " + str(n) +\
                    "|# " + l[n][1].aslink(noInterwiki=True) +\
                    " {{#if:{{{nohits|}}}||&nbsp;<small>('''" +\
                    l[n][0] + "''' hits last hour)</small>}}\n"
        
    wikitext += '}} ' * len(l)
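    # the run of '}} ' closes the {{#ifexpr: opened for each entry; with default
    # template parameters one entry renders roughly as (illustrative):
    #   # [[Some article]] ('''1234''' hits last hour)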
    wikitext += '\n<noinclude>\nThese statistics are generated from [http://dammit.lt/wikistats/ Wikistats]. ' +\
                'They are based on the number of visits to each page over the last hour. ' +\
                'These statistics include all visits, both by people and by automated computer programs. ' +\
                'Although these are probably reasonably accurate, they are easy to distort. ' +\
                'Please note that these statistics are sometimes updated on an irregular basis. ' +\
                'This page was generated at ~~~~~ for the time period ' +\
                hourstart + ':00-' + hourend + ':00 UTC.</noinclude>'

    popularpage.put(wikitext, comment=u'Updating Popular article list')    
    
finally:
    wikipedia.stopme()
    infile.close()