User:The wubbot/source code
~/wn_stats2/make_stats.sh
#!/bin/bash
# Downloads page view statistics
# and figures out what is relevant to Wikinews
cd ~/wn_stats2/
check_if_there () {
    # Takes a date format string that equals the URL of the stats file, and a relative date/time.
    # Checks the HTTP status code:
    # returns 1 for a 404 or a redirect, 0 otherwise (we assume the file is there).
    if ( HEAD -H 'From: thewub.wiki@googlemail.com' $(date -d "$2" -u "$1") -S -d | egrep ' 301 | 404 ' --quiet )
    then
        # got redirected or 404'd, file not there. try the next
        return 1
    else
        # yay, file! (we think)
        return 0
    fi
}
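# For illustration: with the format string used below and "$2" set to "now",
# $(date -d "now" -u '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz')
# expands to a URL such as (timestamp hypothetical):
#   http://dammit.lt/wikistats/pagecounts-20100814-130000.gz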
get_and_make() {
    # Takes a date format string that equals the URL of the stats file, and a relative date/time.
    # Add times to the tempfile for the Python script to use
    date -u -d '1 hour ago' +%H > temp.txt  # HOURSTART
    date -u +%H >> temp.txt                 # HOUREND
    # Get the wikistats file, grep the en.wikinews lines, drop the wrong namespaces and interwikis.
    # Sort by hits, take the top 40 (should be plenty) and append to the tempfile.
    wget $(date -d "$2" -u "$1") -q --header='From: thewub.wiki_AT_googlemail.com' -O - \
        | zgrep '^en\.n' \
        | awk -F' ' '{if ($2 !~ /(^Main_Page)|(^Talk:)|(^User:)|(^User_talk:)|(^Wikinews:)|(^Wikinews_talk:)|(^Template:)|(^Template_talk:)|(^Portal:)|(^Portal_talk:)|(^Category:)|(^Category_talk:)|(^File:)|(^File_talk:)|(^Special:)|(^..:)|(^w:)|(^Http:)/) print $3, $2}' \
        | sort -g -r \
        | head -n 40 \
        >> temp.txt
    # Pass the baton to the Python script, which narrows the list down to published articles and updates the page
    python wn_stats.py
}
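# The pagecounts files consist of space-separated lines of the form
# "project page_title count bytes", for example (values hypothetical):
#   en.n Some_article_title 1043 28375629
# so the awk filter above appends "count title" pairs ($3, $2) to temp.txt.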
# try each of these until we get one
if check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' now
then
    get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' now
elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' now  # sometimes files are a minute late
then
    get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' now
elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' '1 hour ago'
then
    get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0000.gz' '1 hour ago'
elif check_if_there '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' '1 hour ago'
then
    get_and_make '+http://dammit.lt/wikistats/pagecounts-%Y%m%d-%H0001.gz' '1 hour ago'
else # none of them worked :(
    exit 2
fi
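# The one-hour HOURSTART-HOUREND window suggests the script is meant to run once
# an hour, e.g. from a crontab entry along these lines (entry illustrative):
#   5 * * * * /home/the_wub/wn_stats2/make_stats.sh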
~/wn_stats2/wn_stats.py
# -*- coding: utf-8 -*-
import codecs
import sys
import os
sys.path.append('/home/the_wub/pywikipedia')
os.chdir('/home/the_wub/wn_stats2')
import wikipedia
site = wikipedia.getSite('en', 'wikinews')
popularpage = wikipedia.Page(site, 'Template:Popular articles')
infile = codecs.open('temp.txt', 'r', encoding='utf-8')
num = 15 # number of top results to get
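# temp.txt, as written by make_stats.sh, starts with two lines holding the start
# and end hour, followed by up to 40 "hits title" lines, e.g. (values hypothetical):
#   13
#   14
#   1043 Some_article_title
#   872 Another_article_title
#   ...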
try:
    # get the times from start of file (and cut off newlines)
    hourstart = infile.readline()[:2]
    hourend = infile.readline()[:2]
    # trim to list of published pages in form [hits, page]
    l = []
    while len(l) < num:
        x = infile.readline().split()
        if not x:
            break  # ran out of candidate lines before finding num published pages
        page = wikipedia.Page(site, x[1])
        if page.exists() and not page.isRedirectPage():
            if ('Publish' in page.templates() or 'publish' in page.templates() or
                    'Published' in page.templates() or 'published' in page.templates()):
                l.append([x[0], page])
    # prepare wikitext
    wikitext = '<noinclude>{{/top|' + hourstart + '}}</noinclude>\n'
    for n in range(len(l)):
        wikitext += "{{#ifexpr: {{{top|40}}} > " + str(n) +\
                    "|# " + l[n][1].aslink(noInterwiki=True) +\
                    " {{#if:{{{nohits|}}}|| <small>('''" +\
                    l[n][0] + "''' hits last hour)</small>}}\n"
    wikitext += '}} ' * len(l)
    wikitext += '\n<noinclude>\nThese statistics are generated from [http://dammit.lt/wikistats/ Wikistats]. ' +\
                'They are based on the number of visits to each page over the last hour. ' +\
                'These statistics include all visits, both by people and by automated computer programs. ' +\
                'Although these are probably reasonably accurate, they are easy to distort. ' +\
                'Please note that sometimes these statistics are updated on an irregular basis. ' +\
                'This page was generated at ~~~~~ for the time period ' +\
                hourstart + ':00-' + hourend + ':00 UTC.</noinclude>'
    popularpage.put(wikitext, comment=u'Updating Popular article list')
finally:
    wikipedia.stopme()
    infile.close()
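# For reference, the wikitext saved to Template:Popular articles nests one {{#ifexpr:}}
# per entry, all closed at the end, roughly like this (titles and counts hypothetical,
# footer abbreviated):
#   <noinclude>{{/top|13}}</noinclude>
#   {{#ifexpr: {{{top|40}}} > 0 |# [[Some article]] {{#if:{{{nohits|}}}|| <small>('''1043''' hits last hour)</small>}}
#   {{#ifexpr: {{{top|40}}} > 1 |# [[Another article]] {{#if:{{{nohits|}}}|| <small>('''872''' hits last hour)</small>}}
#   }} }}
#   <noinclude>These statistics are generated from ... 13:00-14:00 UTC.</noinclude>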