User:SJK/yrget perl script
Appearance
< User:SJK
#!/usr/local/bin/perl -w
#
# yrget - download the newest revision of each "Year in Review" article.
#
# Reads article names, one per line, from the file ENTRIES in the current
# directory.  For every name the script
#   1. fetches the article's history page,
#   2. picks the highest revision number listed on it,
#   3. fetches the edit form for that revision, and
#   4. writes the contents of the form's <textarea> to data/<name>, preceded
#      by a "#YEAR [[<name>]] REV=<revision>" header line.
#
# The data/ directory must already exist (open() does not create it; an
# entry name containing "/" additionally needs the matching subdirectory).
# Any HTTP error or file error aborts the whole run.
use strict;
use warnings;
use LWP::UserAgent;

my $base_url = 'http://www.wikipedia.com/wiki.cgi';

my $ua = LWP::UserAgent->new;

# Set attributes on UA
$ua->timeout(30);
$ua->agent("SJK downloading Year in Review: " . $ua->agent);
$ua->env_proxy();

# fetch each article
open my $entries_fh, '<', 'ENTRIES' or die "can't open ENTRIES file: $!";

ENTRY:
while (my $entry = <$entries_fh>) {
    chomp $entry;

    # A blank line would otherwise request an empty id and try to open the
    # data/ directory itself for writing.
    next ENTRY if $entry eq '';

    my $history_html = fetch_content($ua, "$base_url?action=history&id=$entry");
    my $maxrev       = latest_revision($history_html);

    my $edit_html
        = fetch_content($ua, "$base_url?action=edit&revision=$maxrev&id=$entry");

    # As in the original script, the output file is created even when no
    # <textarea> is found; the warning below makes that case visible.
    open my $out_fh, '>', "data/$entry" or die "Can't open data/$entry: $!";

    my $wikitext = extract_wikitext($edit_html);
    if (defined $wikitext) {
        # NOTE(review): the newline after the header is a reconstruction.  The
        # published copy of this script shows a space here, but it shows a
        # space everywhere a "\n" escape would be expected -- confirm against
        # whatever consumes the data/ files.
        print {$out_fh} "#YEAR [[$entry]] REV=$maxrev\n" . $wikitext
            or die "Can't write data/$entry: $!";
    }
    else {
        warn "No <textarea> found for '$entry' (revision $maxrev); "
            . "data/$entry left empty\n";
    }

    # Buffered write errors only surface at close time.
    close $out_fh or die "Can't close data/$entry: $!";
}

close $entries_fh;

# fetch_content($agent, $url)
#   Announces the URL on STDOUT, issues a GET through $agent and returns the
#   response body.  Dies with the URL and HTTP status line if the response
#   is an error.
sub fetch_content {
    my ($agent, $url) = @_;

    print "Getting $url...\n";

    my $response = $agent->request(HTTP::Request->new('GET', $url));
    if ($response->is_error) {
        die "GET $url failed: " . $response->status_line . "\n";
    }

    return $response->content;
}

# latest_revision($html)
#   Returns the largest N among all "Revision N: <a href=" occurrences in
#   $html, or 0 when there are none.
sub latest_revision {
    my ($html) = @_;

    my $maxrev = 0;

    # Scan the whole page with a global match instead of splitting it into
    # pieces first, so a match can never be cut in half by the split.
    # [0-9]+ (not *) guarantees the capture is a number before comparing.
    while ($html =~ m{Revision ([0-9]+): <a href=}g) {
        $maxrev = $1 if $1 > $maxrev;
    }

    return $maxrev;
}

# extract_wikitext($html)
#   Returns the text between the first <textarea ...> and the following
#   </textarea> in $html with apostrophe entities decoded, or undef (empty
#   list in list context) when $html contains no textarea.
sub extract_wikitext {
    my ($html) = @_;

    # m{} delimiters so the "/" in </textarea> needs no escaping; /s lets "."
    # cross newlines; non-greedy so the capture stops at the first close tag.
    return unless $html =~ m{<textarea[^>]*>(.*?)</textarea>}s;
    my $text = $1;

    # NOTE(review): the published copy of this script shows s/'/'/g, i.e. the
    # entity in the pattern was decoded by the page renderer.  Which spelling
    # it originally used is not recoverable, so every apostrophe entity is
    # accepted here.
    $text =~ s/&(?:\#0*39|\#x0*27|apos);/'/gi;

    return $text;
}
- See also: Simon J Kissane