You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
977 B
Perl
41 lines
977 B
Perl
#!/usr/bin/perl
|
|
#
|
|
# Check a .bib file for unretrievable URLs.
|
|
#
|
|
# Usage:
|
|
# bib-url-check.pl sampa-pubs.bib
|
|
# -or-
|
|
# < sampa-pubs.bib bib-url-check.pl > urlreport.txt
|
|
#
|
|
|
|
|
|
use strict;
|
|
use LWP::UserAgent;
|
|
|
|
# http://daringfireball.net/2010/07/improved_regex_for_matching_urls
|
|
my $urlrx = qr{(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))}o;
|
|
|
|
my $ua = LWP::UserAgent->new(
|
|
ssl_opts => { verify_hostname => 0 },
|
|
);
|
|
$ua->timeout(10);
|
|
$ua->env_proxy;
|
|
my $response;
|
|
|
|
my %done = {};
|
|
|
|
while (<>) {
|
|
if (m/$urlrx/g) {
|
|
next if $done{$1};
|
|
|
|
$response = $ua->get($1);
|
|
if ($response->is_success) {
|
|
print "OK (", $response->code, "): line $.: $1\n";
|
|
} else {
|
|
print "FAIL (", $response->code, "): line $.: $1\n";
|
|
}
|
|
$done{$1} = 1;
|
|
sleep 1;
|
|
}
|
|
}
|