You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

41 lines
977 B
Perl

#!/usr/bin/perl
#
# Check a .bib file for unretrievable URLs.
#
# Usage:
# bib-url-check.pl sampa-pubs.bib
# -or-
# < sampa-pubs.bib bib-url-check.pl > urlreport.txt
#
use strict;
use LWP::UserAgent;
# http://daringfireball.net/2010/07/improved_regex_for_matching_urls
my $urlrx = qr{(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))}o;
my $ua = LWP::UserAgent->new(
ssl_opts => { verify_hostname => 0 },
);
$ua->timeout(10);
$ua->env_proxy;
my $response;
my %done = {};
while (<>) {
if (m/$urlrx/g) {
next if $done{$1};
$response = $ua->get($1);
if ($response->is_success) {
print "OK (", $response->code, "): line $.: $1\n";
} else {
print "FAIL (", $response->code, "): line $.: $1\n";
}
$done{$1} = 1;
sleep 1;
}
}