#!/usr/bin/perl
# hubble.pl:
# Collect Hubble wallpaper JPEG images from hubblesite.org.
# Use at your own risk.
use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Response;
### configure parameters below
# HTTP proxy URL taken from the environment; falls back to an empty string
# when http_proxy is unset.
my $proxy = $ENV{http_proxy} || "";
# Key into %subdir below, selecting the wallpaper resolution.
my $size = 8; # 0: 640x480 1: 800x600 2: 1024x768 3: 1280x1024 ...
### nothing to be changed below this line
# initialize variables
# Maps the $size setting to the resolution label. The label is used both as
# the local download directory name and inside the remote image file name.
my %subdir = (
    0  => "640",
    1  => "800",
    2  => "1024",
    3  => "1280",
    4  => "1152x864",
    5  => "1280x768",
    6  => "1280x800",
    7  => "1280x960",
    8  => "1440x960",
    9  => "1680x1050",
    10 => "1920x1200",
    11 => "2048x1280",
    12 => "2560x1024",
);
# NOTE(review): $site is not referenced anywhere in the visible code.
my $site = "http://hubblesite.org";
my $base = "http://hubblesite.org/gallery/wallpaper/";
# here we go:
# Refuse to run with a $size that has no entry in %subdir; otherwise every
# path and URL built from it would contain undef.
exists $subdir{$size}
    or die "Invalid size '$size': expected one of "
    . join( ", ", sort { $a <=> $b } keys %subdir ) . "\n";
# Create the download directory on first use and stop if that fails, since
# every later attempt to save a file into it would fail as well.
if ( !-d $subdir{$size} ) {
    mkdir $subdir{$size}
        or die "Cannot create directory '$subdir{$size}': $!\n";
}
my $ua = ua_connect();
#for my $i (1 .. 6) {
# my $page = $base . "+$i";
my $page = $base;
get_page($ua, $page);
#}
# end.
# define subroutines
# Fetch an HTML gallery page and download every wallpaper it references.
#
# Parameters:
#   $ua   - user agent object, handed on to get_content()
#   $page - URL of the HTML page to scan
#
# Each anchor of the form <a id="prYYYYdNNx"> found in the page is turned
# into an image URL for the resolution selected by $size, and the image is
# saved as "<resolution>/prYYYYNNx.jpg". Files that already exist locally are
# not fetched again. Returns nothing.
sub get_page {
    my ($ua, $page) = @_;

    my $content = get_content($ua, $page);
    return unless defined $content && length $content;

    my $resolution = $subdir{$size};

    # Scan the whole document instead of testing each line once, so that
    # several anchors sharing one line are all picked up.
    while ( $content =~ /<a id="pr(\d{4})\d(\d{2})([a-z])"/g ) {
        # Copy the captures immediately; they are only valid until the
        # next successful match.
        my ($year, $number, $letter) = ($1, $2, $3);

        my $url = "http://imgsrc.hubblesite.org/hu/db/images/hs-$year-$number-$letter-${resolution}_wallpaper.jpg";
        my $filename = "$resolution/pr$year$number$letter.jpg";
        next if -f $filename;

        print "Downloading: $filename\n";
        my $pict = get_content($ua, $url);

        # A JPEG stream always begins with the bytes FF D8. Anything else
        # (an empty body, an HTML error page) must not be stored under a
        # .jpg name, because the existing file would block later retries.
        unless ( defined $pict && substr($pict, 0, 2) eq "\xFF\xD8" ) {
            warn "Skipping $filename: response is not JPEG data\n";
            next;
        }

        my $fh;
        unless ( open $fh, '>', $filename ) {
            warn "Cannot write $filename: $!\n";
            next;
        }
        binmode $fh;    # image bytes must not go through newline translation

        my $saved = print {$fh} $pict;
        $saved = close($fh) && $saved;
        unless ($saved) {
            warn "Error writing $filename: $!\n";
            unlink $filename;    # drop the partial file so a rerun retries it
        }
    }
    return;
}
# Fetch a URL with a GET request and return the response body.
#
# Parameters:
#   $ua  - user agent object whose request() method performs the transfer
#   $url - URL to fetch
#
# Returns the raw response body when the request succeeds. When it fails,
# the status line is reported on STDERR and an empty string is returned, so
# a caller never mistakes a server error page for the requested document.
sub get_content {
    my ($ua, $url) = @_;

    # No Content-Type header is set: a GET request carries no body that the
    # header could describe.
    my $request = HTTP::Request->new(GET => $url);
    $request->header(Accept => "text/xml,application/xml,application/xhtml+xml,text/html,*/*;q=0.1");

    my $response = $ua->request($request);
    unless ( $response->is_success ) {
        warn "Error: " . $response->status_line . "\n";
        return '';
    }
    return $response->content;
}
# Build the user agent object used for all requests.
# Takes no arguments. The agent identifies itself as "Mozilla/5.0" and gets
# the file-level $proxy value registered as its proxy for the http scheme.
# Returns the new LWP::UserAgent object.
sub ua_connect {
    my $browser = LWP::UserAgent->new(agent => "Mozilla/5.0");
    $browser->proxy(http => $proxy);
    return $browser;
}
# That's it.