Friday, January 19, 2007

Wretch Album Fetcher

期末考唸書唸累了，於是寫了這個抓無名小站相簿的 shell script。只要輸入相簿第一頁的網址，就可以自動把整個相簿抓下來。雖然我沒有抓相簿的習慣，但藉此練習一下 scripting 和 wget 的用法也不錯。另外，請不要把 SLEEP 設得太小，以免增加主機負荷。

#!/bin/sh
# wretch-get -- download every picture of a Wretch (wretch.cc) album.
#
# Usage:
#   wretch-get 'http://www.wre....'
#
# The argument is the URL of the album's first picture page.  The script
# downloads the picture referenced on that page into the current directory,
# follows the page's link to the next picture page, and repeats until a page
# no longer contains a line matching $ID.
#
# Please don't set too short sleeping interval.

set -u

# User-Agent value sent with every request: the two literal characters "".
UserAgent='""'
# Prefix prepended to the relative "next page" link found in each page.
URL="http://www.wretch.cc/album/"
# Marker identifying the HTML line(s) holding the picture and the next link.
ID="DisplayImage"
# Seconds to wait between pages; keep it large enough to spare the server.
SLEEP=1

# Print a usage message on stderr and exit with status 2.
usage() {
    printf "Usage: %s 'http://www.wre....'\n" "${0##*/}" >&2
    exit 2
}

# fetch WGET-ARGS...: run wget quietly with the configured User-Agent.
fetch() {
    wget -U "$UserAgent" -q "$@"
}

# page_line FILE: print every line of FILE containing $ID, joined onto a
# single line.  Prints nothing when no line matches.
page_line() {
    grep -- "$ID" "$1" | tr '\n' ' '
}

# extract_attr NAME TEXT: print the value of the last NAME='...' attribute
# found in TEXT.  Prints nothing when TEXT has no such attribute (-n ... p),
# so callers never mistake unmatched HTML for a URL.
extract_attr() {
    printf '%s\n' "$2" |
        sed -n -e "s/.*${1}='\([a-zA-Z0-9\.\/\?=&:]*\)'.*/\1/p"
}

[ "$#" -ge 1 ] || usage

# URL of the page currently being processed; also sent as the Referer when
# downloading that page's picture.
ref=$1
count=0

# Unpredictable scratch file for page HTML, removed on every exit path.
tmp=$(mktemp "${TMPDIR:-/tmp}/wretch-get.XXXXXX") || {
    echo "wretch-get: cannot create temporary file" >&2
    exit 1
}
trap 'rm -f -- "$tmp"' EXIT
trap 'exit 1' HUP INT TERM

if ! fetch -O "$tmp" -- "$ref"; then
    echo "wretch-get: cannot fetch $ref" >&2
    exit 1
fi
str=$(page_line "$tmp")

while [ -n "$str" ]; do
    next=$(extract_attr href "$str")
    pic=$(extract_attr src "$str")

    # Count a picture only when it was actually downloaded.
    if [ -n "$pic" ] && fetch --referer="$ref" -- "$pic"; then
        count=$((count + 1))
    else
        echo "wretch-get: no picture fetched from $ref" >&2
    fi

    # Without a link to follow there is no further page to visit.
    [ -n "$next" ] || break

    # Pause, then load the next page for the following iteration.
    sleep "$SLEEP"
    ref="$URL$next"
    fetch -O "$tmp" -- "$ref" || break
    str=$(page_line "$tmp")
done

echo "$count pics fetched. Some pics may have two sizes."

No comments: