期末考唸書唸累了,於是寫了這個抓無名小站相簿的 shell script。只要輸入相簿第一頁的網址,就可以自動把整個相簿抓下來。雖然我沒有抓相簿的習慣,不過藉此練習一下 scripting 和 wget 的用法也不錯。另外,請不要把 SLEEP 設得太小,以免增加主機負荷。
#!/bin/sh
#
# wretch-get - download every picture of a wretch.cc album.
#
# Usage:
#   wretch-get 'http://www.wre....'
#
# The argument is the URL of the first picture page of the album.  Each page
# is expected to contain a line mentioning "DisplayImage" that carries both
# the picture (src='...') and the link to the next page (href='...').
# Pictures are saved into the current directory by wget.
#
# Please don't set too short sleeping interval (SLEEP below): every iteration
# issues two requests against the server.

# Value sent in the User-Agent header.  This is the literal two-character
# string "" -- kept so that the header on the wire does not change.
USER_AGENT='""'
# Prefix prepended to the relative "next page" link found on each page.
URL="http://www.wretch.cc/album/"
# Marker identifying the HTML line that holds the picture and the next link.
ID="DisplayImage"
# Seconds to wait before requesting the next page.
SLEEP=1

# extract_attr NAME TEXT
# Print the value of the last NAME='...' attribute found in TEXT, or nothing
# when TEXT has no such attribute.  Only values made of letters, digits and
# the characters . / ? = & : are recognised.
extract_attr() {
  # -n ... p: emit output only on a match, so that a line without the
  # attribute yields an empty string instead of the whole line.
  printf '%s\n' "$2" |
    sed -n "s#.*$1='\([a-zA-Z0-9./?=&:]*\)'.*#\1#p"
}

# fetch [WGET-ARGS...]
# Quiet wget with the configured user agent; returns wget's exit status.
fetch() {
  wget -U "$USER_AGENT" -q "$@"
}

# find_marker FILE
# Print every line of FILE containing $ID, joined into a single line.
find_marker() {
  grep -F -- "$ID" "$1" | tr '\n' ' '
}

# main URL
# Walk the album starting at URL, downloading one picture per page.
# Returns 0 on completion, 1 when the first page cannot be fetched or the
# temporary file cannot be created, 2 on a usage error.
main() {
  if [ "$#" -ne 1 ]; then
    printf 'Usage: %s ALBUM_FIRST_PAGE_URL\n' "${0##*/}" >&2
    return 2
  fi

  # Unpredictable temporary file, removed on every exit path.
  tmp=$(mktemp "${TMPDIR:-/tmp}/wretch-get.XXXXXX") || {
    printf '%s: cannot create temporary file\n' "${0##*/}" >&2
    return 1
  }
  trap 'rm -f -- "$tmp"' EXIT
  # Turn signals into a normal exit so the EXIT trap runs in every sh.
  trap 'exit 1' HUP INT TERM

  ref=$1
  count=0

  if ! fetch -O "$tmp" "$ref"; then
    printf '%s: cannot fetch %s\n' "${0##*/}" "$ref" >&2
    return 1
  fi
  str=$(find_marker "$tmp")

  while [ -n "$str" ]; do
    pic=$(extract_attr src "$str")
    next=$(extract_attr href "$str")

    if [ -z "$pic" ]; then
      printf '%s: no picture URL found on %s; stopping\n' \
        "${0##*/}" "$ref" >&2
      break
    fi

    # The page the picture was found on is sent as the referer.
    if fetch --referer="$ref" "$pic"; then
      count=$((count + 1))
    else
      printf '%s: failed to download %s\n' "${0##*/}" "$pic" >&2
    fi

    # No link to a following page: the album is finished.
    [ -n "$next" ] || break

    # settings for the next iteration
    ref=$URL$next
    sleep "$SLEEP"
    if ! fetch -O "$tmp" "$ref"; then
      printf '%s: cannot fetch %s; stopping\n' "${0##*/}" "$ref" >&2
      break
    fi
    str=$(find_marker "$tmp")
  done

  printf '%s pics fetched. Some pics may have two sizes.\n' "$count"
}

main "$@"
No comments:
Post a Comment