#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use File::Basename;
use File::Path qw(make_path);
use Mojo::URL;
use Mojo::UserAgent;
binmode(STDIN, ':utf8');
binmode(STDOUT, ':utf8');
# Load the state persisted by previous runs:
#   pics.txt            - one "site/album/file" entry per downloaded image,
#                         interleaved with one timestamp line per run
#   extracted_links.txt - one already-queued page URL per line
# Both files are optional; a missing file simply means "no state yet".

# These handles are declared here because the save step at the end of the
# script opens the same two files for writing through them.
my ($fp_pics, $fp_extracted_links);

# Read a UTF-8 text file and return its lines with the newlines removed.
#   $path - file to read
# Returns an empty list when the file cannot be opened. A warning is printed
# only if the file exists, so a first run without state files stays quiet.
sub _read_list {
    my ($path) = @_;
    my $fh;
    unless (open $fh, '<', $path) {
        my $reason = $!;    # capture before -e can overwrite $!
        warn "cannot read $path: $reason\n" if -e $path;
        return;
    }
    binmode($fh, ':utf8');
    my @lines = <$fh>;
    chomp @lines;
    close $fh;
    return @lines;
}

my @pics = _read_list('pics.txt');

# Append a timestamp line so pics.txt records when each run started.
my $time = localtime;
push @pics, "$time";

my @extracted_links = _read_list('extracted_links.txt');
## Seed pages for the crawl: every tag listing first, then every category
## listing. Slugs are kept exactly as they appear in the site's URLs
## (tag names stay percent-encoded).
my $site_root = "http://www.xiuren.org";

my @tag_slugs = qw(
    %E8%AF%B1%E6%83%91
    %E7%A7%81%E6%88%BF
    %E6%80%A7%E6%84%9F
    %E5%A4%A7%E6%B3%A2
    %E7%BE%8E%E5%A5%B3
    %E5%85%BB%E7%9C%BC
    %E7%99%BD%E8%99%8E
    %E5%86%99%E7%9C%9F
    %E5%A4%A7%E8%83%B8
    %E6%92%A9%E4%BA%BA
    %E6%91%A9%E6%93%A6%E6%91%A9%E6%93%A6
    %E6%9B%BC%E5%A6%99
    %E7%9C%9F%E7%A9%BA
    %E5%A4%A7%E5%A5%B6
    %E5%86%85%E8%A1%A3
    %E6%8C%BA%E6%8B%94
    %E9%A5%B1%E6%BB%A1
    %E7%BE%8E%E4%B9%B3
    %E6%A1%83%E4%B9%B3
    %E4%B9%8B%E5%90%8E%E5%B9%B2%E4%BA%86%E4%B8%AA%E7%88%BD
    %E8%83%B4%E4%BD%93
    %E8%95%BE%E4%B8%9D
    %E5%B0%8F%E7%99%BD%E5%85%94
    MyGirl
);

my @category_slugs = qw(
    toutiaogirls
    miitao
    youwu
    TuiGirl
    ugirls
    tgod
    bololi
    imiss
    mistar
    feilin
    mfstar
    vgirlmm
    rayshen
    uxing
    AISS
    ru1mm
    donggan
    XiuRen
);

# Work queue consumed (and extended) by the crawl loop.
my @urls = (
    ( map { "$site_root/tag/$_.html" } @tag_slugs ),
    ( map { "$site_root/category/$_.html" } @category_slugs ),
);
# Crawl loop.
#
# Takes pages off the front of @urls until the queue is empty. For each page:
#   1. every <span class="photoThum"> is searched for its first
#      <a href="..." ...> link; the linked image is downloaded to
#      pics/<site>/<album>/<file> unless it is already listed in @pics;
#   2. every <div class="content"> is searched the same way; a link not yet
#      in @extracted_links is appended to both @urls and @extracted_links.
#
# A page or image that cannot be fetched is reported and skipped, so one
# failure does not abort the run before the state files are written.
my $ua = Mojo::UserAgent->new;

# Image downloads may be redirected; one shared agent is enough for all of them.
my $ua_gif = Mojo::UserAgent->new(max_redirects => 5);

my %visited;

# Exact-match lookup tables. The remote strings contain regex metacharacters
# (".", "?", "+"), so they must not be interpolated into a pattern, and a hash
# lookup avoids rescanning the whole list for every candidate.
my %have_pic     = map { $_ => 1 } @pics;
my %is_extracted = map { $_ => 1 } @extracted_links;

while (@urls) {
    my $url = shift @urls;
    next if exists $visited{$url};
    $visited{$url} = 1;
    print "[$url]\n";

    my $page = $ua->get($url);
    if (my $err = $page->error) {
        warn "cannot fetch $url: $err->{message}\n";
        next;
    }
    my $dom = $page->res->dom;

    # find() returns an empty collection when nothing matches, so no
    # existence test is needed before iterating.
    $dom->find('span[class=photoThum]')->each(sub {
        my ($element) = @_;

        # First link of the thumbnail, taken from the element's HTML text.
        my ($match_url) = ( "$element" =~ m/<a href="([^"]*)" / );
        return unless $match_url;

        my ($site, $album, $file)
            = ( $match_url =~ m{http://[^/]*/([^/]*)/([^/]*)/(.*)} );
        return unless $site && $album && $file;

        my $entry = "$site/$album/$file";

        # The path comes from a remote page: never let it climb out of pics/.
        return if grep { $_ eq '..' } split m{/}, $entry;
        return if $have_pic{$entry};

        my $target = "pics/$entry";
        print "getting $entry...";
        my $saved = eval {
            my $gif = $ua_gif->get($match_url);
            die "download failed\n" unless $gif->res->is_success;
            # $file may itself contain sub-directories; create the whole chain.
            make_path( dirname($target) );
            $gif->res->content->asset->move_to($target);
            1;
        };
        my $why = $@;
        if ($saved) {
            push @pics, $entry;
            $have_pic{$entry} = 1;
            print "done\n";
        }
        else {
            chomp $why;
            print "failed ($why)\n";
        }
    });

    $dom->find('div[class=content]')->each(sub {
        my ($element) = @_;
        my ($match_url) = ( "$element" =~ m/<a href="([^"]*)" / );
        return unless $match_url;
        return if $is_extracted{$match_url};

        $is_extracted{$match_url} = 1;
        push @urls,            $match_url;
        push @extracted_links, $match_url;
    });
}
# Persist the crawl state for the next run. Every step is checked: a write
# error that is ignored here would silently lose the record of what has
# already been downloaded or queued. Buffered write errors may only surface
# at close, so close is checked as well.

# pics.txt keeps insertion order so the per-run timestamp lines stay in place.
open $fp_pics, '>', "pics.txt"
    or die "cannot open pics.txt for writing: $!\n";
binmode($fp_pics, ':utf8');
print {$fp_pics} join("\n", @pics), "\n"
    or die "cannot write pics.txt: $!\n";
close $fp_pics
    or die "cannot close pics.txt: $!\n";

# extracted_links.txt is written sorted.
open $fp_extracted_links, '>', "extracted_links.txt"
    or die "cannot open extracted_links.txt for writing: $!\n";
binmode($fp_extracted_links, ':utf8');
print {$fp_extracted_links} join("\n", sort @extracted_links), "\n"
    or die "cannot write extracted_links.txt: $!\n";
close $fp_extracted_links
    or die "cannot close extracted_links.txt: $!\n";
2017/10/16
2017/10/01
讓chrome 可以讀取 local file
首先,要知道的就是,啟動命令中加上 --allow-file-access-from-files
做法上,有幾個地方可以進行
1. 可以在 ubuntu 快捷圖示修改,檔案在
~/.local/share/applications/google-chrome.desktop
如果已經在的話直接修,否則參考以下內容:
[Desktop Entry]
Encoding=UTF-8
Version=1.0
Type=Application
Name=Welcome to Chrome - Google Chrome
Icon=google-chrome
Path=/home/wade
Exec=/opt/google/chrome/chrome --allow-file-access-from-files
StartupNotify=false
StartupWMClass=Google-chrome
OnlyShowIn=Unity;
X-UnityGenerated=true
2. 直接修改(如果你有權限的話) 啟動的 google-chrome script
通常是 /usr/bin/google-chrome
複雜的我們就不看了,只看最後一行,我的範例是:
exec -a "$0" "$HERE/chrome" --allow-cross-origin-auth-prompt --allow-file-access-from-files "$@"
3. 或者是修改安裝目錄裡的啟動 script,例如 /opt/google/chrome/google-chrome
附註:事實上底下幾個位置最後都是同一個檔案(前三個是符號連結)
/usr/bin/google-chrome
/etc/alternatives/google-chrome
/usr/bin/google-chrome-stable
/opt/google/chrome/google-chrome
最後一個才是普通檔,其他幾個最後都指向它
做法上,有幾個地方可以進行
1. 可以在 ubuntu 快捷圖示修改,檔案在
~/.local/share/applications/google-chrome.desktop
如果已經在的話直接修,否則參考以下內容:
[Desktop Entry]
Encoding=UTF-8
Version=1.0
Type=Application
Name=Welcome to Chrome - Google Chrome
Icon=google-chrome
Path=/home/wade
Exec=/opt/google/chrome/chrome --allow-file-access-from-files
StartupNotify=false
StartupWMClass=Google-chrome
OnlyShowIn=Unity;
X-UnityGenerated=true
2. 直接修改(如果你有權限的話) 啟動的 google-chrome script
通常是 /usr/bin/google-chrome
複雜的我們就不看了,只看最後一行,我的範例是:
exec -a "$0" "$HERE/chrome" --allow-cross-origin-auth-prompt --allow-file-access-from-files "$@"
3. 或者是修改安裝目錄裡的啟動 script,例如 /opt/google/chrome/google-chrome
附註:事實上底下幾個位置最後都是同一個檔案(前三個是符號連結)
/usr/bin/google-chrome
/etc/alternatives/google-chrome
/usr/bin/google-chrome-stable
/opt/google/chrome/google-chrome
最後一個才是普通檔,其他幾個最後都指向它
訂閱:
文章 (Atom)