マンガサイトを調査 実践。( Ruby )
有効な URL と 有効ではない URL を調査し、データベースに記録しよう。
Ruby 言語を使って https リクエストして調査してみましょう。 有効なURLとは漫画の画像があるページのことになります。すなわち、マンガのサイトにある全てのコンテンツの数を全部数えてみようということになります。 対象は漫画Bankです。
Python 言語の場合はこっちにあります。
ポリシーについてはこちら
- レポート この日本語の翻訳ニュースの配信の約1時間後、漫画bankはサイトを閉鎖。 そのため、本稿以降のプログラムが対象とするサイトは既に消失しています。
curl -is "https://mangabank.org/watch/?tour=/vol" | grep location
上記のターミナルコマンドを実行すると、リダイレクト先のアドレスが表示されます。
require 'net/http'
require 'uri'
require 'sqlite3'

# Schema shared by both databases: one row per URL, keyed by a manually
# assigned integer id.
SQL = <<EOS
create table url (
  id INTEGER PRIMARY KEY,
  url text
);
EOS

SEPARATOR = " ---------------------------------- "

# Builds the page URL for a numeric page id.
def page_url(number)
  "https://mangabank.org/#{number}/"
end

# Opens (creating if necessary) a URL database and returns it with its last id.
#
# @param path  [String] SQLite file path
# @param label [String] text printed before the last id when the table already exists
# @return [Array(SQLite3::Database, Integer)] the handle and the highest id in use
#   (0 when the table was just created or holds no rows)
def open_url_db(path, label)
  handle = SQLite3::Database.open(path)
  found = handle.execute("SELECT COUNT(*) FROM sqlite_master WHERE TYPE='table' AND NAME='url';")
  if found[0][0] == 0
    handle.execute(SQL)
    return [handle, 0]
  end

  # MAX() over an empty table yields NULL; COALESCE turns that into 0 so an
  # existing-but-empty table no longer raises on nil.
  last_id = handle.execute("SELECT COALESCE(MAX(id), 0) FROM url;")[0][0]
  puts label
  pp last_id
  puts SEPARATOR
  [handle, last_id]
end

# Returns the value of the `location` response header for +uri+, or nil when
# the response carries no such header.
def get_redirect_url(uri)
  Net::HTTP.get_response(uri)['location']
end

# Probes a window of up to 11 consecutive page ids and records each one in
# the VALID or INVALID database according to its HTTP status code.
#
# Step 0 looks at +address+ itself (after moving the window back by 10 ids
# when +second+ is false); steps 1..10 look at the following ids.
#
# @param address [String]  URL the scan starts from
# @param number  [Integer] numeric page id matching +address+
# @param db1     [SQLite3::Database] INVALID-URL database
# @param count1  [Integer] last id used in +db1+
# @param db      [SQLite3::Database] VALID-URL database
# @param count   [Integer] last id used in +db+
# @param second  [Boolean] true on follow-up scans: the window is not moved
#   back, and the scan stops at the first non-200 response
# @return [Array(Integer, Integer, String, Boolean)] updated +count1+, updated
#   +count+, the last address examined, and whether any new VALID URL was stored
#
# NOTE(review): +number+ is an Integer, so the increments made here are not
# visible to the caller; the caller's own counter continues from its old value.
def dbr(address, number, db1, count1, db, count, second)
  gate = false
  again = false

  11.times do |x|
    puts SEPARATOR
    puts x

    if x == 0
      ###### <1 check invalid db
      puts SEPARATOR
      flag1 = db1.execute("SELECT id FROM url WHERE url = ?;", [address])
      unless flag1.empty?
        puts "flag 1:INVALID DB id=#{flag1[0][0]} #{address}"
        # The row is kept (ids stay stable) but its URL is overwritten with
        # the literal text 'null'.
        db1.execute("UPDATE url SET url='null' WHERE id = ?;", [flag1[0][0]])
        puts "is VALID / remove from INVALID DB"
        gate = true
      end
      unless second
        number -= 10
        address = page_url(number)
      end
      ###### 1>
    else
      number += 1
      address = page_url(number)
      puts SEPARATOR
      puts address

      ###### <2
      flag2 = db1.execute("SELECT id FROM url WHERE url = ?;", [address])
      unless flag2.empty?
        puts "flag 2:INVALID DB id=#{flag2[0][0]} #{address}"
        puts "#{number} + INVALID"
        # Known-invalid ids are re-fetched only when step 0 found the start
        # address listed as invalid.
        next unless gate
      end
      ###### 2>

      puts SEPARATOR
      res = Net::HTTP.get_response(URI.parse(address))
      puts "#{address} responce code: #{res.code}"
      if res.code != '200'
        count1 += 1
        puts "insert INVALID DB id: #{count1} -> #{address}"
        db1.execute("INSERT INTO url (id,url) VALUES (?,?)", [count1, address])
        break if second
        next
      elsif !flag2.empty?
        puts "flag 2:INVALID DB id=#{flag2[0][0]} + #{address}"
        db1.execute("UPDATE url SET url='null' WHERE id = ?;", [flag2[0][0]])
        puts "is VALID / remove from INVALID DB"
      end
    end

    flag = db.execute("select id from url where url = ?;", [address])
    unless flag.empty?
      puts "flag 0: id=#{flag[0][0]} #{address}"
      puts "already exsist in DB"
      puts
      break
    end

    # Steps 1..10 already fetched the page above; step 0 has not yet.
    if x == 0
      res = Net::HTTP.get_response(URI.parse(address))
      puts "#{address} responce code: #{res.code}"
    end

    if res.code == '200'
      count += 1
      puts "insert VALID DB id: #{count} -> #{address}"
      db.execute("INSERT INTO url (id,url) VALUES (?,?)", [count, address])
      again = true
    else
      count1 += 1
      puts "insert NOT-VALID-DB id: #{count1} -> #{address}"
      db1.execute("INSERT INTO url (id,url) VALUES (?,?)", [count1, address])
    end
  end

  [count1, count, address, again]
end

## ----------------------------------
# _INVALID DB ===> DB1
db1, count1 = open_url_db("mb_urls_INVALID.db", "invalid")
# ----------------------------------
# _VALID DB ===> DB
db, count = open_url_db("mb_urls_VALID.db", "valid")
# ----------------------------------

target_url = "https://mangabank.org/watch/?tour=/vol/"

begin
  50000.times do
    redirect_url = get_redirect_url(URI.parse(target_url))
    puts redirect_url
    if redirect_url.nil?
      # No Location header in the response: nothing to scan this round.
      puts "no redirect location / skip"
      next
    end

    address = redirect_url
    s_number = address.split('/')[-1]
    puts s_number
    number = s_number.to_i

    count1, count, address, again = dbr(address, number, db1, count1, db, count, false)
    puts

    # While new valid pages keep turning up, continue probing the ids after
    # the redirect target, giving up after 5 consecutive non-200 responses.
    limit = 5
    while again
      puts SEPARATOR
      puts " #{limit}"
      number += 1
      address = page_url(number)
      puts address
      res = Net::HTTP.get_response(URI.parse(address))
      puts "#{address} responce code: #{res.code}"

      if res.code == '200'
        count1, count, address, again = dbr(address, number, db1, count1, db, count, true)
        limit = 5
      else
        limit -= 1
        puts " Retry #{limit}"
        ###### <3
        flag1 = db1.execute("SELECT id FROM url WHERE url = ?;", [address])
        if flag1.empty?
          count1 += 1
          puts "insert INVALID DB id: #{count1} -> #{address}"
          db1.execute("INSERT INTO url (id,url) VALUES (?,?)", [count1, address])
        else
          puts "flag 3: id=#{flag1[0][0]} #{address}"
          puts "#{number} INVALID"
        end
        ###### 3>
        again = false if limit < 1
      end
    end
  end
ensure
  # Close both handles even when a network or SQL error aborts the scan.
  db.close
  db1.close
end
2つのデータベースをマージするためのプログラム（Ruby）。
## Merges two URL databases into one de-duplicated database, then writes a
## second copy whose rows are inserted in URL order.
##
## This is a Ruby port of an earlier Lua (lsqlite3) script that performed the
## same steps; the commented-out Lua listing is not repeated here.
##
## To merge the "not" databases instead, swap the four file names below for:
##   db1    = "lua_mb_urls_not.db"      # Lua
##   db2    = "mb_urls_not.db"          # made from python code
##   newdb  = "mb_url_notadditive.db"   # additive work space
##   newdb1 = "mb_url_not1.db"          # additive & order by url
require 'sqlite3'

db1 = SQLite3::Database.open "lua_mb_urls_new.db"   # Lua
db2 = SQLite3::Database.open "mb_urls_new.db"       # made from python code
newdb = SQLite3::Database.open "mb_url_additive.db" # additive work space
newdb1 = SQLite3::Database.open "mb_url_new1.db"    # additive & order by url

SQL = <<EOS
create table IF NOT EXISTS url(
  id INTEGER PRIMARY KEY,
  url text
);
EOS

# Returns the highest id stored in the `url` table of +db+, or 0 when the
# table holds no rows (MAX() over an empty table is NULL).
def last_url_id(db)
  db.execute("select coalesce(max(id), 0) from url")[0][0]
end

# Copies every URL with id 1..+last_id+ from +source+ into +dest+, skipping
# ids that have no row and URLs that +dest+ already contains.
#
# @param source  [SQLite3::Database] database to read from
# @param dest    [SQLite3::Database] database to append to
# @param last_id [Integer] highest id to look up in +source+
# @param label   [String]  name of +source+ used in progress output
# @param next_id [Integer] last id already used in +dest+
# @return [Integer] last id used in +dest+ after the copy
def copy_urls(source, dest, last_id, label, next_id)
  1.upto(last_id) do |i|
    res = source.execute("select url from url where id = ?", [i])
    if res.empty?
      puts "no data / skip"
      next
    end

    url = res[0][0]
    puts "copy from #{label}"
    p i, last_id

    # Bound parameters keep URLs containing quote characters from breaking
    # the statement.
    unless dest.execute("select url from url where url = ?", [url]).empty?
      p url
      puts 'already exist in db / skip'
      next
    end

    puts
    next_id += 1
    puts "#{next_id} #{url}"
    dest.execute("insert into url (id, url) values (?, ?)", [next_id, url])
  end
  next_id
end

begin
  newdb.execute(SQL)
  newdb1.execute(SQL)

  k = last_url_id(db1)
  l = last_url_id(db2)
  puts "db1 has #{k} URLs"
  puts "db2 has #{l} URLs"

  # Continue numbering after whatever the work-space database already holds.
  newcount = last_url_id(newdb)
  newcount = copy_urls(db2, newdb, l, "db2", newcount)
  newcount = copy_urls(db1, newdb, k, "db1", newcount)

  puts "new DB last id:#{newcount}"

  # Second pass: copy the merged set into newdb1 in URL order.
  counter = last_url_id(newdb1)
  i = 0
  newdb.execute("select url from url order by url ;") do |row|
    i += 1
    url = row[0]
    found = newdb1.execute("select url from url where url = ?", [url])
    puts "copy from newdb"
    p i, newcount
    if found.empty?
      counter += 1
      puts "#{counter} #{url}"
      newdb1.execute("insert into url (id, url) values (?, ?)", [counter, url])
    else
      p url
      puts 'already exist in db / skip'
    end
  end
ensure
  # Release every handle even when a query fails part-way through.
  [db1, db2, newdb, newdb1].each(&:close)
end