CiNiiのAPIを叩くコードのメモ

  • CiNii APIを使うためのコード
  • 結局、東大図書館について詳しい情報が取得できないことがわかったので使わなかった

# -*- coding: utf-8 -*-
require "open-uri"
require "rubygems"
require "pp"
require "nokogiri"
include Math

# Array subclass used as the container that collect_rdf fills with the
# records returned by get_rdf and then serializes with Marshal. It adds
# no behaviour of its own beyond a distinct class name.
class Booklist < Array
  # Creates an empty list. Takes no arguments, so the sizing/copying
  # forms of Array.new are not available through this class.
  def initialize
    super()
  end
end

# Plain record describing one book.
#
# book_data - Hash that always carries :title, :author and :date, plus
#             any extra keys supplied at construction time.
# library   - Array, empty after construction; writable by callers.
class Book
  attr_reader :book_data
  attr_accessor :library

  # Builds a record from an optional attribute Hash.
  #
  # init - variable argument list; only the first element is looked at,
  #        and only when it is a Hash. Its entries override the defaults
  #        (title: nil, author: [], date: nil). Anything else is ignored.
  def initialize(*init)
    overrides = init.first
    @book_data = { title: nil, author: [], date: nil }
    @book_data.merge!(overrides) if overrides.is_a?(Hash)
    @library = []
  end
end

# library for analyzing CiNii book repository
# Helpers for querying the CiNii Books web API and saving the results.
#
# Everything here is an ordinary instance method, so the module has to be
# mixed in (`include CiNii`, or `obj.extend(CiNii)`) before use. The
# enclosing file is expected to have loaded open-uri and Nokogiri, and to
# define the Book and Booklist classes.
module CiNii
  # Number of records written into each file by #collect_rdf.
  Chunk = 100
  # Seconds to sleep after a request.
  SLEEP_TIME = 0.2
  # Endpoint used by #get_onepage_namelist; the query string is appended.
  BASE_URL = "http://ci.nii.ac.jp/books/opensearch/search?"
  # Library identifiers sent as the "fano" parameter, joined with OR.
  # NOTE(review): presumably the University of Tokyo libraries - confirm.
  TODAI = "FA001798 OR FA001823 OR FA011747 OR FA011780 OR FA022153 OR FA001787 OR FA011769 OR FA011929 OR FA013990 OR FA001845 OR FA011791 OR FA011849 OR FA01185X OR FA01193X OR FA011951 OR FA011962 OR FA011984 OR FA012003 OR FA012025 OR FA012080 OR FA012091 OR FA022142 OR FA011758 OR FA01177X OR FA011973 OR FA011995 OR FA012069 OR FA020395 OR FA001801"
  # Query parameters sent with every search unless overridden.
  # Left unfrozen so that existing code assigning DEFAULT_PARAM["appid"]
  # in place keeps working.
  DEFAULT_PARAM = {
    "appid" => "yourappkey",
    "type" => "1",
    "format" => "html",
    "sortorder" => "2",
    "count" => "15",
    "lang" => "jpn",
    "fano" => TODAI,
    "year_from" => "2011"
  }

  # Fetches one page of search results and extracts the book identifiers.
  # A page holds at most DEFAULT_PARAM["count"] entries (15 by default).
  #
  # param - Hash of String keys/values merged over DEFAULT_PARAM
  #         (for example {"p" => "2"} for the second page).
  #
  # Returns an Array with, for each <a class="taggedlink"> element, the
  # third "/"-separated segment of its href. Sleeps SLEEP_TIME afterwards.
  def get_onepage_namelist(param)
    url = BASE_URL + rpconv(DEFAULT_PARAM.merge(param))
    # URI.open rather than Kernel#open: the latter no longer opens URLs
    # on Ruby 3.0+.
    doc = URI.open(url) { |f| Nokogiri::XML(f) }
    # html body form ul li div div
    namelist = doc.search('a[@class="taggedlink"]').map { |e| e["href"].split("/")[2] }
    sleep SLEEP_TIME
    namelist
  end

  # Collects identifiers from as many result pages as are needed.
  #
  # num - Integer, number of identifiers wanted.
  #
  # Returns an Array of at most num identifiers. Prints a progress line
  # after each page.
  def get_namelist(num)
    page_size = DEFAULT_PARAM["count"].to_i
    # Ceiling division: exactly enough pages, and none at all for num == 0.
    page_count = (num + page_size - 1) / page_size
    namelist = []
    (1..page_count).each do |page|
      namelist.concat(get_onepage_namelist({ "p" => page.to_s }))
      puts "#{100.0 * page / page_count}% success in #{num} files(making list)"
    end
    namelist.take(num)
  end

  # Downloads the RDF record of num books and stores them on disk.
  #
  # The records are grouped Chunk at a time into a Booklist, and each
  # Booklist is written with Marshal to "<file_path>/<index>.dat",
  # index starting at 0. Each list is also printed, followed by a
  # progress line.
  #
  # num       - Integer, number of books to collect.
  # file_path - String, existing directory that receives the .dat files.
  #
  # The return value is not meaningful.
  def collect_rdf(num, file_path)
    namelist = get_namelist(num)
    chunk_count = (namelist.size + Chunk - 1) / Chunk
    namelist.each_slice(Chunk).with_index do |ncids, i|
      bl = Booklist.new
      ncids.each { |ncid| bl << get_rdf(ncid) }
      p bl
      puts "#{100.0 * (i + 1) / chunk_count} % success in #{num} files"
      # Marshal output is binary, so the file must not be opened in text mode.
      File.open(File.join(file_path, "#{i}.dat"), "wb") { |f| Marshal.dump(bl, f) }
    end
  end

  # Downloads and parses the RDF description of a single book.
  #
  # ncid - String identifier; the record is read from
  #        http://ci.nii.ac.jp/ncid/<ncid>.rdf
  #
  # Returns a Book whose book_data holds :title, :author (Array of names)
  # and :date, and whose library lists the names of the owning
  # organizations found in the record.
  def get_rdf(ncid)
    URI.open("http://ci.nii.ac.jp/ncid/" + ncid + ".rdf") do |f|
      doc = Nokogiri::XML(f)
      # scraping book_info
      title = doc.xpath("rdf:RDF/rdf:Description/dc:title").children.text
      # A maker entry without a name node is skipped instead of raising
      # NoMethodError on nil.
      author = doc.xpath("rdf:RDF/rdf:Description/foaf:maker")
                  .map { |maker| maker.xpath("foaf:Person/foaf:name").children.first }
                  .compact
                  .map(&:text)
      date = doc.xpath("rdf:RDF/rdf:Description/dc:date").children.text
      book = Book.new({ :title => title, :author => author, :date => date })
      book.library = doc.xpath("rdf:RDF/rdf:Description/bibo:owner").map do |owner|
        owner.xpath("foaf:Organization/foaf:name").text
      end
      book
    end
  end

  # Issues searches for randomly generated ISBNs.
  #
  # times       - Integer, number of ISBNs to try.
  # max_retries - Integer, how many times a failing request is retried
  #               (each retry uses a freshly generated ISBN) before that
  #               iteration is abandoned. Defaults to 3. The original code
  #               retried without limit and could therefore loop forever.
  #
  # NOTE(review): `search` is not defined in this module; it has to be
  # supplied by whatever object mixes the module in. A NameError (which
  # includes a missing `search`) is re-raised rather than retried, since
  # retrying cannot fix it.
  #
  # Returns times.
  def rand_access(times, max_retries = 3)
    times.times do
      failures = 0
      begin
        n = rand_isbn
        puts n
        search({ "rft.isbn" => n })
        sleep SLEEP_TIME
      rescue NameError
        raise
      rescue StandardError
        puts "fail"
        sleep SLEEP_TIME
        failures += 1
        retry if failures <= max_retries
      end
    end
  end

  # Converts request parameters from a Hash into a query String.
  #
  # hash - Hash of parameter names to values.
  #
  # Returns "k1=v1&k2=v2..." with keys and values form-encoded, or "" for
  # an empty Hash. URI.encode, used previously, was removed in Ruby 3.0
  # and left "&" and "=" inside values unescaped. Note that form encoding
  # writes a space as "+" rather than "%20".
  def rpconv(hash)
    URI.encode_www_form(hash)
  end

  # Looks up libraries whose name matches "東京大学", up to 40 results.
  #
  # Returns the Hash produced by #search_library.
  def todai_library
    search_library({ "count" => "40", "name" => "東京大学" })
  end

  # Queries the library search endpoint.
  #
  # option - variable argument list; when the first element is a Hash it
  #          is merged into the request parameters.
  #
  # The appid is taken from DEFAULT_PARAM so that the key is configured in
  # one place only; an application key was previously embedded here as a
  # literal, which should not live in source code.
  #
  # Prints the request URL and the parsed document.
  # Returns a Hash mapping the fifth "/"-separated segment of each entry's
  # id to that entry's title.
  def search_library(*option)
    url = "http://ci.nii.ac.jp/books/opensearch/library?"
    param = { "appid" => DEFAULT_PARAM["appid"] }
    param.merge!(option[0]) if option[0].is_a?(Hash)
    request_url = url + rpconv(param)
    puts request_url
    URI.open(request_url) do |f|
      doc = Nokogiri::XML(f)
      p doc
      doc.css("entry").each_with_object({}) do |e, list|
        list[e.css("id").text.split("/")[4]] = e.css("title").text
      end
    end
  end

  # Fetches the library list and then collects book records.
  #
  # num       - Integer, number of books to collect (default Chunk).
  # file_path - String, output directory (default ".").
  #
  # The original called collect_rdf without arguments and so always raised
  # ArgumentError. NOTE(review): the defaults above are a guess at the
  # intended values, and the library list is not yet used to filter the
  # search - confirm the intended behaviour.
  #
  # Returns the Hash from #todai_library.
  def todai_research(num = Chunk, file_path = ".")
    lib_list = todai_library
    collect_rdf(num, file_path)
    lib_list
  end

  ######
  ## Scratch helpers (marked "garbage" by the original author).

  # Generates a random identifier: "BB" followed by eight digits,
  # zero-padded.
  def rand_cnid
    "BB%08d" % rand(10**8)
  end

  # Generates a random ten-digit string starting with "4".
  # The nine random digits are zero-padded; the previous "%9d" padded with
  # spaces, producing strings with blanks in them. No check digit is
  # computed.
  def rand_isbn
    "4%09d" % rand(10**9)
  end
end