1616require 'oai/client/metadata_format'
1717require 'oai/client/response'
1818require 'oai/client/header'
19- require 'oai/client/record'
19+ require 'oai/client/record'
2020require 'oai/client/identify'
2121require 'oai/client/get_record'
2222require 'oai/client/list_identifiers'
2727module OAI
2828
2929 # An OAI::Client provides a client API for issuing OAI-PMH verbs against
30- # a OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
30+ # a OAI-PMH server. The 6 OAI-PMH verbs translate directly to methods you
3131 # can call on a OAI::Client object. Verb arguments are passed as a hash:
3232 #
3333 # client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -36,9 +36,9 @@ module OAI
3636 # puts identifier
3737 # end
3838 #
39- # It is worth noting that the api uses methods and parameter names with
40- # underscores in them rather than studly caps. So above list_identifiers
41- # and metadata_prefix are used instead of the listIdentifiers and
39+ # It is worth noting that the api uses methods and parameter names with
40+ # underscores in them rather than studly caps. So above list_identifiers
41+ # and metadata_prefix are used instead of the listIdentifiers and
4242 # metadataPrefix used in the OAI-PMH specification.
4343 #
4444 # Also, the from and until arguments which specify dates should be passed
@@ -49,10 +49,10 @@ module OAI
4949 # the OAI-PMH docs at:
5050 #
5151 # http://www.openarchives.org/OAI/openarchivesprotocol.html
52-
52+
5353 class Client
5454
55- # The constructor which must be passed a valid base url for an oai
55+ # The constructor which must be passed a valid base url for an oai
5656 # service:
5757 #
5858 # client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
@@ -67,15 +67,15 @@ class Client
6767 # back XML::Node objects
6868 #
6969 # client = OAI::Client.new 'http://example.com', :parser => 'libxml'
70- #
71- # You can configure the Faraday HTTP client by providing an alternate
70+ #
71+ # You can configure the Faraday HTTP client by providing an alternate
7272 # Faraday instance:
7373 #
7474 # client = OAI::Client.new 'http://example.com', :http => Faraday.new { |c| }
7575 #
7676 # === HIGH PERFORMANCE
7777 #
78- # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
78+ # If you want to supercharge this api install libxml-ruby >= 0.3.8 and
7979 # use the :parser option when you construct your OAI::Client.
8080 #
8181 def initialize ( base_url , options = { } )
@@ -94,7 +94,7 @@ def initialize(base_url, options={})
9494 require 'faraday_middleware'
9595 @http_client . use FaradayMiddleware ::FollowRedirects , :limit => count
9696 end
97-
97+
9898 # load appropriate parser
9999 case @parser
100100 when 'libxml'
@@ -113,33 +113,33 @@ def initialize(base_url, options={})
113113 end
114114
115115 # Equivalent to a Identify request. You'll get back a OAI::IdentifyResponse
116- # object which is essentially just a wrapper around a REXML::Document
117- # for the response. If you created your client using the libxml
116+ # object which is essentially just a wrapper around a REXML::Document
117+ # for the response. If you created your client using the libxml
118118 # parser then you will get an XML::Node object instead.
119-
119+
def identify
  # Issue the Identify verb and wrap the parsed document in a response object.
  document = do_request('Identify')
  OAI::IdentifyResponse.new(document)
end
123123
124124 # Equivalent to a ListMetadataFormats request. A ListMetadataFormatsResponse
125- # object is returned to you.
126-
125+ # object is returned to you.
126+
def list_metadata_formats(opts = {})
  # Issue the ListMetadataFormats verb with the caller's options and wrap
  # the parsed document in a response object.
  document = do_request('ListMetadataFormats', opts)
  OAI::ListMetadataFormatsResponse.new(document)
end
130130
131131 # Equivalent to a ListIdentifiers request. Pass in :from, :until arguments
132- # as Date or DateTime objects as appropriate depending on the granularity
132+ # as Date or DateTime objects as appropriate depending on the granularity
133133 # supported by the server.
134-
134+
def list_identifiers(opts = {})
  # Issue the ListIdentifiers verb with the caller's options and wrap the
  # parsed document in a response object.
  document = do_request('ListIdentifiers', opts)
  OAI::ListIdentifiersResponse.new(document)
end
138138
139- # Equivalent to a GetRecord request. You must supply an identifier
139+ # Equivalent to a GetRecord request. You must supply an identifier
140140 # argument. You should get back a OAI::GetRecordResponse object
141141 # which you can extract a OAI::Record object from.
142-
142+
def get_record(opts = {})
  # Issue the GetRecord verb with the caller's options and wrap the parsed
  # document in a response object.
  document = do_request('GetRecord', opts)
  OAI::GetRecordResponse.new(document)
end
@@ -150,47 +150,47 @@ def get_record(opts={})
150150 # for record in client.list_records
151151 # puts record.metadata
152152 # end
153-
153+
def list_records(opts = {})
  # Issue the ListRecords verb with the caller's options and wrap the
  # parsed document in a response object.
  document = do_request('ListRecords', opts)
  OAI::ListRecordsResponse.new(document)
end
157157
158158 # Equivalent to the ListSets request. A ListSetsResponse object
159- # will be returned which you can use for iterating through the
159+ # will be returned which you can use for iterating through the
160160 # OAI::Set objects
161161 #
162162 # for set in client.list_sets
163163 # puts set
164164 # end
165-
165+
def list_sets(opts = {})
  # Issue the ListSets verb with the caller's options and wrap the parsed
  # document in a response object.
  document = do_request('ListSets', opts)
  OAI::ListSetsResponse.new(document)
end
169169
170- private
170+ private
171171
def do_request(verb, opts = nil)
  # Fire off the request for this verb and pass the response body through
  # strip_invalid_utf_8_chars before it reaches the parser.
  body = strip_invalid_utf_8_chars(get(build_uri(verb, opts)))

  # Remove the default OAI-PMH namespace for libxml, since it isn't able to
  # use our xpaths to get at the elements otherwise. If you know a way
  # around this please let me know.
  oai_default_namespace = /xmlns=\"http:\/\/www.openarchives.org\/OAI\/.\..\/\"/
  body = body.gsub(oai_default_namespace, '') if @parser == 'libxml'

  # Hand back whatever DOM object the configured parser produces.
  load_document(body)
end
185-
185+
# Build the request URI for +verb+.
#
# The options are first normalised by validate_options; each remaining
# key is converted back to its OAI-PMH spelling with externalize and each
# value is passed through encode. The resulting "key=value" pairs are
# appended to "verb=<verb>" and stored as the query of a copy of @base.
#
# Returns the copied URI object; @base itself is left untouched.
def build_uri(verb, opts)
  opts = validate_options(verb, opts)
  uri = @base.clone

  # Assemble the whole query first and assign it once. This avoids
  # appending to a string literal (an error under frozen_string_literal)
  # and avoids mutating the string returned by the uri.query reader.
  pairs = opts.map { |key, value| "#{externalize(key)}=#{encode(value)}" }
  uri.query = ["verb=#{verb}", *pairs].join('&')
  uri
end
193-
193+
194194 def encode ( value )
195195 return CGI . escape ( value ) unless value . respond_to? ( :strftime )
196196 if value . kind_of? ( DateTime )
@@ -229,28 +229,28 @@ def get(uri)
def debug(msg)
  # Stay silent unless debugging was switched on for this client.
  return unless @debug

  # Write the message, newline-terminated, to standard error.
  $stderr.print("#{msg}\n")
end
232-
232+
233233 # Massage the standard OAI options to make them a bit more palatable.
234234 def validate_options ( verb , opts = { } )
235235 raise OAI ::VerbException . new unless Const ::VERBS . keys . include? ( verb )
236236
237237 return { } if opts . nil?
238238
239239 raise OAI ::ArgumentException . new unless opts . respond_to? ( :keys )
240-
240+
241241 realopts = { }
242242 # Internalize the hash
243243 opts . keys . each do |key |
244244 realopts [ key . to_s . gsub ( /([A-Z])/ , '_\1' ) . downcase . intern ] = opts . delete ( key )
245245 end
246-
246+
247247 return realopts if is_resumption? ( realopts )
248-
248+
249249 # add in a default metadataPrefix if none exists
250250 if ( Const ::VERBS [ verb ] . include? ( :metadata_prefix ) )
251251 realopts [ :metadata_prefix ] ||= 'oai_dc'
252252 end
253-
253+
254254 # Convert date-formatted strings into dates.
255255 #realopts[:from] = parse_date(realopts[:from]) if realopts[:from]
256256 #realopts[:until] = parse_date(realopts[:until]) if realopts[:until]
@@ -261,43 +261,43 @@ def validate_options(verb, opts = {})
261261 end
262262 realopts
263263 end
264-
264+
def is_resumption?(opts)
  # Without a resumption token this is an ordinary request (nil result).
  return nil unless opts.key?(:resumption_token)

  # A resumption token has to be the only argument supplied.
  raise OAI::ArgumentException.new unless opts.size == 1

  true
end
271-
271+
272272 # Convert our internal representations back into standard OAI options
def externalize(value)
  # Turn each "_x" pair into "X", e.g. metadata_prefix -> metadataPrefix.
  text = value.to_s
  text.gsub(/_[a-z]/) { |pair| pair[1].upcase }
end
276-
276+
# Coerce +value+ into a date/time object.
#
# Anything that already responds to #strftime is returned unchanged.
# Otherwise +value+ is parsed as a string and returned as a UTC Time.
#
# Raises OAI::ArgumentException when the value cannot be parsed, which is
# the same exception the option validation in this class raises.
def parse_date(value)
  return value if value.respond_to?(:strftime)

  Date.parse(value) # Validation only: this raises for badly formatted dates
  Time.parse(value).utc # Sadly, this alone would not
rescue StandardError
  raise OAI::ArgumentException.new
end
285-
285+
286286 # Strip out invalid UTF-8 characters. Regex from the W3C, inverted.
287287 # http://www.w3.org/International/questions/qa-forms-utf-8.en.php
288288 #
289- # Regex is from WebCollab:
289+ # Regex is from WebCollab:
290290 # http://webcollab.sourceforge.net/unicode.html
def strip_invalid_utf_8_chars(xml)
  # Returns +xml+ with every match of the two patterns below replaced by
  # '?'. The leading `xml &&` guard returns a nil (or false) argument
  # unchanged instead of calling gsub on it.
  #
  # First pass (extended syntax, /x): matches the control bytes listed in
  # the first character class, continuation bytes (\x80-\xBF) that follow
  # an ASCII byte, the lead bytes \xC0, \xC1 and \xF0-\xFF, and two- or
  # three-byte lead bytes followed by the wrong number of continuation
  # bytes.
  #
  # Second pass: matches \xE0 followed by \x80-\x9F, and \xED followed by
  # \xA0-\xBF, each with one further continuation byte.
  # NOTE(review): the second pattern has no /x flag, so the line break and
  # indentation in front of its `|` are matched literally -- confirm
  # whether that alternative was meant to be written in extended syntax.
  xml && xml.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
                         | [\x00-\x7F][\x80-\xBF]+
                         | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
                         | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
                         | [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
                         | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/x, '?') \
    .gsub(/\xE0[\x80-\x9F][\x80-\xBF]
           | \xED[\xA0-\xBF][\x80-\xBF]/, '?')
end
301-
301+
302302 end
303303end
0 commit comments