Module: ASpaceImport::XML::SAX

Included in:
EADConverter
Defined in:
backend/app/converters/lib/xml_sax.rb

Defined Under Namespace

Modules: ClassMethods

Class Method Summary (collapse)

Instance Method Summary (collapse)

Dynamic Method Handling

This module handles dynamic methods through the method_missing method.

- (Object) method_missing(*args)



59
60
# File 'backend/app/converters/lib/xml_sax.rb', line 59

# Catch-all for calls to undefined methods: accepts any arguments, does
# nothing and returns nil. handle_opener sends a "_<element name>" message for
# every opening element, so elements without a handler become silent no-ops
# instead of raising NoMethodError.
# NOTE(review): this also swallows genuine typos in method names. No
# respond_to_missing? is visible alongside it, so respond_to? still reports
# false for unhandled names (handle_closer's "_closing_" lookup depends on
# that) - confirm before changing either side.
def method_missing(*args)
end

Class Method Details

+ (Object) included(base)



54
55
56
# File 'backend/app/converters/lib/xml_sax.rb', line 54

# Inclusion hook: when this module is mixed into +base+, +base+ is also
# extended with ClassMethods so the class-level methods defined there become
# available on the including class.
#
# base - the class or module that is including this module.
#
# Returns whatever +extend+ returns (the extended +base+).
def self.included(base)
  base.extend ClassMethods
end

Instance Method Details

- (Object) ancestor(*types)



302
303
304
305
306
307
# File 'backend/app/converters/lib/xml_sax.rb', line 302

# Finds the nearest record in the batch's working area whose record type is
# one of +types+, searching from the most recent entry backwards.
#
# types - record type names (anything responding to to_s, e.g. symbols).
#
# When @context_nodes already holds an entry for the current node name and
# depth, the last working-area entry is excluded from the search (presumably
# because it is the record opened by the current node itself - confirm against
# open_context).
#
# Returns the matching object, or nil when nothing matches. When a block is
# given, the match (possibly nil) is yielded and the block's value is returned
# instead.
def ancestor(*types)
  current_has_context = @context_nodes.key?(@node_name) && @context_nodes[@node_name][@node_depth]
  last_index = current_has_context ? -2 : -1

  # Convert the wanted types once, not once per candidate.
  wanted = types.map(&:to_s)

  # reverse_each walks newest-to-oldest without building a reversed copy.
  found = @batch.working_area[0..last_index].reverse_each.find do |candidate|
    wanted.include?(candidate.class.record_type)
  end

  block_given? ? yield(found) : found
end

- (Object) append(obj = context_obj, property, value)



233
234
235
236
237
238
239
240
241
242
# File 'backend/app/converters/lib/xml_sax.rb', line 233

# Appends +value+ to a string property of +obj+, or assigns it when the
# property has no value yet.
#
# obj      - target record; defaults to the current context object.
# property - property name (looked up in the record class's schema).
# value    - text to add.
#
# Does nothing (returns nil) unless the schema type of the property matches
# /string/ and +value+ is a String. The value is passed through the filter
# that ASpaceImport::Utils.value_filter returns for the property type before
# it is stored.
def append(obj = context_obj, property, value)
  schema_entry = obj.class.schema['properties'][property.to_s]
  type_name = ASpaceImport::Utils.get_property_type(schema_entry)[0]
  return unless type_name.match(/string/) && value.is_a?(String)

  cleaned = ASpaceImport::Utils.value_filter(type_name).call(value)
  existing = obj.send(property)

  if existing
    # Mutates the stored string in place.
    existing.send(:<<, cleaned)
  else
    obj.send("#{property}=", cleaned)
  end
end

- (Object) att(attribute)



310
311
312
313
314
315
316
317
# File 'backend/app/converters/lib/xml_sax.rb', line 310

# Looks up an attribute of the node currently being processed (@node).
#
# attribute - the attribute name to look for.
#
# Returns the value paired with the first attribute entry whose name equals
# +attribute+, or nil when the node has no such attribute.
def att(attribute)
  found = @node.attributes.find { |pair| pair[0] == attribute }
  found[1] unless found.nil?
end

- (Object) close_context(type)



207
208
209
210
211
212
213
214
215
216
# File 'backend/app/converters/lib/xml_sax.rb', line 207

# Closes the innermost open context, which must be of the given +type+.
#
# type - expected record type of the last object in the working area.
#
# Discharges the proxy registered under "<jsonmodel_type>-<context depth>"
# with the finished record, pops the context stack and flushes the record
# from the batch.
#
# Raises RuntimeError when the last object in the working area is not of the
# expected type (the offending object is logged at debug level first).
def close_context(type)
  current = @batch.working_area.last
  current_type = current.jsonmodel_type

  unless current_type == type.to_s
    Log.debug(current.inspect)
    raise "Unexpected Object Type in Queue: Expected #{type} got #{current_type}"
  end

  # The key uses the context depth as it is *before* the pop below.
  @proxies.discharge_proxy("#{current_type}-#{@contexts.length}", current)
  @contexts.pop
  @batch.flush_last
end

- (Object) context



320
321
322
# File 'backend/app/converters/lib/xml_sax.rb', line 320

# Returns the innermost open context: the last type pushed onto @contexts by
# open_context, or nil when the stack is empty.
def context
  @contexts.last
end

- (Object) context_obj



330
331
332
# File 'backend/app/converters/lib/xml_sax.rb', line 330

# Returns the last object in the batch's working area. It is the default
# target for append and set_property.
def context_obj
  @batch.working_area.last
end

- (Object) full_context



325
326
327
# File 'backend/app/converters/lib/xml_sax.rb', line 325

# Returns the whole stack of open context types, outermost first. This is the
# live @contexts array, not a copy.
def full_context
  @contexts
end

- (Object) handle_closer(node)



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'backend/app/converters/lib/xml_sax.rb', line 168

# Handles the end of an element.
#
# node - either a reader node (responding to local_name and depth) or a
#        two-element [name, depth] array such as the one kept in @node_shadow.
#
# Clears the pending "shadow" opener state, invokes the optional
# "_closing_<name>" hook, closes (innermost first) every context that was
# opened at this element name and depth, and removes the element name from the
# sticky stack if it is on top.
#
# NOTE(review): the closing hook is looked up with @node_name (the most
# recently *opened* element), not with the name of the node being closed -
# confirm this is intended.
def handle_closer(node)
  @node_shadow = nil
  @empty_node = false

  name, depth = node.is_a?(Array) ? node : [node.local_name, node.depth]

  closing_hook = "_closing_#{@node_name}"
  send(closing_hook, node) if respond_to?(closing_hook)

  opened_here = @context_nodes[name] && @context_nodes[name][depth]
  if opened_here
    opened_here.reverse.each { |type| close_context(type) }
    @context_nodes[name].delete_at(depth)
  end

  @stickies.pop if @stickies.last == name
end

- (Object) handle_opener(node, empty_node)



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'backend/app/converters/lib/xml_sax.rb', line 139

# Handles the start of an element by dispatching to the handler methods named
# after it.
#
# node       - the reader node for the opening element.
# empty_node - whether the element was detected as empty; stored in
#              @empty_node for later use.
#
# Records the element's name and depth (also as @node_shadow), exposes the
# node through @node for the duration of the dispatch, and then sends:
#   1. one "constrained" handler per sticky-path suffix, longest first,
#      e.g. "_publication_date" for <date> inside a sticky <publication>;
#   2. the "unconstrained" handler, e.g. "_date".
# Finally the element itself is made sticky when the class configuration asks
# for it, and @node is reset to nil.
def handle_opener(node, empty_node)
  name = node.local_name
  depth = node.depth

  @node_name = name
  @node_depth = depth
  @node_shadow = [name, depth]
  @node = node
  @empty_node = empty_node

  # constrained handlers, e.g. publication/date
  @stickies.each_index do |start|
    send("_#{@stickies[start..-1].join('_')}_#{@node_name}", node)
  end

  # unconstrained handlers, e.g., date
  send("_#{@node_name}", node)

  # config calls for constrained handlers on this path
  make_sticky(@node_name) if self.class.make_sticky?(@node_name)

  @node = nil
end

- (Object) handle_text(node)



163
164
165
# File 'backend/app/converters/lib/xml_sax.rb', line 163

# Handles a text node by discharging the :text proxy with the node's value.
#
# node - a reader node responding to +value+.
#
# Returns whatever discharge_proxy returns.
def handle_text(node)
  text = node.value
  @proxies.discharge_proxy(:text, text)
end

- (Object) inner_xml



219
220
221
# File 'backend/app/converters/lib/xml_sax.rb', line 219

# Returns the inner XML of the current node (@node) with every "&" replaced
# by "&amp;" and surrounding whitespace stripped.
def inner_xml
  raw = @node.inner_xml
  raw.gsub("&", "&amp;").strip
end

- (Boolean) is_node_empty?(node)

Checks whether a node is empty before it is processed. The check is relatively expensive, especially for nodes that have children, so the root node is skipped.
Override this method to avoid checking nodes that are expected to be very deep.

Returns:

  • (Boolean)


129
130
131
132
133
134
135
136
# File 'backend/app/converters/lib/xml_sax.rb', line 129

# Reports whether +node+ has no content other than whitespace.
#
# node - a reader node responding to +depth+ and +inner_xml+.
#
# Returns false for the root node (depth 0) without inspecting it; otherwise
# true when the node's inner XML is blank after stripping whitespace.
def is_node_empty?(node)
  # calling inner_xml on the root node slows things down a lot, so it is
  # never inspected.
  return false if node.depth == 0

  # The inner XML is checked by hand because empty_element? returns true if
  # there's just whitespace...
  node.inner_xml.strip.empty?
end

- (Object) make_sticky(node_name)



335
336
337
# File 'backend/app/converters/lib/xml_sax.rb', line 335

# Pushes +node_name+ onto the sticky stack (@stickies). handle_opener joins
# the entries of this stack to build the names of constrained handlers.
#
# Returns the @stickies array.
def make_sticky(node_name)
  @stickies.push(node_name)
end

- (Object) node



297
298
299
# File 'backend/app/converters/lib/xml_sax.rb', line 297

# Returns the node currently being opened. handle_opener assigns @node before
# dispatching to the element handlers and resets it to nil afterwards, so this
# is nil outside of an opener dispatch.
def node
  @node
end

- (Object) node_queue_for(reader)

Get a hold of Nokogiri’s internal nodeQueue for the sake of being able to clear it. This might not be necessary in new versions of Nokogiri.



65
66
67
68
69
70
# File 'backend/app/converters/lib/xml_sax.rb', line 65

# Uses Java reflection to reach the private "nodeQueue" field of the Java
# object behind a Nokogiri reader, so that the caller can clear entries from
# it.
#
# reader - an object responding to +to_java+ (requires JRuby).
#
# Returns the value of the reader's nodeQueue field.
def node_queue_for(reader)
  java_reader = reader.to_java
  queue_field = java_reader.get_class.get_declared_field("nodeQueue")
  # The field is not public; open it up before reading it.
  queue_field.setAccessible(true)
  queue_field.get(java_reader)
end

- (Object) open_context(type, properties = {}) {|obj| ... } Also known as: make

Yields:

  • (obj)


187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'backend/app/converters/lib/xml_sax.rb', line 187

# Opens a new record context of the given +type+ for the current node.
#
# type       - record type; used to pick the JSONModel class to instantiate.
# properties - optional hash of property => value pairs, applied to the new
#              record with +set+.
#
# The new record is tagged with a printable form of the current node
# ("import_context"), appended to the batch, and registered under the current
# node name and depth so that handle_closer can close it when the element
# ends.
#
# Yields the new record when a block is given and returns the block's value;
# returns nil otherwise.
def open_context(type, properties = {})
  record = ASpaceImport::JSONModel(type).new
  record["import_context"] = pprint_current_node

  @contexts << type
  @batch << record

  # @context_nodes maps node name => array indexed by depth => opened types.
  depth_slots = (@context_nodes[@node_name] ||= [])
  (depth_slots[@node_depth] ||= []) << type

  properties.each { |key, val| set(record, key, val) }

  yield record if block_given?
end

- (Object) outer_xml



223
224
225
# File 'backend/app/converters/lib/xml_sax.rb', line 223

# Returns the outer XML of the current node (@node) with surrounding
# whitespace stripped.
def outer_xml
  raw = @node.outer_xml
  raw.strip
end

- (Object) pprint_current_node



227
228
229
230
231
# File 'backend/app/converters/lib/xml_sax.rb', line 227

# Builds a short printable form of the current node (@node): an element with
# the node's name and attributes whose only content is a CDATA section
# containing " ... ". open_context stores this string as the new record's
# "import_context".
#
# Returns the serialized root element as a String.
def pprint_current_node
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.send(@node.name.to_sym, @node.attributes).cdata(" ... ")
  end
  builder.doc.root.to_s
end

- (Object) proxy(record_type = context)

Subrecords should not be pushed into their parent records until they are complete. A proxy can be assigned instead; it will discharge the JSON subrecord once the subrecord is complete.



292
293
294
# File 'backend/app/converters/lib/xml_sax.rb', line 292

# Returns a proxy standing in for a record that is not complete yet.
#
# record_type - type of the awaited record; defaults to the current context.
#
# The proxy is requested from the proxy manager under the key
# "<record_type>-<current context depth>", which is the same key format
# close_context uses when it discharges a finished record.
def proxy(record_type = context)
  proxy_key = "#{record_type}-#{@contexts.length}"
  @proxies.get_proxy_for(proxy_key, record_type)
end

- (Object) run



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'backend/app/converters/lib/xml_sax.rb', line 73

# Parses the XML document at @input_file with Nokogiri's pull Reader and
# dispatches every node to handle_opener, handle_text or handle_closer.
#
# All per-run parser state (@contexts, @context_nodes, @proxies, @stickies,
# @node_shadow, @empty_node) is reset before reading starts.
def run
  # The whole file is read into memory, and every run of two or more
  # whitespace characters is collapsed to a single space before parsing.
  @reader = Nokogiri::XML::Reader( IO.read(@input_file).gsub(/\s\s+/, " ")) do |config|
    config.noblanks.strict
  end
  # Handle on the reader's internal node queue; entries are cleared one by one
  # at the bottom of the loop below.
  node_queue = node_queue_for(@reader)
  @contexts = []
  @context_nodes = {}
  @proxies = ASpaceImport::RecordProxyMgr.new
  @stickies = []
  # another hack for noko:
  # @node_shadow remembers the [name, depth] of the last opened element and
  # @empty_node whether it was empty, so that a close can be synthesized for
  # elements the reader never reports a closing event for.
  @node_shadow = nil
  @empty_node = false 

  self.class.ensure_configuration

  @reader.each_with_index do |node, i|

    case node.node_type

    # 1: start of an element (Nokogiri::XML::Reader::TYPE_ELEMENT)
    when 1
      
      # NOTE(review): @ignore is never assigned in the code shown here;
      # presumably a converter sets it to skip elements - verify.
      next if @ignore

      # Nokogiri Reader won't create events for closing tags on empty nodes
      # https://github.com/sparklemotion/nokogiri/issues/928
      # handle_closer(node) if node.self_closing? #<--- don't do this it's horribly slow
      # Instead: if the previously opened element was empty and is still
      # pending, close it now, before opening the next one.
      if @node_shadow && @empty_node 
        handle_closer(@node_shadow)
      end
      
      # We do not bother with nodes that are both empty and attribute-less.
      # However, an empty node is still handled as long as it has attributes.
      empty_node = is_node_empty?(node)
      handle_opener(node, empty_node) unless ( empty_node && !node.attributes? ) 

    # 3: text content (Nokogiri::XML::Reader::TYPE_TEXT)
    when 3
      handle_text(node)
    # 15: end of an element (Nokogiri::XML::Reader::TYPE_END_ELEMENT)
    when 15
      # A pending shadow naming a different element than the one closing now
      # means that element never got its own closing event; close it first.
      if @node_shadow && node.local_name != @node_shadow[0] 
        handle_closer(@node_shadow)
      end
      handle_closer(node)
    end

    # A gross hack.  Use Java Reflection to clear Nokogiri's node queue,
    # since otherwise we end up accumulating all nodes in memory.
    node_queue.set(i, nil)
  end
end

- (Object) set(*args)



245
246
247
# File 'backend/app/converters/lib/xml_sax.rb', line 245

# Shorthand for set_property: forwards all arguments unchanged and returns
# its result.
def set(*args)
  set_property(*args)
end

- (Object) set_property(obj = context_obj, property, value)



250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'backend/app/converters/lib/xml_sax.rb', line 250

# Sets (or, for list properties, appends to) a property of a record.
#
# obj      - target record; defaults to the current context object.
# property - property name; must exist in the record class's schema.
# value    - the value to store, or an ASpaceImport::RecordProxy standing in
#            for a value that is not available yet.
#
# Behaviour by case:
#   * obj or property is nil: a warning is logged and false is returned.
#   * value is a RecordProxy: this method is registered to run again (with
#     the same obj and property) when the proxy is discharged.
#   * value is nil: nothing happens, nil is returned.
#   * property type ends in "list": the filtered value is pushed onto the
#     existing list.
#   * otherwise: the filtered value is assigned; a warning is logged first if
#     the property already holds a value.
#
# Raises RuntimeError when the property's schema type cannot be determined
# (a NoMethodError during the schema lookup).
def set_property(obj = context_obj, property, value)
  if obj.nil?
    Log.warn "Tried to set property #{property} on an object that couldn't be found"
    return false
  end

  if property.nil?
    Log.warn("Can't set <#{obj.class.record_type}> <#{property}>: nil value")
    return false
  end

  type_name = begin
    ASpaceImport::Utils.get_property_type(obj.class.schema['properties'][property.to_s])[0]
  rescue NoMethodError
    raise "Having some trouble finding a property <#{property}> on a <#{obj.class.record_type}> object"
  end

  # Defer until the proxied record is complete.
  return value.on_discharge(self, :set_property, obj, property) if value.is_a?(ASpaceImport::RecordProxy)

  # Log.debug("Given a nil value for <#{obj.class.record_type}><#{property}>")
  return nil if value.nil?

  filtered = ASpaceImport::Utils.value_filter(type_name).call(value)

  if type_name.match(/list$/)
    obj.send("#{property}").push(filtered)
  else
    Log.warn("Setting a property that has already been set") if obj.send("#{property}")
    obj.send("#{property}=", filtered)
  end
end