# coding: utf-8

class PDF::Reader
  # Provides low level access to the objects in a PDF file via a hash-like
  # object.
  #
  # A PDF file can be viewed as a large hash map. It is a series of objects
  # stored at precise byte offsets, and a table that maps object IDs to byte
  # offsets. Given an object ID, looking up an object is an O(1) operation.
  #
  # Each PDF object can be mapped to a ruby object, so by passing an object
  # ID to the [] method, a ruby representation of that object will be
  # retrieved.
  #
  # The class behaves much like a standard Ruby hash, including the use of
  # the Enumerable mixin. The key difference is no []= method - the hash
  # is read only.
  #
  # == Basic Usage
  #
  #     h = PDF::Reader::ObjectHash.new("somefile.pdf")
  #     h[1]
  #     => 3469
  #
  #     h[PDF::Reader::Reference.new(1,0)]
  #     => 3469
  #
  class ObjectHash
    include Enumerable

    # default: the value [] returns when the key is not a positive ID or when
    # the lookup raises InvalidObjectError. It is writable by callers.
    attr_accessor :default
    # trailer: taken from the xref table in initialize.
    # pdf_version: the version number parsed from the start of the file.
    attr_reader :trailer, :pdf_version
    # sec_handler: the security handler used to decrypt objects as they are loaded.
    attr_reader :sec_handler

    # Creates a new ObjectHash object. Input can be a string with a valid filename
    # or an IO-like object.
    #
    # Valid options:
    #
    #   :password - the user password to decrypt the source PDF
    #   :cache    - an object cache to use instead of a new PDF::Reader::ObjectCache
    #
    def initialize(input, opts = {})
      # input is normalised to an IO-like object; a filename is read into memory.
      @io          = extract_io_from(input)
      @xref        = PDF::Reader::XRef.new(@io)
      @pdf_version = read_version
      @trailer     = @xref.trailer
      # opts[:cache] lets the caller supply the object cache; otherwise a new
      # PDF::Reader::ObjectCache is used.
      @cache       = opts[:cache] || PDF::Reader::ObjectCache.new
      # A null handler is installed first because build_security_handler
      # dereferences trailer[:Encrypt], and loading an object goes through
      # decrypt, which calls sec_handler. The real handler then replaces it.
      @sec_handler = NullSecurityHandler.new
      @sec_handler = build_security_handler(opts)
    end

    # returns the type of object a ref points to
    def obj_type(ref)
      target = self[ref]
      target.class.to_s.to_sym
    rescue
      # any StandardError raised while looking the object up means "no type"
      nil
    end

    # returns true if the supplied reference points to an object with a stream
    def stream?(ref)
      known = self.has_key?(ref)
      # unknown keys short-circuit with whatever has_key? reported
      return known unless known

      self[ref].is_a?(PDF::Reader::Stream)
    end

    # Access an object from the PDF. key can be an int or a PDF::Reader::Reference
    # object.
    #
    # If an int is used, the object with that ID and a generation number of 0 will
    # be returned.
    #
    # If a PDF::Reader::Reference object is used the exact ID and generation number
    # can be specified.
    #
    def [](key)
      # IDs must be positive; anything else gets the default value
      return default if key.to_i <= 0

      # a bare ID means generation 0 of that object
      key = PDF::Reader::Reference.new(key.to_i, 0) unless key.is_a?(PDF::Reader::Reference)

      return @cache[key] if @cache.has_key?(key)

      if xref[key].is_a?(Integer)
        # the xref entry is a byte offset: parse the object straight from the file
        buf = new_buffer(xref[key])
        @cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
      elsif xref[key].is_a?(PDF::Reader::Reference)
        # the xref entry points at an object stream that contains the object
        container_key = xref[key]
        object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
        @cache[key] = object_streams[container_key][key.id]
      end
    rescue InvalidObjectError
      default
    end

    # If key is a PDF::Reader::Reference object, lookup the corresponding
    # object in the PDF and return it. Otherwise return key untouched.
    #
    def object(key)
      # only references need resolving; any other value is already the object
      return key unless key.is_a?(PDF::Reader::Reference)

      self[key]
    end
    alias :deref :object

    # Recursively dereferences the object referred to by +key+. If +key+ is not
    # a PDF::Reader::Reference, the key is returned unchanged.
    #
    def deref!(key)
      # each top-level call starts with its own empty record of visited objects
      visited = {}
      deref_internal!(key, visited)
    end

    # Access an object from the PDF. key can be an int or a PDF::Reader::Reference
    # object.
    #
    # If an int is used, the object with that ID and a generation number of 0 will
    # be returned.
    #
    # If a PDF::Reader::Reference object is used the exact ID and generation number
    # can be specified.
    #
    # local_default is the object that will be returned if the requested key doesn't
    # exist.
    #
    def fetch(key, local_default = nil)
      found = self[key]
      return found if found
      return local_default if local_default

      # Only a non-positive key raises. A positive key that resolves to
      # nothing falls through here and the method returns nil.
      raise IndexError, "#{key} is invalid" if key.to_i <= 0
    end

    # iterate over each key, value. Just like a ruby hash.
    #
    def each(&block)
      # Without a block, hand back an Enumerator (as Hash#each does) instead
      # of failing with LocalJumpError on the first yield.
      return enum_for(:each) unless block_given?

      # yields every reference in the xref table together with its loaded object
      @xref.each do |ref|
        yield ref, self[ref]
      end
    end
    alias :each_pair :each

    # iterate over each key. Just like a ruby hash.
    #
    def each_key(&block)
      # Without a block, hand back an Enumerator (as Hash#each_key does)
      # instead of failing with LocalJumpError on the first yield.
      return enum_for(:each_key) unless block_given?

      each do |id, _obj|
        yield id
      end
    end

    # iterate over each value. Just like a ruby hash.
    #
    def each_value(&block)
      # Without a block, hand back an Enumerator (as Hash#each_value does)
      # instead of failing with LocalJumpError on the first yield.
      return enum_for(:each_value) unless block_given?

      each do |_id, obj|
        yield obj
      end
    end

    # return the number of objects in the file. An object with multiple generations
    # is counted once.
    def size
      # the count comes straight from the xref table
      table = xref
      table.size
    end
    alias :length :size

    # return true if there are no objects in this file
    #
    def empty?
      size.zero?
    end

    # return true if the specified key exists in the file. key
    # can be an int or a PDF::Reader::Reference
    #
    def has_key?(check_key)
      # TODO update from O(n) to O(1)
      # a Reference must match id and generation; anything else is compared
      # by object ID only
      by_reference = check_key.kind_of?(PDF::Reader::Reference)

      each_key do |candidate|
        matched = by_reference ? check_key == candidate : check_key.to_i == candidate.id
        return true if matched
      end

      false
    end
    alias :include? :has_key?
    alias :key? :has_key?
    alias :member? :has_key?

    # return true if the specified value exists in the file
    #
    def has_value?(value)
      # TODO update from O(n) to O(1)
      # linear scan: loads each object and compares it with ==
      each_value do |obj|
        return true if obj == value
      end
      false
    end
    # value? must test values, mirroring Hash#value?; it used to alias has_key?
    alias :value? :has_value?

    # short human-readable summary that reports only the object count
    def to_s
      object_count = size
      "<PDF::Reader::ObjectHash size: #{object_count}>"
    end

    # return an array of all keys in the file
    #
    def keys
      [].tap do |collected|
        each_key { |k| collected << k }
      end
    end

    # return an array of all values in the file
    #
    def values
      [].tap do |collected|
        each_value { |v| collected << v }
      end
    end

    # return an array of all values from the specified keys
    #
    def values_at(*ids)
      # results follow the order of the requested ids
      ids.each_with_object([]) do |id, found|
        found << self[id]
      end
    end

    # return an array of arrays. Each sub array contains a key/value pair.
    #
    def to_a
      [].tap do |pairs|
        each { |id, obj| pairs << [id, obj] }
      end
    end

    # returns an array of PDF::Reader::References. Each reference in the
    # array points to a Page object, one for each page in the PDF. The first
    # reference is page 1, the second reference is page 2, etc.
    #
    # Useful for apps that want to extract data from specific pages.
    #
    def page_references
      # The root lookup sits inside the memoized expression so that repeat
      # calls return the cached list without fetching the catalog again.
      @page_references ||= begin
        root = fetch(trailer[:Root])
        get_page_objects(root[:Pages]).flatten
      end
    end

    # true when the trailer has an :Encrypt entry
    def encrypted?
      trailer_dict = trailer
      trailer_dict.has_key?(:Encrypt)
    end

    # true when a security handler object is set
    def sec_handler?
      sec_handler ? true : false
    end

    private

    # Private implementation of deref!, which exists to ensure the `seen` argument
    # isn't publicly available. It's used to avoid endless loops in the recursion, and
    # doesn't need to be part of the public API.
    #
    def deref_internal!(key, seen)
      # References are tracked by the reference itself; inline objects are
      # tracked by their Ruby object_id.
      seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id

      # Already visited (possibly still being built higher up the recursion):
      # reuse the same result object rather than descending again.
      return seen[seen_key] if seen.key?(seen_key)

      case object = deref(key)
      when Hash
        # The result container is registered in seen BEFORE recursing, so a
        # cycle back to this object finds it and stops.
        seen[seen_key] ||= {}
        object.each do |k, value|
          seen[seen_key][k] = deref_internal!(value, seen)
        end
        seen[seen_key]
      when PDF::Reader::Stream
        # Build a new stream with the same data, filling its hash with
        # dereferenced values.
        seen[seen_key] ||= PDF::Reader::Stream.new({}, object.data)
        object.hash.each do |k,value|
          seen[seen_key].hash[k] = deref_internal!(value, seen)
        end
        seen[seen_key]
      when Array
        seen[seen_key] ||= []
        object.each do |value|
          seen[seen_key] << deref_internal!(value, seen)
        end
        seen[seen_key]
      else
        # Other values are returned as-is and are not recorded in seen.
        object
      end
    end

    # Picks the security handler for this file from the trailer's :Encrypt
    # entry. opts[:password] is passed through to the standard handler.
    def build_security_handler(opts = {})
      encrypt = deref(trailer[:Encrypt])

      return NullSecurityHandler.new if NullSecurityHandler.supports?(encrypt)
      return UnimplementedSecurityHandler.new unless StandardSecurityHandler.supports?(encrypt)

      # metadata counts as encrypted unless the dictionary says otherwise
      encmeta = if encrypt.has_key?(:EncryptMetadata)
                  encrypt[:EncryptMetadata].to_s == "true"
                else
                  true
                end
      key_bits = (encrypt[:Length] || 40).to_i
      id_parts = deref(trailer[:ID]) || []

      StandardSecurityHandler.new(
        key_length: key_bits,
        revision: encrypt[:R],
        owner_key: encrypt[:O],
        user_key: encrypt[:U],
        permissions: encrypt[:P].to_i,
        encrypted_metadata: encmeta,
        file_id: id_parts.first,
        password: opts[:password]
      )
    end

    # Decrypts every string and stream payload found in obj, using the
    # current security handler. ref is the reference of the object being
    # loaded and is passed to the handler with each payload.
    #
    # Streams are updated in place and returned; hashes and arrays are
    # rebuilt with decrypted contents; any other value is returned untouched.
    def decrypt(ref, obj)
      case obj
      when PDF::Reader::Stream
        obj.data = sec_handler.decrypt(obj.data, ref)
        obj
      when Hash
        # Built entry by entry: splatting a flattened pair list into Hash[]
        # passes the whole dictionary as method arguments, which is bounded
        # by the VM stack and allocates two throwaway arrays.
        obj.each_with_object({}) do |(key, val), decrypted|
          decrypted[key] = decrypt(ref, val)
        end
      when Array
        obj.map { |item| decrypt(ref, item) }
      when String
        sec_handler.decrypt(obj, ref)
      else
        obj
      end
    end

    # builds a Buffer over the underlying IO, passing offset as its :seek option
    def new_buffer(offset = 0)
      buffer_class = PDF::Reader::Buffer
      buffer_class.new(@io, seek: offset)
    end

    # private reader for the cross-reference table created in initialize
    def xref
      @xref
    end

    # lazily created hash of object streams, keyed by the container's reference
    def object_streams
      @object_stream = {} unless @object_stream
      @object_stream
    end

    # returns a nested array of object references for all pages in this object store.
    #
    def get_page_objects(ref)
      obj = deref(ref)

      # A node that dereferences to nil contributes no pages; this avoids a
      # NoMethodError from nil[:Type].
      return [] if obj.nil?

      if obj[:Type] == :Page
        ref
      elsif obj[:Kids]
        # Kids may itself be a reference; treat an unresolvable one as empty
        (deref(obj[:Kids]) || []).map { |kid| get_page_objects(kid) }
      else
        # Neither a page nor a node with kids: return an empty list so the
        # caller's flatten does not pick up a nil entry.
        []
      end
    end

    # Reads the PDF version number from the first 10 bytes of the IO and
    # returns it as a Float (e.g. 1.4). Returns 0.0 when no version marker is
    # found. The IO is rewound to position 0 afterwards.
    def read_version
      @io.seek(0)
      # read returns nil on an empty IO; to_s turns that into ""
      header = @io.read(10).to_s
      @io.seek(0)

      # the dot is escaped so only a literal "digit.digit" version matches
      found = header.match(/PDF-(\d\.\d)/)
      found ? found[1].to_f : 0.0
    end

    # Turns the constructor input into an IO-like object: IO-like input is
    # returned as-is, and an existing file is read into a StringIO.
    def extract_io_from(input)
      io_like = input.respond_to?(:seek) && input.respond_to?(:read)
      return input if io_like

      unless File.file?(input.to_s)
        raise ArgumentError, "input must be an IO-like object or a filename"
      end

      StringIO.new read_as_binary(input)
    end

    # Reads the whole file named by input in binary mode. File.binread is
    # used when available; otherwise the file is opened with mode "rb".
    def read_as_binary(input)
      path = input.to_s
      return File.binread(path) if File.respond_to?(:binread)

      File.open(path, "rb") { |f| f.read }
    end

  end
end
