Class Cass::Context

  1. cass/lib/cass/context.rb
Parent: Object

Represents the context of a document, i.e., a list of words to analyze, along with an index.

Methods

public class

  1. new

public instance

  1. []
  2. index_words
  3. key?
  4. size

Attributes

index [RW]
words [RW]

Public class methods

new (doc, opts)
[show source]
# File cass/lib/cass/context.rb, line 8
    # Builds the list of context words from a document and indexes it.
    #
    # doc  - object responding to #lines; the lines are joined with spaces and
    #        split on whitespace to obtain the tokens.
    # opts - Hash with String keys:
    #        'min_prop'     - lower bound on a word's share of all tokens
    #                         (defaults to 0).
    #        'max_prop'     - upper bound on a word's share of all tokens
    #                         (defaults to 1).
    #        'stop_file'    - path of a whitespace-separated stopword file;
    #                         ignored when missing, nil or empty.
    #        'context_size' - when the key is present, only that many randomly
    #                         chosen words are kept.
    #
    # Aborts the process if the stopword file cannot be read. Progress messages
    # are printed only when a truthy VERBOSE constant is defined.
    def initialize(doc, opts)
      verbose = defined?(VERBOSE) && VERBOSE
      min_prop = opts['min_prop'] || 0
      max_prop = opts['max_prop'] || 1
      if verbose
        puts "Creating new context..."
        puts "Using all words with token frequency in range of #{min_prop} and #{max_prop}."
      end

      words = doc.lines.join(' ').split(/\s+/)
      nwords = words.size
      puts "Found #{nwords} words." if verbose

      if min_prop > 0 || max_prop < 1
        # Count every token, then keep only the words whose count lies within
        # the thresholds derived from the requested proportions.
        counts = Hash.new(0)
        words.each { |w| counts[w] += 1 }
        min_t = (min_prop * nwords).round
        max_t = (max_prop * nwords).round
        words = counts.reject { |_w, c| c < min_t || c > max_t }.keys
      else
        words.uniq!
      end

      # words = words - doc.targets
      stop_file = opts['stop_file']
      if stop_file && !stop_file.empty?
        begin
          # File.read opens, reads and closes the file in one call, so no
          # file handle is left open.
          stopwords = File.read(stop_file).split(/\s+/)
        rescue StandardError
          abort("Error: could not open stopword file #{stop_file}!")
        end
        puts "Removing #{stopwords.size} stopwords from context." if verbose
        words -= stopwords
      end

      # With 'context_size' set, keep a random subset of that many words.
      @words = opts.key?('context_size') ? words.shuffle[0, opts['context_size']] : words
      index_words
      puts "Using #{@words.size} words as context." if verbose
    end

Public instance methods

[] (el)

Convenience accessor method for getting either a word in the context or its index in the array. If an integer is passed, returns the word at that position; if a string is passed, returns the index of that word in the array.

[show source]
# File cass/lib/cass/context.rb, line 50
    # Looks up a word by position, or a position by word.
    #
    # el - an Integer is used as a position into @words and yields the word
    #      stored there; any other value is used as a key into @index and
    #      yields the stored position (nil when the key is absent).
    def [](el)
      # is_a? instead of an exact class comparison, so every kind of Integer
      # (including Fixnum/Bignum on Ruby versions before 2.4) is a position.
      el.is_a?(Integer) ? @words[el] : @index[el]
    end
index_words ()

Index the context. Necessary when words are updated manually.

[show source]
# File cass/lib/cass/context.rb, line 42
    # Rebuilds @index from scratch as a Hash mapping each entry of @words to
    # its position. When an entry occurs more than once, the last position
    # is the one recorded.
    def index_words
      @index = {}
      @words.each_with_index { |word, position| @index[word] = position }
    end
key? (k)

Returns true if a word is in the context, false otherwise.

[show source]
# File cass/lib/cass/context.rb, line 55
    # Reports whether k is present as a key in @index.
    #
    # k - the word to look for.
    def key?(k)
      @index.include?(k)
    end
size ()

Number of words in the context.

[show source]
# File cass/lib/cass/context.rb, line 60
    # Returns how many entries @words currently holds.
    def size
      @words.length
    end