diff --git a/lib/hiringtrends/item.rb b/lib/hiringtrends/item.rb
index d224d69..29527a2 100644
--- a/lib/hiringtrends/item.rb
+++ b/lib/hiringtrends/item.rb
@@ -32,7 +32,7 @@ def self.load(item_id:, force_api_source: false)
     end
 
     def comments
-      values["children"] || []
+      (values["children"] || []).reject { |comment| comment["text"].nil? }
     end
 
     def rel
@@ -53,7 +53,7 @@ def save
 
     def analyze(terms_dictionary)
       HiringTrends.logger.info "Analyzing #{id}: #{title}"
-      @terms_data = terms_dictionary.software_terms_clone
+      @terms_data = terms_dictionary.term_counts_template
 
       count_terms_in_comments
       calculate_percentage_for_terms
@@ -76,16 +76,13 @@ def to_record
     # accumulate mentions of term as comments are searched
     def count_terms_in_comments
       comments.each do |comment|
-        comment_text = comment["text"]
-        next if comment_text.nil?
+        posting = HiringTrends::JobPosting.new(job_description: comment["text"])
 
-        posting = HiringTrends::JobPosting.new(job_description: comment_text)
-
-          # identify if each term is in the comment
-          terms_data.each_key do |term|
-            # increment count as its found
-            terms_data[term][:count] += 1 if posting.term?(terms_data[term][:full_term])
-          end
+        # identify if each term is in the comment/job description
+        terms_data.each_key do |term|
+          # increment count as its found
+          terms_data[term][:count] += 1 if posting.term?(terms_data[term][:full_term])
+        end
       end
     end
 
diff --git a/lib/hiringtrends/terms_dictionary.rb b/lib/hiringtrends/terms_dictionary.rb
index b8bce6c..d3fe737 100644
--- a/lib/hiringtrends/terms_dictionary.rb
+++ b/lib/hiringtrends/terms_dictionary.rb
@@ -2,25 +2,25 @@
 
 module HiringTrends
   class TermsDictionary
-    attr_reader :raw_terms, :software_terms
+    attr_reader :raw_terms
 
     def initialize(dictionary_url)
       response = Faraday.get dictionary_url
       @raw_terms = response.body.lines.map(&:chomp)
-      @software_terms = {}
-
-      @raw_terms.each do |line|
-        @software_terms[line.split("/").first] = {
+      @term_counts_template = @raw_terms.each_with_object({}) do |term, result|
+        term_without_modifiers = term.split("/").first
+        result[term_without_modifiers] = {
           count: 0,
           percentage: 0,
           mavg3: 0,
-          full_term: line
+          full_term: term
         }
       end
     end
 
-    def software_terms_clone
-      Marshal.load(Marshal.dump(@software_terms))
+    # Returns a copy of the term counts template
+    def term_counts_template
+      Marshal.load(Marshal.dump(@term_counts_template))
     end
 
     # Some terms go by different names, modifiers are used to search for
diff --git a/spec/hiringtrends_spec.rb b/spec/hiringtrends_spec.rb
index 98eeb8c..c9b2973 100644
--- a/spec/hiringtrends_spec.rb
+++ b/spec/hiringtrends_spec.rb
@@ -5,15 +5,15 @@
 
   describe "#initialize" do
     it "creates the terms hash structure" do
-      expect(subject.dictionary.software_terms).to be_a(Hash)
+      expect(subject.dictionary.term_counts_template).to be_a(Hash)
     end
 
     it "key is the term" do
-      expect(subject.dictionary.software_terms.keys.first).to eq(".NET")
+      expect(subject.dictionary.term_counts_template.keys.first).to eq(".NET")
     end
 
     it "term value is a hash" do
-      expect(subject.dictionary.software_terms.values.first).to eq({ count: 0, full_term: ".NET", mavg3: 0, percentage: 0 })
+      expect(subject.dictionary.term_counts_template.values.first).to eq({ count: 0, full_term: ".NET", mavg3: 0, percentage: 0 })
     end
   end
 
diff --git a/spec/item_spec.rb b/spec/item_spec.rb
index 99d2d2f..a9a8cf2 100644
--- a/spec/item_spec.rb
+++ b/spec/item_spec.rb
@@ -112,7 +112,7 @@
 
     before do
       allow(HiringTrends::TermsDictionary).to receive(:new).and_return(dictionary)
-      allow(dictionary).to receive(:software_terms_clone).and_return(terms)
+      allow(dictionary).to receive(:term_counts_template).and_return(terms)
     end
 
     context "when words have slash separatos" do