From 5329c91dd572f38adc93240af7421b567bacda61 Mon Sep 17 00:00:00 2001 From: ff2248 Date: Wed, 10 May 2023 16:32:28 +0800 Subject: [PATCH] feat: handle rFont element in html format --- lib/roo/excelx.rb | 4 ++++ lib/roo/excelx/shared_strings.rb | 16 ++++++++++++++-- spec/lib/roo/excelx_spec.rb | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index 91ebc1e0..bc7d617d 100755 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -44,6 +44,10 @@ def initialize(filename_or_stream, options = {}) shared_options = {} shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false) + if !shared_options[:disable_html_wrapper] + shared_options[:rpr_elements] = options[:rpr_elements] || SharedStrings::DEFAULT_RPR_ELEMENTS + end + unless is_stream?(filename_or_stream) file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed) basename = find_basename(filename_or_stream) diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb index e70b6238..c1d95cb7 100755 --- a/lib/roo/excelx/shared_strings.rb +++ b/lib/roo/excelx/shared_strings.rb @@ -5,6 +5,8 @@ module Roo class Excelx class SharedStrings < Excelx::Extractor + DEFAULT_RPR_ELEMENTS = %w[b i u vertAlign].freeze + def [](index) to_a[index] end @@ -21,7 +23,7 @@ def to_html # See what is happening with commit??? def use_html?(index) return false if @options[:disable_html_wrapper] - to_html[index][/<([biu]|sup|sub)>/] + to_html[index][/<([biu]|sup|sub|\/font)>/] end private @@ -102,6 +104,8 @@ def extract_html_r(r_elem) case elem.name when 'rPr' elem.children.each do |rPr_elem| + next if !@options[:rpr_elements].include?(rPr_elem.name) + case rPr_elem.name when 'b' # set formatting for Bold to true @@ -124,6 +128,8 @@ def extract_html_r(r_elem) xml_elems[:sup] = true xml_elems[:sub] = false end + when 'rFont' + xml_elems[:font] = { face: rPr_elem['val'] } end end when 't' @@ -137,7 +143,13 @@ def extract_html_r(r_elem) def create_html(text, formatting) tmp_str = +"" formatting.each do |elem, val| - tmp_str << "<#{elem}>" if val + case val + when TrueClass + tmp_str << "<#{elem}>" + when Hash + attributes = val.map{ |k, v| "#{k}=\"#{v}\"" }.join(' ') + tmp_str << "<#{elem} #{attributes}>" + end end tmp_str << text diff --git a/spec/lib/roo/excelx_spec.rb b/spec/lib/roo/excelx_spec.rb index 10b0caf5..d14e3b20 100755 --- a/spec/lib/roo/excelx_spec.rb +++ b/spec/lib/roo/excelx_spec.rb @@ -606,6 +606,39 @@ expect(subject.excelx_value(6, 1, "Sheet1")).to eq("See that regular html tags do not create html tags.\n
    \n
  1. Denver Broncos
  2. \n
  3. Carolina Panthers
  4. \n
  5. New England Patriots
  6. \n
  7. Arizona Panthers
  8. \n
") expect(subject.excelx_value(7, 1, "Sheet1")).to eq("Does create html tags when formatting is used..\n
    \n
  1. Denver Broncos
  2. \n
  3. Carolina Panthers
  4. \n
  5. New England Patriots
  6. \n
  7. Arizona Panthers
  8. \n
") end + + context 'when rpr_elements includes rFont' do + subject(:xlsx) do + Roo::Excelx.new(path, rpr_elements: [*Roo::Excelx::SharedStrings::DEFAULT_RPR_ELEMENTS, 'rFont']) + end + + it 'returns the expected result' do + expect(subject.excelx_value(1, 1, "Sheet1")).to eq("This has no formatting.") + expect(subject.excelx_value(2, 1, "Sheet1")).to eq("This has bold formatting.") + expect(subject.excelx_value(2, 2, "Sheet1")).to eq("This has italics formatting.") + expect(subject.excelx_value(2, 3, "Sheet1")).to eq("This has underline format.") + expect(subject.excelx_value(2, 4, "Sheet1")).to eq("Superscript. x123") + expect(subject.excelx_value(2, 5, "Sheet1")).to eq("SubScript. Tj") + expect(subject.excelx_value(3, 1, "Sheet1")).to eq("Bold, italics together.") + expect(subject.excelx_value(3, 2, "Sheet1")).to eq("Bold, Underline together.") + expect(subject.excelx_value(3, 3, "Sheet1")).to eq("Bold, Superscript. xN") + expect(subject.excelx_value(3, 4, "Sheet1")).to eq("Bold, Subscript. Tabc") + expect(subject.excelx_value(3, 5, "Sheet1")).to eq("Italics, Underline together.") + expect(subject.excelx_value(3, 6, "Sheet1")).to eq("Italics, Superscript. Xabc") + expect(subject.excelx_value(3, 7, "Sheet1")).to eq("Italics, Subscript. Befg") + expect(subject.excelx_value(4, 1, "Sheet1")).to eq("Bold, italics underline, together.") + expect(subject.excelx_value(4, 2, "Sheet1")).to eq("Bold, italics, superscript. Xabc123") + expect(subject.excelx_value(4, 3, "Sheet1")).to eq("Bold, Italics, subscript. Mgha2") + expect(subject.excelx_value(4, 4, "Sheet1")).to eq("Bold, Underline, superscript. ABC123") + expect(subject.excelx_value(4, 5, "Sheet1")).to eq("Bold, Underline, subscript. GoodXYZ") + expect(subject.excelx_value(4, 6, "Sheet1")).to eq("Italics, Underline, superscript. Upswing") + expect(subject.excelx_value(4, 7, "Sheet1")).to eq("Italics, Underline, subscript. Tswing") + expect(subject.excelx_value(5, 1, "Sheet1")).to eq("Bold, italics, underline, superscript. GHJK1904") + expect(subject.excelx_value(5, 2, "Sheet1")).to eq("Bold, italics, underline, subscript. Mikedrop") + expect(subject.excelx_value(6, 1, "Sheet1")).to eq("See that regular html tags do not create html tags.\n
    \n
  1. Denver Broncos
  2. \n
  3. Carolina Panthers
  4. \n
  5. New England Patriots
  6. \n
  7. Arizona Panthers
  8. \n
") + expect(subject.excelx_value(7, 1, "Sheet1")).to eq("Does create html tags when formatting is used..\n
    \n
  1. Denver Broncos
  2. \n
  3. Carolina Panthers
  4. \n
  5. New England Patriots
  6. \n
  7. Arizona Panthers
  8. \n
") + end + end end end