Skip to content

Commit f77425b

Browse files
committed
Added paragraph alignment and font size recognition
1 parent 3c91a6b commit f77425b

5 files changed

Lines changed: 106 additions & 9 deletions

File tree

lib/docx/containers/paragraph.rb

100644100755
Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ def self.tag
1515

1616
# Child elements: pPr, r, fldSimple, hlink, subDoc
1717
# http://msdn.microsoft.com/en-us/library/office/ee364458(v=office.11).aspx
18-
def initialize(node)
18+
# Wraps a paragraph XML node.
#
# node                - the paragraph node; stored unchanged in @node.
# document_properties - optional Hash of document-wide defaults. Only
#                       :font_size is read here. nil is treated like {}.
def initialize(node, document_properties = {})
  @node = node
  @properties_tag = 'pPr'
  # An explicit nil (e.g. forwarded from a caller's own optional argument)
  # would otherwise raise NoMethodError on the [] lookup below.
  @document_properties = document_properties || {}
  # Fallback size used by #font_size when the paragraph sets none itself.
  @font_size = @document_properties[:font_size]
end
2224

2325
# Set text of paragraph
@@ -41,15 +43,41 @@ def to_s
4143

4244
# Array of text runs contained within paragraph
4345
# Builds one Containers::TextRun per run node found directly under the
# paragraph or inside a hyperlink, passing the document properties along.
def text_runs
  run_nodes = @node.xpath('w:r|w:hyperlink/w:r')
  run_nodes.map do |run_node|
    Containers::TextRun.new(run_node, @document_properties)
  end
end
4648

4749
# Iterate over each text run within a paragraph
4850
# Yields each text run of the paragraph in order.
#
# With a block, returns the array produced by #text_runs (the result of
# Array#each). Without a block, returns an Enumerator instead of raising
# LocalJumpError, matching the usual Ruby each-style contract.
def each_text_run
  return enum_for(:each_text_run) unless block_given?

  text_runs.each { |run| yield(run) }
end
53+
54+
# True when the paragraph's alignment value is 'left' or when no alignment
# value is present at all.
def aligned_left?
  current = alignment
  current.nil? || current == 'left'
end
57+
58+
# True when the paragraph's alignment value is exactly 'right'.
def aligned_right?
  current = alignment
  current == 'right'
end
61+
62+
# True when the paragraph's alignment value is exactly 'center'.
def aligned_center?
  current = alignment
  current == 'center'
end
65+
66+
# Font size of the paragraph in points.
#
# Reads the first w:sz element found under the paragraph's w:pPr. Its val
# attribute is in half-points, so it is halved. When the paragraph has no
# w:sz, the document-level size captured at construction is returned (which
# may be nil).
#
# Whole sizes are returned as Integer, as before. Odd half-point values
# (e.g. 21 for 10.5pt) are returned as Float rather than being truncated by
# integer division.
def font_size
  size_tag = @node.xpath('w:pPr//w:sz').first
  return @font_size unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points.even? ? half_points / 2 : half_points / 2.0
end
5170

5271
alias_method :text, :to_s
72+
73+
private
74+
75+
# Returns the paragraph's alignment value, or nil when none is set (nil is treated as left-aligned)
76+
# Value of the paragraph's own justification element, or nil when the
# paragraph does not set one (callers treat nil as left-aligned).
#
# Only w:pPr/w:jc is consulted. A descendant search would also pick up w:jc
# elements that do not describe this paragraph's current alignment, such as
# one recorded under a tracked property change or in nested content.
def alignment
  alignment_tag = @node.xpath('w:pPr/w:jc').first
  alignment_tag && alignment_tag.attributes['val'].value
end
80+
5381
end
5482
end
5583
end

lib/docx/containers/text_run.rb

100644100755
Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ def self.tag
2020
attr_reader :text
2121
attr_reader :formatting
2222

23-
def initialize(node)
23+
# Wraps a text run XML node.
#
# node                - the run node; stored unchanged in @node.
# document_properties - optional Hash of document-wide defaults. Only
#                       :font_size is read here. nil is treated like {}.
def initialize(node, document_properties = {})
  @node = node
  @text_nodes = @node.xpath('w:t').map { |t_node| Elements::Text.new(t_node) }
  @properties_tag = 'rPr'
  @text = parse_text || ''
  @formatting = parse_formatting || DEFAULT_FORMATTING
  # An explicit nil would otherwise raise NoMethodError on the [] lookup below.
  @document_properties = document_properties || {}
  # Fallback size used by #font_size when the run sets none itself.
  @font_size = @document_properties[:font_size]
end
3032

3133
# Set text of text run
@@ -66,6 +68,11 @@ def bolded?
6668
# Returns the :underline entry of the run's formatting hash.
def underlined?
  underline_flag = @formatting[:underline]
  underline_flag
end
71+
72+
# Font size of the run in points.
#
# Reads the first w:sz element found under the run's w:rPr. Its val
# attribute is in half-points, so it is halved. When the run has no w:sz,
# the document-level size captured at construction is returned (which may
# be nil).
#
# Whole sizes are returned as Integer, as before. Odd half-point values
# (e.g. 21 for 10.5pt) are returned as Float rather than being truncated by
# integer division.
def font_size
  size_tag = @node.xpath('w:rPr//w:sz').first
  return @font_size unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points.even? ? half_points / 2 : half_points / 2.0
end
6976
end
7077
end
7178
end

lib/docx/document.rb

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,28 @@ module Docx
1818
# puts d.text
1919
# end
2020
class Document
21-
attr_reader :xml, :doc, :zip
21+
attr_reader :xml, :doc, :zip, :styles
2222

2323
# Opens the .docx archive at +path+ and parses word/document.xml into @doc
# and word/styles.xml into @styles.
#
# When a block is given, the document is yielded to it and the archive is
# closed afterwards, even if the block raises. Without a block the archive
# is left open.
def initialize(path, &block)
  @replace = {}
  @zip = Zip::ZipFile.open(path)
  @document_xml = @zip.read('word/document.xml')
  # The class still exposes `attr_reader :xml`, which reads @xml; keep it
  # populated so the reader does not silently return nil.
  @xml = @document_xml
  @doc = Nokogiri::XML(@document_xml)
  @styles_xml = @zip.read('word/styles.xml')
  @styles = Nokogiri::XML(@styles_xml)
  return unless block_given?

  begin
    yield self
  ensure
    # Release the archive handle even when the caller's block raises.
    @zip.close
  end
end
33-
35+
36+
# This stores the current global document properties, for now
37+
# Builds a Hash of document-wide properties; currently only :font_size,
# taken from #font_size.
def document_properties
  properties = {}
  properties[:font_size] = font_size
  properties
end
42+
3443
# With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it will be passed the opened +docx+ file as an argument and the Docx::Document object will automatically be closed when the block terminates. The values of the block will be returned from Docx::Document.open.
3544
# call-seq:
3645
# open(filepath) => file
@@ -56,6 +65,13 @@ def tables
5665
@doc.xpath('//w:document//w:body//w:tbl').map { |t_node| parse_table_from t_node }
5766
end
5867

68+
# Some documents have this set, others don't.
69+
# The stored value is in half-points, so it is divided by 2 to get points.
70+
# Default font size of the document in points, or nil when the styles part
# defines no default w:sz.
#
# Whole sizes are returned as Integer, as before. Odd half-point values
# (e.g. 21 for 10.5pt) are returned as Float rather than being truncated by
# integer division.
def font_size
  size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  return nil unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points.even? ? half_points / 2 : half_points / 2.0
end
74+
5975
##
6076
# *Deprecated*
6177
#
@@ -106,7 +122,7 @@ def update
106122

107123
# generate Elements::Containers::Paragraph from paragraph XML node
108124
# Wraps a paragraph XML node in an Elements::Containers::Paragraph, handing
# it the current document properties.
def parse_paragraph_from(p_node)
  properties = document_properties
  Elements::Containers::Paragraph.new(p_node, properties)
end
111127

112128
# generate Elements::Bookmark from bookmark XML node

spec/docx/document_spec.rb

100644100755
Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,18 @@
166166
end
167167

168168
it 'should have the correct text' do
  # Expected paragraph texts, in document order.
  expected_texts = [
    'Normal',
    'Italic',
    'Bold',
    'Underline',
    'Normal',
    'This is a sentence with all formatting options in the middle of the sentence.',
    'This is a centered paragraph.',
    'This paragraph is aligned left.',
    'This paragraph is aligned right.',
    'This paragraph is 14 points.',
    'This paragraph has a word at 16 points.'
  ]

  @doc.paragraphs.size.should eq 11
  expected_texts.each_with_index do |expected, index|
    @doc.paragraphs[index].text.should eq expected
  end
end
177182

178183
it 'should contain a paragraph with multiple text runs' do
@@ -225,6 +230,47 @@
225230
@doc.paragraphs[5].text_runs[2].bolded?.should be_false
226231
@doc.paragraphs[5].text_runs[2].underlined?.should be_false
227232
end
233+
234+
it 'should detect centered paragraphs' do
  # Paragraph 6 is the centered one; its neighbours are not.
  paragraphs = @doc.paragraphs
  paragraphs[5].aligned_center?.should be_false
  paragraphs[6].aligned_center?.should be_true
  paragraphs[7].aligned_center?.should be_false
end
239+
240+
it 'should detect left justified paragraphs' do
  # Paragraph 7 is the left-aligned one; its neighbours are not.
  paragraphs = @doc.paragraphs
  paragraphs[6].aligned_left?.should be_false
  paragraphs[7].aligned_left?.should be_true
  paragraphs[8].aligned_left?.should be_false
end
245+
246+
it 'should detect right justified paragraphs' do
  # Paragraph 8 is the right-aligned one; its neighbours are not.
  paragraphs = @doc.paragraphs
  paragraphs[7].aligned_right?.should be_false
  paragraphs[8].aligned_right?.should be_true
  paragraphs[9].aligned_right?.should be_false
end
251+
252+
# ECMA-376 Office Open XML spec (4th edition), 17.3.2.38, size is
253+
# defined in half-points, meaning 14pt text returns a value of 28.
254+
# http://www.ecma-international.org/publications/standards/Ecma-376.htm
255+
it 'should return proper font size for paragraphs' do
  # Document default, then a paragraph that inherits it.
  @doc.font_size.should eq 11
  @doc.paragraphs[5].font_size.should eq 11

  # Paragraph 9 is set to 14 points, and so is its first run.
  fourteen_point = @doc.paragraphs[9]
  fourteen_point.font_size.should eq 14
  fourteen_point.text_runs.first.font_size.should eq 14
end
262+
263+
it 'should return proper font size for runs' do
  @doc.font_size.should eq 11
  paragraph = @doc.paragraphs[10]
  paragraph.font_size.should eq 11

  # Only the second run is enlarged; the others keep the document default.
  text_runs = paragraph.text_runs
  [11, 16, 11, 11, 11].each_with_index do |points, index|
    text_runs[index].font_size.should eq points
  end
end
228274
end
229275

230276
describe 'saving' do

spec/fixtures/formatting.docx

-1019 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)