Skip to content

Commit 21a4b22

Browse files
committed
Merge branch 'master' of https://github.com/higginsdragon/docx into higgins-merge
Conflicts: lib/docx/document.rb lib/docx/parser.rb spec/docx/document_spec.rb
2 parents c450f90 + 59465d7 commit 21a4b22

7 files changed

Lines changed: 287 additions & 67 deletions

File tree

lib/docx/containers/paragraph.rb

100644100755
Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ def self.tag
1515

1616
# Child elements: pPr, r, fldSimple, hlink, subDoc
1717
# http://msdn.microsoft.com/en-us/library/office/ee364458(v=office.11).aspx
18-
# Wraps a paragraph XML node.
#
# node:: the paragraph XML node this object reads from.
# document_properties:: Hash of document-wide defaults; only :font_size is read here.
def initialize(node, document_properties = {})
  @properties_tag = 'pPr'
  @node = node
  @document_properties = document_properties
  # Kept as the fallback returned by #font_size when the paragraph sets no size.
  @font_size = document_properties[:font_size]
end
2224

2325
# Set text of paragraph
@@ -39,17 +41,55 @@ def to_s
3941
text_runs.map(&:text).join('')
4042
end
4143

44+
# Return paragraph as a <p></p> HTML fragment with formatting based on properties.
45+
# Concatenates the HTML of every text run and wraps it in a <p> tag.
# Inline styles:
#   font-size::  added only when #font_size is non-nil, so a document without
#                a default size no longer yields the invalid "font-size:pt".
#   text-align:: added only when #alignment is non-nil.
def to_html
  html = text_runs.map(&:to_html).join
  styles = {}
  size = font_size
  styles['font-size'] = "#{size}pt" if size
  align = alignment
  styles['text-align'] = align if align
  html_tag(:p, content: html, styles: styles)
end
54+
55+
4256
# Array of text runs contained within paragraph
4357
# Builds a Containers::TextRun for every run node matched by
# 'w:r|w:hyperlink/w:r', handing each one the document properties.
def text_runs
  run_nodes = @node.xpath('w:r|w:hyperlink/w:r')
  run_nodes.map do |run_node|
    Containers::TextRun.new(run_node, @document_properties)
  end
end
4660

4761
# Iterate over each text run within a paragraph
4862
# Yields each element of #text_runs, in order, to the given block.
def each_text_run
  text_runs.each do |run|
    yield run
  end
end
65+
66+
# True when #alignment is 'left' or nil (no alignment set).
def aligned_left?
  current = alignment
  current.nil? || current == 'left'
end
69+
70+
# True when #alignment is exactly 'right'.
def aligned_right?
  'right' == alignment
end
73+
74+
# True when #alignment is exactly 'center'.
def aligned_center?
  'center' == alignment
end
77+
78+
# Font size taken from the first w:sz element under w:pPr: its 'val'
# attribute converted to an integer and halved. Falls back to the
# document-level @font_size when the paragraph has no such element.
#
# NOTE(review): integer division drops the remainder, so an odd value such as
# 21 yields 10 rather than 10.5.
def font_size
  size_tag = @node.xpath('w:pPr//w:sz').first
  return @font_size unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points / 2
end
5182

5283
alias_method :text, :to_s
84+
85+
private
86+
87+
# Returns the value of the paragraph's first w:jc element, or nil if there is none (treated as left-aligned)
88+
# Looks up the first w:jc element beneath the paragraph node and returns
# its 'val' attribute value; nil when there is no such element.
def alignment
  jc_tag = @node.xpath('.//w:jc').first
  return nil unless jc_tag

  jc_tag.attributes['val'].value
end
92+
5393
end
5494
end
5595
end

lib/docx/containers/text_run.rb

100644100755
Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ def self.tag
2020
attr_reader :text
2121
attr_reader :formatting
2222

23-
# Wraps a text run XML node.
#
# node:: the run XML node; its w:t children are wrapped as Elements::Text.
# document_properties:: Hash of document-wide defaults; only :font_size is read here.
def initialize(node, document_properties = {})
  @node = node
  @text_nodes = @node.xpath('w:t').map do |t_node|
    Elements::Text.new(t_node)
  end
  @properties_tag = 'rPr'
  # Fall back to an empty string / the default formatting when parsing yields nothing.
  @text = parse_text || ''
  @formatting = parse_formatting || DEFAULT_FORMATTING
  @document_properties = document_properties
  @font_size = document_properties[:font_size]
end
3032

3133
# Set text of text run
@@ -54,7 +56,20 @@ def parse_formatting
5456
# String form of the run: the text stored in @text.
def to_s
  @text
end
57-
59+
60+
# Return text as a HTML fragment with formatting based on properties.
61+
# Renders the run's text as an HTML fragment:
#   * &, < and > in the text are escaped so document content cannot break
#     or inject markup;
#   * wrapped in <em> when italicized? and in <strong> when bolded?;
#   * wrapped in a styled <span> when the run is underlined or its font size
#     differs from the document default held in @font_size.
def to_html
  html = @text.gsub(/[&<>]/, { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' })
  html = html_tag(:em, content: html) if italicized?
  html = html_tag(:strong, content: html) if bolded?
  styles = {}
  styles['text-decoration'] = 'underline' if underlined?
  # No need to be granular with font size down to the span level if it doesn't vary.
  run_size = font_size
  styles['font-size'] = "#{run_size}pt" if run_size != @font_size
  html = html_tag(:span, content: html, styles: styles) unless styles.empty?
  html
end
72+
5873
# Value stored under :italic in the run's formatting hash.
def italicized?
  @formatting[:italic]
end
@@ -66,6 +81,11 @@ def bolded?
6681
# Value stored under :underline in the run's formatting hash.
def underlined?
  @formatting[:underline]
end
84+
85+
# Font size taken from the first w:sz element under w:rPr: its 'val'
# attribute converted to an integer and halved. Falls back to the
# document-level @font_size when the run has no such element.
#
# NOTE(review): integer division drops the remainder, so an odd value such as
# 21 yields 10 rather than 10.5.
def font_size
  size_tag = @node.xpath('w:rPr//w:sz').first
  return @font_size unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points / 2
end
6989
end
7090
end
7191
end

lib/docx/document.rb

100644100755
Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
require 'docx/parser'
2-
require 'zip'
3-
1+
require 'docx/containers'
2+
require 'docx/elements'
3+
require 'nokogiri'
4+
require 'zip/zip'
5+
46
module Docx
57
# The Document class wraps around a docx file and provides methods to
68
# interface with it.
@@ -16,17 +18,28 @@ module Docx
1618
# puts d.text
1719
# end
1820
class Document
19-
delegate :paragraphs, :bookmarks, :tables, :to => :@parser
20-
delegate :doc, :xml, :zip, :to => :@parser
21+
attr_reader :xml, :doc, :zip, :styles
22+
2123
# Opens the docx archive at +path+ and parses 'word/document.xml' and
# 'word/styles.xml' with Nokogiri.
#
# When a block is given, the document is yielded to it and the archive is
# closed afterwards, even if the block raises, so the zip handle is not
# leaked. Without a block the archive is left open.
def initialize(path, &block)
  @replace = {}
  @zip = Zip::ZipFile.open(path)
  @document_xml = @zip.read('word/document.xml')
  @doc = Nokogiri::XML(@document_xml)
  @styles_xml = @zip.read('word/styles.xml')
  @styles = Nokogiri::XML(@styles_xml)
  return unless block_given?

  begin
    yield self
  ensure
    @zip.close
  end
end
29-
35+
36+
# This stores the current global document properties, for now
37+
# Builds a fresh Hash of document-wide properties; currently only
# :font_size, taken from #font_size.
def document_properties
  properties = {}
  properties[:font_size] = font_size
  properties
end
42+
3043
# With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it will be passed the opened +docx+ file as an argument and the Docx::Document object will automatically be closed when the block terminates. The values of the block will be returned from Docx::Document.open.
3144
# call-seq:
3245
# open(filepath) => file
@@ -35,6 +48,30 @@ def self.open(path, &block)
3548
self.new(path, &block)
3649
end
3750

51+
# Maps every node matched by '//w:document//w:body//w:p' through
# #parse_paragraph_from and returns the resulting array.
def paragraphs
  paragraph_nodes = @doc.xpath('//w:document//w:body//w:p')
  paragraph_nodes.map do |p_node|
    parse_paragraph_from(p_node)
  end
end
54+
55+
# Returns a Hash mapping bookmark name => bookmark object for every
# w:bookmarkStart node, skipping the one named "_GoBack". When two
# bookmarks share a name, the later one replaces the earlier.
def bookmarks
  parsed = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from(b_node) }
  parsed.each_with_object({}) do |bookmark, by_name|
    # auto-generated by office 2010
    next if bookmark.name == "_GoBack"

    by_name[bookmark.name] = bookmark
  end
end
63+
64+
# Maps every node matched by '//w:document//w:body//w:tbl' through
# #parse_table_from and returns the resulting array.
def tables
  table_nodes = @doc.xpath('//w:document//w:body//w:tbl')
  table_nodes.map do |t_node|
    parse_table_from(t_node)
  end
end
67+
68+
# Some documents have this set, others don't.
69+
# Sizes are stored as half-points, so the value is divided by 2 to get points.
70+
# Default font size from the styles XML: the 'val' attribute of the first
# w:sz under w:docDefaults/w:rPrDefault/w:rPr, converted to an integer and
# halved. Returns nil when the styles define no such element.
#
# NOTE(review): integer division drops the remainder, so an odd value such as
# 21 yields 10 rather than 10.5.
def font_size
  size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  return nil unless size_tag

  half_points = size_tag.attributes['val'].value.to_i
  half_points / 2
end
74+
3875
##
3976
# *Deprecated*
4077
#
@@ -51,6 +88,11 @@ def to_s
5188
paragraphs.map(&:to_s).join("\n")
5289
end
5390

91+
# Output entire document as a String HTML fragment
92+
# Renders every paragraph with its #to_html and joins the fragments with a
# newline character. The separator must be double-quoted: the single-quoted
# '\n' is the two characters backslash and n, not a line break.
def to_html
  paragraphs.map(&:to_html).join("\n")
end
95+
5496
# Save document to provided path
5597
# call-seq:
5698
# save(filepath) => void
@@ -83,5 +125,18 @@ def update
83125
@replace["word/document.xml"] = doc.serialize :save_with => 0
84126
end
85127

128+
# generate Elements::Containers::Paragraph from paragraph XML node
129+
# Wraps +p_node+ in an Elements::Containers::Paragraph, passing along the
# current #document_properties.
def parse_paragraph_from(p_node)
  properties = document_properties
  Elements::Containers::Paragraph.new(p_node, properties)
end
132+
133+
# generate Elements::Bookmark from bookmark XML node
134+
# Wraps +b_node+ in an Elements::Bookmark.
def parse_bookmark_from(b_node)
  bookmark = Elements::Bookmark.new(b_node)
  bookmark
end
137+
138+
# Wraps +t_node+ in an Elements::Containers::Table.
def parse_table_from(t_node)
  table = Elements::Containers::Table.new(t_node)
  table
end
86141
end
87142
end

lib/docx/elements/element.rb

100644100755
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,30 @@ def copy
5555
self.class.new(@node.dup)
5656
end
5757

58+
# A method to wrap content in an HTML tag.
59+
# Currently used in paragraph and text_run for the to_html methods
60+
#
61+
# content:: The base text content for the tag.
62+
# styles:: Hash of the inline CSS styles to be applied. e.g.
63+
# { 'font-size' => '12pt', 'text-decoration' => 'underline' }
64+
#
65+
# Builds "<name ...>content</name>" as a String.
#
# name::    tag name (anything that interpolates to the tag text, e.g. :p).
# options:: Hash with optional keys
#           :content - text placed between the opening and closing tag;
#                      omitted when nil.
#           :styles  - Hash of CSS property => value rendered into a style
#                      attribute as "property:value;" pairs; the attribute is
#                      left out when the hash is nil or empty.
def html_tag(name, options = {})
  inline_css = (options[:styles] || {}).map { |property, value| "#{property}:#{value};" }.join
  style_attribute = inline_css.empty? ? '' : " style=\"#{inline_css}\""
  "<#{name}#{style_attribute}>#{options[:content]}</#{name}>"
end
81+
5882
module ClassMethods
5983
def create_with(element)
6084
# Need to somehow get the xml document accessible here by default, but this is alright in the interim

lib/docx/parser.rb

Lines changed: 0 additions & 54 deletions
This file was deleted.

0 commit comments

Comments
 (0)