Skip to content

Commit d3d443f

Browse files
committed
Merge branch 'master' of https://github.com/tmikoss/docx into tmikoss-merge
Conflicts: lib/docx/document.rb spec/docx/document_spec.rb
2 parents bd689e6 + 89d6d9c commit d3d443f

4 files changed

Lines changed: 168 additions & 137 deletions

File tree

lib/docx/document.rb

Lines changed: 143 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -3,140 +3,146 @@
33
require 'nokogiri'
44
require 'zip'
55

6-
module Docx
7-
# The Document class wraps around a docx file and provides methods to
8-
# interface with it.
9-
#
10-
# # get a Docx::Document for a docx file in the local directory
11-
# doc = Docx::Document.open("test.docx")
12-
#
13-
# # get the text from the document
14-
# puts doc.text
15-
#
16-
# # do the same thing in a block
17-
# Docx::Document.open("test.docx") do |d|
18-
# puts d.text
19-
# end
20-
class Document
21-
attr_reader :xml, :doc, :zip, :styles
22-
23-
def initialize(path, &block)
24-
@replace = {}
25-
@zip = Zip::File.open(path)
26-
@document_xml = @zip.read('word/document.xml')
27-
@doc = Nokogiri::XML(@document_xml)
28-
@styles_xml = @zip.read('word/styles.xml')
29-
@styles = Nokogiri::XML(@styles_xml)
30-
if block_given?
31-
yield self
32-
@zip.close
33-
end
34-
end
35-
36-
# This stores the current global document properties, for now
37-
def document_properties
38-
{
39-
font_size: font_size
40-
}
41-
end
42-
43-
# With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it will be passed the opened +docx+ file as an argument and the Docx::Document object will automatically be closed when the block terminates. The value of the block will be returned from Docx::Document.open.
44-
# call-seq:
45-
# open(filepath) => file
46-
# open(filepath) {|file| block } => obj
47-
def self.open(path, &block)
48-
self.new(path, &block)
49-
end
50-
51-
def paragraphs
52-
@doc.xpath('//w:document//w:body//w:p').map { |p_node| parse_paragraph_from p_node }
53-
end
54-
55-
def bookmarks
56-
bkmrks_hsh = Hash.new
57-
bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }
58-
# auto-generated by office 2010
59-
bkmrks_ary.reject! {|b| b.name == "_GoBack" }
60-
bkmrks_ary.each {|b| bkmrks_hsh[b.name] = b }
61-
bkmrks_hsh
62-
end
63-
64-
def tables
65-
@doc.xpath('//w:document//w:body//w:tbl').map { |t_node| parse_table_from t_node }
66-
end
67-
68-
# Some documents have this set, others don't.
69-
# Values are stored as half-points, so the value is divided by 2 to get points.
70-
def font_size
71-
size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
72-
size_tag ? size_tag.attributes['val'].value.to_i / 2 : nil
73-
end
74-
75-
##
76-
# *Deprecated*
77-
#
78-
# Iterates over paragraphs within document
79-
# call-seq:
80-
# each_paragraph => Enumerator
81-
def each_paragraph
82-
paragraphs.each { |p| yield(p) }
83-
end
84-
85-
# call-seq:
86-
# to_s -> string
87-
def to_s
88-
paragraphs.map(&:to_s).join("\n")
89-
end
90-
91-
# Output entire document as a String HTML fragment
92-
def to_html
93-
paragraphs.map(&:to_html).join('\n')
94-
end
95-
96-
# Save document to provided path
97-
# call-seq:
98-
# save(filepath) => void
99-
def save(path)
100-
update
101-
Zip::OutputStream.open(path) do |out|
102-
zip.each do |entry|
103-
out.put_next_entry(entry.name)
104-
105-
if @replace[entry.name]
106-
out.write(@replace[entry.name])
107-
else
108-
out.write(zip.read(entry.name))
109-
end
110-
end
111-
end
112-
zip.close
113-
end
114-
115-
alias_method :text, :to_s
116-
117-
private
118-
119-
#--
120-
# TODO: Flesh this out to be compatible with other files
121-
# TODO: Method to set flag on files that have been edited, probably by inserting something at the
122-
# end of methods that make edits?
123-
#++
124-
def update
125-
@replace["word/document.xml"] = doc.serialize :save_with => 0
126-
end
127-
128-
# generate Elements::Containers::Paragraph from paragraph XML node
129-
def parse_paragraph_from(p_node)
130-
Elements::Containers::Paragraph.new(p_node, document_properties)
131-
end
132-
133-
# generate Elements::Bookmark from bookmark XML node
134-
def parse_bookmark_from(b_node)
135-
Elements::Bookmark.new(b_node)
136-
end
137-
138-
def parse_table_from(t_node)
139-
Elements::Containers::Table.new(t_node)
140-
end
141-
end
142-
end
6+
module Docx
  # The Document class wraps around a docx file and provides methods to
  # interface with it.
  #
  #   # get a Docx::Document for a docx file in the local directory
  #   doc = Docx::Document.open("test.docx")
  #
  #   # get the text from the document
  #   puts doc.text
  #
  #   # do the same thing in a block
  #   Docx::Document.open("test.docx") do |d|
  #     puts d.text
  #   end
  class Document
    attr_reader :xml, :doc, :zip, :styles

    # Opens the archive at +path+ and parses 'word/document.xml' and
    # 'word/styles.xml' out of it.
    #
    # When a block is given, the new document is yielded to it and the
    # archive is closed once the block finishes, even if the block raises.
    def initialize(path, &block)
      @replace = {}
      @zip = Zip::File.open(path)
      @document_xml = @zip.read('word/document.xml')
      @doc = Nokogiri::XML(@document_xml)
      @styles_xml = @zip.read('word/styles.xml')
      @styles = Nokogiri::XML(@styles_xml)
      return unless block_given?

      begin
        yield self
      ensure
        # Release the archive handle even when the caller's block fails.
        @zip.close
      end
    end

    # This stores the current global document properties, for now
    def document_properties
      {
        font_size: font_size
      }
    end

    # With no associated block, Docx::Document.open is a synonym for
    # Docx::Document.new. If the optional code block is given, it will be
    # passed the opened +docx+ file as an argument and the underlying archive
    # will automatically be closed when the block terminates. Because this
    # delegates to +new+, the Docx::Document itself is returned in both forms.
    # call-seq:
    #   open(filepath) => file
    #   open(filepath) {|file| block } => file
    def self.open(path, &block)
      new(path, &block)
    end

    # Returns one Elements::Containers::Paragraph per w:p node in the body.
    def paragraphs
      @doc.xpath('//w:document//w:body//w:p').map { |p_node| parse_paragraph_from p_node }
    end

    # Returns a Hash of bookmark name => Elements::Bookmark for every
    # w:bookmarkStart node. When several bookmarks share a name, the last one
    # in document order wins.
    def bookmarks
      @doc.xpath('//w:bookmarkStart').each_with_object({}) do |b_node, found|
        bookmark = parse_bookmark_from(b_node)
        # "_GoBack" is auto-generated by office 2010
        next if bookmark.name == '_GoBack'

        found[bookmark.name] = bookmark
      end
    end

    # Returns one Elements::Containers::Table per w:tbl node in the body.
    def tables
      @doc.xpath('//w:document//w:body//w:tbl').map { |t_node| parse_table_from t_node }
    end

    # Some documents have this set, others don't (nil is returned then).
    # The value is stored in half-points, so it is divided by 2 to get points.
    # This is integer division, so an odd half-point value is rounded down.
    def font_size
      size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
      size_tag ? size_tag.attributes['val'].value.to_i / 2 : nil
    end

    ##
    # *Deprecated*
    #
    # Iterates over paragraphs within document. Without a block an Enumerator
    # over the paragraphs is returned.
    # call-seq:
    #   each_paragraph => Enumerator
    def each_paragraph(&block)
      paragraphs.each(&block)
    end

    # call-seq:
    #   to_s -> string
    def to_s
      paragraphs.map(&:to_s).join("\n")
    end

    # Output entire document as a String HTML fragment, one paragraph per line
    def to_html
      # Double quotes are required: '\n' would be a literal backslash + "n".
      paragraphs.map(&:to_html).join("\n")
    end

    # Save document to provided path. Entries registered through
    # replace_entry are written in place of the archive's original content.
    # The source archive is closed afterwards.
    # call-seq:
    #   save(filepath) => void
    def save(path)
      update
      Zip::OutputStream.open(path) do |out|
        zip.each do |entry|
          out.put_next_entry(entry.name)
          out.write(@replace[entry.name] || zip.read(entry.name))
        end
      end
      zip.close
    end

    alias_method :text, :to_s

    # Registers +file_contents+ to be written for +entry_path+ on the next
    # save, instead of the content currently stored in the archive.
    def replace_entry(entry_path, file_contents)
      @replace[entry_path] = file_contents
    end

    private

    #--
    # TODO: Flesh this out to be compatible with other files
    # TODO: Method to set flag on files that have been edited, probably by inserting something at the
    # end of methods that make edits?
    #++
    def update
      replace_entry "word/document.xml", doc.serialize(:save_with => 0)
    end

    # generate Elements::Containers::Paragraph from paragraph XML node
    def parse_paragraph_from(p_node)
      Elements::Containers::Paragraph.new(p_node, document_properties)
    end

    # generate Elements::Bookmark from bookmark XML node
    def parse_bookmark_from(b_node)
      Elements::Bookmark.new(b_node)
    end

    # generate Elements::Containers::Table from table XML node
    def parse_table_from(t_node)
      Elements::Containers::Table.new(t_node)
    end
  end
end

spec/docx/document_spec.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,5 +392,30 @@
392392
end
393393

394394
end
395+
396+
# Verifies that an entry registered with Document#replace_entry is what ends
# up in the archive written by Document#save.
describe 'replacing contents' do
  let(:replacement_file_path) { @fixtures_path + '/replacement.png' }
  let(:temp_file_path) { Tempfile.new(['docx_gem', '.docx']).path }
  let(:entry_path) { 'word/media/image1.png' }
  let(:doc) { Docx::Document.open(@fixtures_path + '/replacement.docx') }

  it 'should replace existing file within the document' do
    # Read the fixture once, in binary mode, and reuse it for the assertion.
    replacement = File.binread(replacement_file_path)

    doc.replace_entry entry_path, replacement
    doc.save(temp_file_path)

    # Block form closes the archive handle when the check is done.
    Zip::File.open(temp_file_path) do |archive|
      expect(archive.read(entry_path)).to eq replacement
    end
  end

  after do
    # File.exists? is deprecated and was removed in Ruby 3.2.
    File.delete(temp_file_path) if File.exist?(temp_file_path)
  end
end
395420
end
396421

spec/fixtures/replacement.docx

20.4 KB
Binary file not shown.

spec/fixtures/replacement.png

87.7 KB
Loading

0 commit comments

Comments
 (0)