Skip to content

Commit 12ab4fb

Browse files
committed
support for reading tables
1 parent 1707e83 commit 12ab4fb

11 files changed

Lines changed: 237 additions & 5 deletions

File tree

README.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# docx
22

3-
a ruby library/gem for interacting with `.docx` files. currently capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, and saving the document.
3+
a ruby library/gem for interacting with `.docx` files. current capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading tables/rows/columns/cells, and saving the document.
44

55
## usage
66

@@ -29,6 +29,36 @@ doc.bookmarks.each_pair do |bookmark_name, bookmark_object|
2929
end
3030
```
3131

32+
### reading tables
33+
34+
``` ruby
35+
require 'docx'
36+
37+
# Create a Docx::Document object for our existing docx file
38+
doc = Docx::Document.open('tables.docx')
39+
40+
first_table = doc.tables[0]
41+
puts first_table.row_count
42+
puts first_table.column_count
43+
puts first_table.rows[0].cells[0].text
44+
puts first_table.columns[0].cells[0].text
45+
46+
# Iterate through tables
47+
doc.tables.each do |table|
48+
table.rows.each do |row| # Row-based iteration
49+
row.cells.each do |cell|
50+
puts cell.text
51+
end
52+
end
53+
54+
table.columns.each do |column| # Column-based iteration
55+
column.cells.each do |cell|
56+
puts cell.text
57+
end
58+
end
59+
end
60+
```
61+
3262
### writing
3363

3464
``` ruby

docx.gemspec

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ Gem::Specification.new do |s|
66
s.version = Docx::VERSION
77
s.summary = 'a ruby library/gem for interacting with .docx files'
88
s.description = s.summary
9-
s.authors = ['Christopher Hunt', 'Marcus Ortiz']
9+
s.authors = ['Christopher Hunt', 'Marcus Ortiz', 'Yann Plancqueel']
1010
s.email = ['chrahunt@gmail.com']
11-
s.homepage = 'https://github.com/chrahunt/docx'
11+
s.homepage = 'https://github.com/yannp/docx'
1212
s.files = Dir["README.md", "LICENSE.md", "lib/**/*.rb"]
1313

1414
s.add_dependency 'nokogiri', '~> 1.5'

lib/docx/containers.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
require 'docx/containers/container'
22
require 'docx/containers/text_run'
3-
require 'docx/containers/paragraph'
3+
require 'docx/containers/paragraph'
4+
require 'docx/containers/table'

lib/docx/containers/table.rb

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
require 'docx/containers/table_row'
2+
require 'docx/containers/table_column'
3+
require 'docx/containers/container'
4+
5+
module Docx
6+
module Elements
7+
module Containers
8+
# Read-only wrapper around a table (+w:tbl+) node.
#
# Rows are the +w:tr+ children of the node. Columns are assembled by taking
# the n-th +w:tc+ child of every row.
class Table
  include Container
  include Elements::Element

  # Tag name associated with this container.
  def self.tag
    'tbl'
  end

  # node - the table node; the methods below query it with #xpath.
  def initialize(node)
    @node = node
    # NOTE(review): how @properties_tag is consumed is not visible here
    # (presumably by the included modules) - confirm 'tblGrid' is intended.
    @properties_tag = 'tblGrid'
  end

  # Array of TableRow, one per 'w:tr' child of the table node.
  def rows
    @node.xpath('w:tr').map { |row_node| Containers::TableRow.new(row_node) }
  end

  # Number of 'w:tr' children of the table node.
  def row_count
    @node.xpath('w:tr').count
  end

  # Array of TableColumn, one per grid column (see #column_count).
  #
  # Column n is built from the n-th 'w:tc' child of each row. The child axis
  # ('w:tr/w:tc') is used on purpose: a descendant search ('w:tr//w:tc')
  # would also pick up cells of tables nested inside a cell. Rows look up
  # their own cells the same way (direct 'w:tc' children).
  #
  # NOTE(review): selection is purely positional, so rows with merged cells
  # (fewer 'w:tc' children than grid columns) presumably will not line up
  # with the grid - confirm the desired behaviour.
  def columns
    Array.new(column_count) do |index|
      # XPath positions are 1-based.
      Containers::TableColumn.new(@node.xpath("w:tr/w:tc[#{index + 1}]"))
    end
  end

  # Number of 'w:gridCol' entries in the table's 'w:tblGrid'.
  def column_count
    @node.xpath('w:tblGrid/w:gridCol').count
  end

  # Iterate over each row within the table, yielding TableRow objects.
  # Returns an Enumerator when no block is given.
  def each_rows(&block)
    rows.each(&block)
  end
end
49+
end
50+
end
51+
end

lib/docx/containers/table_cell.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
require 'docx/containers/text_run'
2+
require 'docx/containers/container'
3+
4+
module Docx
5+
module Elements
6+
module Containers
7+
# Wrapper around a table cell (+w:tc+) node.
class TableCell
  include Container
  include Elements::Element

  # Tag name associated with this container.
  def self.tag
    'tc'
  end

  # node - the cell node; #paragraphs queries it with #xpath.
  def initialize(node)
    @node = node
    @properties_tag = 'tcPr'
  end

  # Array of Paragraph containers, one per 'w:p' child of the cell node.
  def paragraphs
    paragraph_nodes = @node.xpath('w:p')
    paragraph_nodes.map do |paragraph_node|
      Containers::Paragraph.new(paragraph_node)
    end
  end

  # Yield every paragraph of the cell, in order, to the given block.
  def each_paragraph
    paragraphs.each do |paragraph|
      yield(paragraph)
    end
  end

  # Text of the cell: the text of each paragraph joined with no separator.
  def to_s
    paragraph_texts = paragraphs.map { |paragraph| paragraph.text }
    paragraph_texts.join('')
  end

  # #text is the same method as #to_s.
  alias_method :text, :to_s
end
37+
end
38+
end
39+
end

lib/docx/containers/table_column.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
require 'docx/containers/table_cell'
2+
require 'docx/containers/container'
3+
4+
module Docx
5+
module Elements
6+
module Containers
7+
# A table column, represented as the list of cells handed to the constructor.
# Unlike the other table containers it does not wrap a node of its own.
class TableColumn
  include Container
  include Elements::Element

  # Array of TableCell contained within the column.
  attr_reader :cells

  # Tag name associated with this container.
  # NOTE(review): the sibling containers return unprefixed tags ('tbl', 'tr',
  # 'tc'); confirm whether the 'w:' prefix here is intentional.
  def self.tag
    'w:gridCol'
  end

  # cell_nodes - collection of cell nodes; each one is wrapped in a TableCell.
  def initialize(cell_nodes)
    # There is no backing node or properties tag; both are empty strings.
    @node = ''
    @properties_tag = ''
    @cells = cell_nodes.map do |cell_node|
      Containers::TableCell.new(cell_node)
    end
  end
end
27+
end
28+
end
29+
end

lib/docx/containers/table_row.rb

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
require 'docx/containers/table_cell'
2+
require 'docx/containers/container'
3+
4+
module Docx
5+
module Elements
6+
module Containers
7+
# Wrapper around a table row (+w:tr+) node.
class TableRow
  include Container
  include Elements::Element

  # Tag name associated with this container.
  def self.tag
    'tr'
  end

  # node - the row node; #cells queries it with #xpath.
  def initialize(node)
    @node = node
    @properties_tag = ''
  end

  # Array of TableCell, one per 'w:tc' child of the row node.
  def cells
    cell_nodes = @node.xpath('w:tc')
    cell_nodes.map do |cell_node|
      Containers::TableCell.new(cell_node)
    end
  end
end
26+
end
27+
end
28+
end

lib/docx/document.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ module Docx
1616
# puts d.text
1717
# end
1818
class Document
19-
delegate :paragraphs, :bookmarks, :to => :@parser
19+
delegate :paragraphs, :bookmarks, :tables, :to => :@parser
2020
delegate :doc, :xml, :zip, :to => :@parser
2121
def initialize(path, &block)
2222
@replace = {}

lib/docx/parser.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ def bookmarks
3030
bkmrks_ary.each {|b| bkmrks_hsh[b.name] = b }
3131
bkmrks_hsh
3232
end
33+
34+
# Array of table containers, one per 'w:tbl' node found anywhere under the
# document body (the query uses the descendant axis at every step).
def tables
  table_nodes = @doc.xpath('//w:document//w:body//w:tbl')
  table_nodes.map do |table_node|
    parse_table_from(table_node)
  end
end
3337

3438
private
3539

@@ -42,5 +46,9 @@ def parse_paragraph_from(p_node)
4246
def parse_bookmark_from(b_node)
4347
Elements::Bookmark.new(b_node)
4448
end
49+
50+
# Wrap a table node in an Elements::Containers::Table.
def parse_table_from(t_node)
  table_class = Elements::Containers::Table
  table_class.new(t_node)
end
4553
end
4654
end

spec/docx/document_spec.rb

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,52 @@
3939
end
4040
end
4141

42+
# Table-reading examples, run against the tables.docx fixture.
describe 'read tables' do
  before do
    @doc = Docx::Document.open(@fixtures_path + '/tables.docx')
  end

  # Row-based access: tables -> rows -> cells.
  it "should have tables with rows and cells" do
    expect(@doc.tables.count).to eq 2
    @doc.tables.each do |table|
      expect(table).to be_an_instance_of(Docx::Elements::Containers::Table)
      table.rows.each do |row|
        expect(row).to be_an_instance_of(Docx::Elements::Containers::TableRow)
        row.cells.each do |cell|
          expect(cell).to be_an_instance_of(Docx::Elements::Containers::TableCell)
        end
      end
    end
  end

  # Column-based access: tables -> columns -> cells.
  it "should have tables with columns and cells" do
    @doc.tables.each do |table|
      table.columns.each do |column|
        expect(column).to be_an_instance_of(Docx::Elements::Containers::TableColumn)
        column.cells.each do |cell|
          expect(cell).to be_an_instance_of(Docx::Elements::Containers::TableCell)
        end
      end
    end
  end

  it "should have proper count" do
    expect(@doc.tables[0].row_count).to eq 171
    expect(@doc.tables[1].row_count).to eq 2
    expect(@doc.tables[0].column_count).to eq 2
    expect(@doc.tables[1].column_count).to eq 2
  end

  # Uses the expect syntax like the other examples in this group.
  it "should have tables with proper text" do
    expect(@doc.tables[0].rows[0].cells[0].text).to eq "ENGLISH"
    expect(@doc.tables[0].rows[0].cells[1].text).to eq "FRANÇAIS"
    expect(@doc.tables[1].rows[0].cells[0].text).to eq "Second table"
    expect(@doc.tables[1].rows[0].cells[1].text).to eq "Second tableau"
    expect(@doc.tables[0].columns[0].cells[5].text).to eq "aphids"
    expect(@doc.tables[0].columns[1].cells[5].text).to eq "puceron"
  end
end
87+
4288
describe 'editing' do
4389
before do
4490
@doc = Docx::Document.open(@fixtures_path + '/editing.docx')

0 commit comments

Comments
 (0)