Skip to content

Commit

Permalink
✨ detect merged cells, resolve #25
Browse files Browse the repository at this point in the history
  • Loading branch information
chfw committed Dec 8, 2017
1 parent 13ccfa7 commit 4cea637
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .moban.d/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
{%block description%}
**pyexcel-{{file_type}}** is a tiny wrapper library to read, manipulate and write data in {{file_type}} format and it can read xlsx and xlsm formats. You are likely to use it with `pyexcel <https://github.com/pyexcel/pyexcel>`_.

:fire: New flag: `detect_merged_cells` allows you to spread the same value among all merged cells. But be aware that this may slow down its reading performance.

New flag: `skip_hidden_row_and_column` allows you to skip hidden rows and columns and is defaulted to **True**. It may slow down its reading performance. And it is only valid for 'xls' files. For 'xlsx' files, please use pyexcel-xlsx.

{%endblock%}
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Change log
================================================================================

0.5.5 - unreleased
--------------------------------------------------------------------------------

Added
********************************************************************************

#. `#25 <https://github.com/pyexcel/pyexcel-xls/issues/25>`_, detect merged
cell in .xls

0.5.4 - 2.11.2017
--------------------------------------------------------------------------------

Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ pyexcel-xls - Let you focus on data, instead of xls format

**pyexcel-xls** is a tiny wrapper library to read, manipulate and write data in xls format and it can read xlsx and xlsm formats. You are likely to use it with `pyexcel <https://github.com/pyexcel/pyexcel>`_.

:fire: New flag: `detect_merged_cells` allows you to spread the same value among all merged cells. But be aware that this may slow down its reading performance.

New flag: `skip_hidden_row_and_column` allows you to skip hidden rows and columns and is defaulted to **True**. It may slow down its reading performance. And it is only valid for 'xls' files. For 'xlsx' files, please use pyexcel-xlsx.


Expand Down
4 changes: 2 additions & 2 deletions pyexcel_xls.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
overrides: "pyexcel.yaml"
name: "pyexcel-xls"
nick_name: xls
version: 0.5.4
current_version: 0.5.4
version: 0.5.5
current_version: 0.5.5
release: 0.5.4
file_type: xls
dependencies:
Expand Down
37 changes: 35 additions & 2 deletions pyexcel_xls/xlsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from pyexcel_io.book import BookReader
from pyexcel_io.sheet import SheetReader
from pyexcel_io._compact import OrderedDict
from pyexcel_io._compact import OrderedDict, irange
from pyexcel_io.service import has_no_digits_in_float


Expand All @@ -23,6 +23,21 @@
]


class MergedCell(object):
    """
    One merged range on a sheet, plus a slot for the value its cells share

    The four bounds are stored as given; ``value`` starts out as ``None``
    and is meant to be filled in by whoever reads the cells.
    """

    def __init__(self, row_low, row_high, column_low, column_high):
        self.__rl, self.__rh = row_low, row_high
        self.__cl, self.__ch = column_low, column_high
        self.value = None

    def register_cells(self, registry):
        """
        Map every cell position of this range to this object in *registry*

        Keys are strings of the form ``"<row>-<column>"``.
        """
        # NOTE(review): irange is presumably the range/xrange alias from
        # pyexcel_io._compact, which would make the high bounds exclusive
        # -- confirm against that module.
        for row_index in irange(self.__rl, self.__rh):
            for column_index in irange(self.__cl, self.__ch):
                registry["%s-%s" % (row_index, column_index)] = self


class XLSheet(SheetReader):
"""
xls, xlsx, xlsm sheet reader
Expand All @@ -34,6 +49,11 @@ def __init__(self, sheet, auto_detect_int=True, **keywords):
self.__auto_detect_int = auto_detect_int
self.__hidden_cols = []
self.__hidden_rows = []
self.__merged_cells = {}
if keywords.get('detect_merged_cells') is True:
for merged_cell_ranges in sheet.merged_cells:
merged_cells = MergedCell(*merged_cell_ranges)
merged_cells.register_cells(self.__merged_cells)
if keywords.get('skip_hidden_row_and_column') is True:
for col_index, info in self._native_sheet.colinfo_map.items():
if info.hidden == 1:
Expand Down Expand Up @@ -62,14 +82,23 @@ def cell_value(self, row, column):
"""
Random access to the xls cells
"""
row, column = self._offset_hidden_indices(row, column)
if self._keywords.get('skip_hidden_row_and_column') is True:
row, column = self._offset_hidden_indices(row, column)
cell_type = self._native_sheet.cell_type(row, column)
value = self._native_sheet.cell_value(row, column)

if cell_type == xlrd.XL_CELL_DATE:
value = xldate_to_python_date(value)
elif cell_type == xlrd.XL_CELL_NUMBER and self.__auto_detect_int:
if has_no_digits_in_float(value):
value = int(value)
if self.__merged_cells:
merged_cell = self.__merged_cells.get("%s-%s" % (row, column))
if merged_cell:
if merged_cell.value:
value = merged_cell.value
else:
merged_cell.value = value
return value

def _offset_hidden_indices(self, row, column):
Expand Down Expand Up @@ -97,6 +126,7 @@ def __init__(self):
self._file_content = None
self.__skip_hidden_sheets = True
self.__skip_hidden_row_column = True
self.__detect_merged_cells = False

def open(self, file_name, **keywords):
self.__parse_keywords(**keywords)
Expand All @@ -115,6 +145,7 @@ def __parse_keywords(self, **keywords):
self.__skip_hidden_sheets = keywords.get('skip_hidden_sheets', True)
self.__skip_hidden_row_column = keywords.get(
'skip_hidden_row_and_column', True)
self.__detect_merged_cells = keywords.get('detect_merged_cells', False)

def close(self):
if self._native_book:
Expand Down Expand Up @@ -163,6 +194,8 @@ def _get_book(self, on_demand=False):
raise IOError("No valid file name or file content found.")
if self.__skip_hidden_row_column and self._file_type == 'xls':
xlrd_params['formatting_info'] = True
if self.__detect_merged_cells:
xlrd_params['formatting_info'] = True
xls_book = xlrd.open_workbook(**xlrd_params)
return xls_book

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

NAME = 'pyexcel-xls'
AUTHOR = 'C.W.'
VERSION = '0.5.4'
VERSION = '0.5.5'
EMAIL = 'wangc_2011@hotmail.com'
LICENSE = 'New BSD'
DESCRIPTION = (
Expand Down
Binary file added tests/fixtures/merged-cell-sheet.xls
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/test_merged_cells.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os
from pyexcel_xls import get_data
from nose.tools import eq_


def test_merged_cells():
    """
    Read the merged-cell fixture with ``detect_merged_cells`` switched on
    and compare 'Sheet1' with the expected rows
    """
    fixture = os.path.join("tests", "fixtures", "merged-cell-sheet.xls")
    sheets = get_data(fixture,
                      detect_merged_cells=True,
                      library="pyexcel-xls")
    # The repeated 1s in the first column and the two 11s in the last row
    # are presumably where the fixture has merged cells -- the binary
    # fixture itself is not visible here.
    eq_(sheets['Sheet1'],
        [[1, 2, 3], [1, 5, 6], [1, 8, 9], [10, 11, 11]])

0 comments on commit 4cea637

Please # to comment.