From b78369366ba8004e571cf40827d4b7cd1ac946aa Mon Sep 17 00:00:00 2001 From: Egor Date: Mon, 20 Jan 2025 18:45:14 +0500 Subject: [PATCH] feat: support column_width in xlsx format Co-authored-by: Andrew Graham-Yooll Co-authored-by: Hugo van Kemenade --- AUTHORS | 2 ++ HISTORY.md | 6 ++++++ docs/formats.rst | 12 +++++++++++ src/tablib/formats/_xlsx.py | 40 +++++++++++++++++++++++++++++++++-- tests/test_tablib.py | 42 +++++++++++++++++++++++++++++++++++++ 5 files changed, 100 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index ed4eec6d..c8fc9803 100644 --- a/AUTHORS +++ b/AUTHORS @@ -4,11 +4,13 @@ by the Jazzband GitHub team. Here is a list of past and present much-appreciated contributors: Alex Gaynor + Andrew Graham-Yooll Andrii Soldatenko Benjamin Wohlwend Bruno Soares Claude Paroz Daniel Santos + Egor Osokin Erik Youngren Hugo van Kemenade Iuri de Silvio diff --git a/HISTORY.md b/HISTORY.md index 077b3170..cf3b7020 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +## 3.8.0 (Unreleased) + +### Improvements + +- Add support for exporting XLSX with column width (#516) + ## 3.7.0 (2024-10-08) ### Improvements diff --git a/docs/formats.rst b/docs/formats.rst index 178edbda..ce546dda 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -250,6 +250,18 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you can set to a number of lines that should be skipped before starting to read data. +The ``export_set()`` method supports a ``column_width`` parameter that controls +the width of the exported columns. It can be ``None`` (keep the default width), +an integer (the same width for every column), or ``"adaptive"`` (the default), +in which case each column is sized to fit the longest value it contains. +For example:: + + data = tablib.Dataset() + data.export('xlsx', column_width='adaptive') + +.. versionchanged:: 3.8.0 + The ``column_width`` parameter for ``export_set()`` was added. + ..
versionchanged:: 3.1.0 The ``skip_lines`` parameter for ``import_set()`` was added. diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index 966ba5c8..1a75c20f 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -1,6 +1,5 @@ """ Tablib - XLSX Support. """ - import re from io import BytesIO @@ -35,7 +34,8 @@ def detect(cls, stream): return False @classmethod - def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): + def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", + escape=False, column_width="adaptive"): """Returns XLSX representation of Dataset. If ``freeze_panes`` is True, Export will freeze panes only after first line. @@ -48,6 +48,12 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F If ``escape`` is True, formulae will have the leading '=' character removed. This is a security measure to prevent formulae from executing by default in exported XLSX files. + + If ``column_width`` is set to "adaptive", the column width will be set to the maximum + width of the content in each column. If it is set to an integer, the column width will be + set to that integer value. If it is set to None, the column width will be set as the + default openpyxl.Worksheet width value. 
+ """ wb = Workbook() ws = wb.worksheets[0] @@ -59,6 +65,8 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape) + cls._adapt_column_width(ws, column_width) + stream = BytesIO() wb.save(stream) return stream.getvalue() @@ -166,3 +174,37 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False): if escape and cell.data_type == 'f' and cell.value.startswith('='): cell.value = cell.value.replace("=", "") + + @classmethod + def _adapt_column_width(cls, worksheet, width): + """Apply ``width`` to the columns of ``worksheet``. + + ``None`` leaves the widths untouched, an integer is applied to every + column, and "adaptive" sizes each column to its longest ``str()`` value. + Any other string raises ``ValueError``. + """ + if isinstance(width, str) and width != "adaptive": + msg = ( + f"Invalid value for column_width: {width!r}. " + "Must be 'adaptive', an integer, or None." + ) + raise ValueError(msg) + + if width is None: + return + + column_widths = [] + if width == "adaptive": + for row in worksheet.values: + for i, cell in enumerate(row): + cell_width = len(str(cell)) + if len(column_widths) > i: + if cell_width > column_widths[i]: + column_widths[i] = cell_width + else: + column_widths.append(cell_width) + else: + column_widths = [width] * worksheet.max_column + + for i, column_width in enumerate(column_widths, 1): # start at 1 + worksheet.column_dimensions[get_column_letter(i)].width = column_width diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 0df8e5d0..32119918 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1339,6 +1339,25 @@ def get_format_str(cell): class XLSXTests(BaseTestCase): + def _helper_export_column_width(self, column_width): + """check that column width adapts to value length""" + def _get_width(data, input_arg): + xlsx_content = data.export('xlsx', column_width=input_arg) + wb = load_workbook(filename=BytesIO(xlsx_content)) + ws = wb.active + return ws.column_dimensions['A'].width + + xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' + with xls_source.open('rb') as fh: + data = tablib.Dataset().load(fh) + width_before = _get_width(data,
column_width) + data.append([ + 'verylongvalue-verylongvalue-verylongvalue-verylongvalue-' + 'verylongvalue-verylongvalue-verylongvalue-verylongvalue', + ]) + width_after = _get_width(data, column_width) + return width_before, width_after + def test_xlsx_format_detect(self): """Test the XLSX format detection.""" in_stream = self.founders.xlsx @@ -1483,6 +1502,29 @@ def test_xlsx_raise_ValueError_on_cell_write_during_export(self): wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('[1]', wb.active['A1'].value) + def test_xlsx_column_width_adaptive(self): + """Test that column width adapts to value length""" + width_before, width_after = self._helper_export_column_width("adaptive") + self.assertEqual(width_before, 11) + self.assertEqual(width_after, 111) + + def test_xlsx_column_width_integer(self): + """Test that column width changes to integer length""" + width_before, width_after = self._helper_export_column_width(10) + self.assertEqual(width_before, 10) + self.assertEqual(width_after, 10) + + def test_xlsx_column_width_none(self): + """Test that column width does not change""" + width_before, width_after = self._helper_export_column_width(None) + self.assertEqual(width_before, 13) + self.assertEqual(width_after, 13) + + def test_xlsx_column_width_value_error(self): + """Raise ValueError if column_width is not a valid input""" + with self.assertRaises(ValueError): + self._helper_export_column_width("invalid input") + class JSONTests(BaseTestCase): def test_json_format_detect(self):