From b78369366ba8004e571cf40827d4b7cd1ac946aa Mon Sep 17 00:00:00 2001
From: Egor <Birdi7@users.noreply.github.com>
Date: Mon, 20 Jan 2025 18:45:14 +0500
Subject: [PATCH] feat: support column_width in xlsx format

Co-authored-by: Andrew Graham-Yooll <andrewgy8@gmail.com>
Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
---
 AUTHORS                     |  2 ++
 HISTORY.md                  |  6 ++++++
 docs/formats.rst            | 12 +++++++++++
 src/tablib/formats/_xlsx.py | 40 +++++++++++++++++++++++++++++++++--
 tests/test_tablib.py        | 42 +++++++++++++++++++++++++++++++++++++
 5 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index ed4eec6d..c8fc9803 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -4,11 +4,13 @@ by the Jazzband GitHub team.
 Here is a list of past and present much-appreciated contributors:
 
     Alex Gaynor
+    Andrew Graham-Yooll
     Andrii Soldatenko
     Benjamin Wohlwend
     Bruno Soares
     Claude Paroz
     Daniel Santos
+    Egor Osokin
     Erik Youngren
     Hugo van Kemenade
     Iuri de Silvio
diff --git a/HISTORY.md b/HISTORY.md
index 077b3170..cf3b7020 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,11 @@
 # History
 
+## 3.8.0 (Unreleased)
+
+### Improvements
+
+- Add support for exporting XLSX with column width (#516)
+
 ## 3.7.0 (2024-10-08)
 
 ### Improvements
diff --git a/docs/formats.rst b/docs/formats.rst
index 178edbda..ce546dda 100644
--- a/docs/formats.rst
+++ b/docs/formats.rst
@@ -250,6 +250,18 @@ The ``import_set()`` method also supports a ``skip_lines`` parameter that you
 can set to a number of lines that should be skipped before starting to read
 data.
 
+The ``export_set()`` method supports a ``column_width`` parameter that sets the
+width of the exported columns. It can be ``None`` (keep the default width), an
+integer (use that width for every column), or ``"adaptive"`` (the default: size
+each column to fit the longest value it contains).
+For example::
+
+    data = tablib.Dataset()
+    data.export('xlsx', column_width='adaptive')
+
+.. versionchanged:: 3.8.0
+    The ``column_width`` parameter for ``export_set()`` was added.
+
 .. versionchanged:: 3.1.0
 
     The ``skip_lines`` parameter for ``import_set()`` was added.
diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py
index 966ba5c8..1a75c20f 100644
--- a/src/tablib/formats/_xlsx.py
+++ b/src/tablib/formats/_xlsx.py
@@ -1,6 +1,5 @@
 """ Tablib - XLSX Support.
 """
-
 import re
 from io import BytesIO
 
@@ -35,7 +34,8 @@ def detect(cls, stream):
             return False
 
     @classmethod
-    def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False):
+    def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-",
+                   escape=False, column_width="adaptive"):
         """Returns XLSX representation of Dataset.
 
         If ``freeze_panes`` is True, Export will freeze panes only after first line.
@@ -48,6 +48,12 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F
         If ``escape`` is True, formulae will have the leading '=' character removed.
         This is a security measure to prevent formulae from executing by default
         in exported XLSX files.
+
+        If ``column_width`` is set to "adaptive", each column's width will be set to the length
+        of the longest value in that column. If it is set to an integer, every column's width
+        will be set to that integer value. If it is set to None, column widths are left
+        untouched and openpyxl's default width applies.
+
         """
         wb = Workbook()
         ws = wb.worksheets[0]
@@ -59,6 +65,8 @@ def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=F
 
         cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape)
 
+        cls._adapt_column_width(ws, column_width)
+
         stream = BytesIO()
         wb.save(stream)
         return stream.getvalue()
@@ -166,3 +174,31 @@ def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False):
 
                 if escape and cell.data_type == 'f' and cell.value.startswith('='):
                     cell.value = cell.value.replace("=", "")
+
+    @classmethod
+    def _adapt_column_width(cls, worksheet, width):
+        if isinstance(width, str) and width != "adaptive":
+            msg = (
+                f"Invalid value for column_width: {width}. "
+                "Must be 'adaptive' or an integer."
+            )
+            raise ValueError(msg)
+
+        if width is None:
+            return
+
+        column_widths = []
+        if width == "adaptive":
+            for row in worksheet.values:
+                for i, cell in enumerate(row):
+                    cell_width = len(str(cell))
+                    if len(column_widths) > i:
+                        if cell_width > column_widths[i]:
+                            column_widths[i] = cell_width
+                    else:
+                        column_widths.append(cell_width)
+        else:
+            column_widths = [width] * worksheet.max_column
+
+        for i, column_width in enumerate(column_widths, 1):  # start at 1
+            worksheet.column_dimensions[get_column_letter(i)].width = column_width
diff --git a/tests/test_tablib.py b/tests/test_tablib.py
index 0df8e5d0..32119918 100755
--- a/tests/test_tablib.py
+++ b/tests/test_tablib.py
@@ -1339,6 +1339,25 @@ def get_format_str(cell):
 
 
 class XLSXTests(BaseTestCase):
+    def _helper_export_column_width(self, column_width):
+        """check that column width adapts to value length"""
+        def _get_width(data, input_arg):
+            xlsx_content = data.export('xlsx', column_width=input_arg)
+            wb = load_workbook(filename=BytesIO(xlsx_content))
+            ws = wb.active
+            return ws.column_dimensions['A'].width
+
+        xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
+        with xls_source.open('rb') as fh:
+            data = tablib.Dataset().load(fh)
+        width_before = _get_width(data, column_width)
+        data.append([
+            'verylongvalue-verylongvalue-verylongvalue-verylongvalue-'
+            'verylongvalue-verylongvalue-verylongvalue-verylongvalue',
+        ])
+        width_after = _get_width(data, width_before)
+        return width_before, width_after
+
     def test_xlsx_format_detect(self):
         """Test the XLSX format detection."""
         in_stream = self.founders.xlsx
@@ -1483,6 +1502,29 @@ def test_xlsx_raise_ValueError_on_cell_write_during_export(self):
         wb = load_workbook(filename=BytesIO(_xlsx))
         self.assertEqual('[1]', wb.active['A1'].value)
 
+    def test_xlsx_column_width_adaptive(self):
+        """ Test that column width adapts to value length"""
+        width_before, width_after = self._helper_export_column_width("adaptive")
+        self.assertEqual(width_before, 11)
+        self.assertEqual(width_after, 11)
+
+    def test_xlsx_column_width_integer(self):
+        """Test that column width changes to integer length"""
+        width_before, width_after = self._helper_export_column_width(10)
+        self.assertEqual(width_before, 10)
+        self.assertEqual(width_after, 10)
+
+    def test_xlsx_column_width_none(self):
+        """Test that column width does not change"""
+        width_before, width_after = self._helper_export_column_width(None)
+        self.assertEqual(width_before, 13)
+        self.assertEqual(width_after, 13)
+
+    def test_xlsx_column_width_value_error(self):
+        """Raise ValueError if column_width is not a valid input"""
+        with self.assertRaises(ValueError):
+            self._helper_export_column_width("invalid input")
+
 
 class JSONTests(BaseTestCase):
     def test_json_format_detect(self):