implement overwriting headers

2019-01-31 23:37:53 +01:00 · 2019-01-31 23:37:53 +01:00 · f5db18cd59
parent 8949578994
commit f5db18cd59
4 changed files with 37 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -27,8 +27,20 @@ Usage
 from pandas_ods_reader import read_ods

 path = "path/to/file.ods"
+
+# load a sheet based on its index (1-based)
 sheet_idx = 1
-df1 = read_ods(path, sheet_idx)
+df = read_ods(path, sheet_idx)
+
+# load a sheet based on its name
 sheet_name = "sheet1"
-df2 = read_ods(path, sheet_name)
+df = read_ods(path, sheet_name)
+
+# load a file that does not contain a header row
+# if no columns are provided, they are named column_0, column_1, ...
+df = read_ods(path, 1, headers=False)
+
+# load a file and provide custom column names
+# if headers is True (the default), the file's header row is skipped and replaced by these names
+df = read_ods(path, 1, columns=["A", "B", "C"])
 ```
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@ -19,21 +19,22 @@ def load_ods(doc, sheet, headers=True, columns=None):
    col_index = {}
    for i, row in enumerate(sheet.rows()):
        # row is a list of cells
-        if headers and i == 0:
+        if headers and i == 0 and not columns:
            # columns as lists in a dictionary
            df_dict = {cell.value: [] for cell in row if cell.value}
            # create index for the column headers
            col_index = {
                j: cell.value for j, cell in enumerate(row) if cell.value}
            continue
-        elif not headers and i == 0:
+        elif i == 0:
            columns = columns if columns else (
                ["column_%s" % j for j in range(len(row))])
            # columns as lists in a dictionary
            df_dict = {column: [] for column in columns}
            # create index for the column headers
            col_index = {j: column for j, column in enumerate(columns)}
-            continue
+            if headers:
+                continue
        for j, cell in enumerate(row):
            if j < len(col_index):
                # use header instead of column index
--- a/pandas_ods_reader/tests/test_read_ods.py
+++ b/pandas_ods_reader/tests/test_read_ods.py
@ -8,30 +8,40 @@ from pandas_ods_reader import read_ods
 root = os.path.dirname(os.path.abspath(__file__))
 rsc = os.path.join(root, "rsc")

+header_file = "example_headers.ods"
+no_header_file = "example_no_headers.ods"
+

 class TestOdsReader(object):
    def test_header_file_with_int(self):
-        example = "example_headers.ods"
-        path = os.path.join(rsc, example)
+        path = os.path.join(rsc, header_file)
        df = read_ods(path, 1)
        assert isinstance(df, pd.DataFrame)
+        assert len(df) == 10

    def test_header_file_with_str(self):
-        example = "example_headers.ods"
-        path = os.path.join(rsc, example)
+        path = os.path.join(rsc, header_file)
        df = read_ods(path, "Sheet1")
        assert isinstance(df, pd.DataFrame)
+        assert len(df) == 10
+
+    def test_header_file_with_cols(self):
+        path = os.path.join(rsc, header_file)
+        columns = ["One", "Two", "Three", "Four", "Five"]
+        df = read_ods(path, "Sheet1", columns=columns)
+        assert list(df.columns) == columns
+        assert len(df) == 10

    def test_no_header_file_no_cols(self):
-        example = "example_no_headers.ods"
-        path = os.path.join(rsc, example)
+        path = os.path.join(rsc, no_header_file)
        df = read_ods(path, 1, headers=False)
        assert list(df.columns) == [
            "column_%s" % i for i in range(len(df.columns))]
+        assert len(df) == 10

    def test_no_header_file_with_cols(self):
-        example = "example_headers.ods"
-        path = os.path.join(rsc, example)
+        path = os.path.join(rsc, no_header_file)
        columns = ["A", "B", "C", "D", "E"]
        df = read_ods(path, 1, headers=False, columns=columns)
        assert list(df.columns) == columns
+        assert len(df) == 10
--- a/setup.py
+++ b/setup.py
@ -1,7 +1,7 @@
 from setuptools import setup, find_packages


-VERSION = "0.0.5"
+VERSION = "0.0.6"

 with open("README.md", "r") as fh:
    long_description = fh.read()