From f5db18cd59d5238fb48154b64c6f7207094a4360 Mon Sep 17 00:00:00 2001 From: iuvbio Date: Thu, 31 Jan 2019 23:37:53 +0100 Subject: [PATCH] implement overwriting headers --- README.md | 16 +++++++++++++-- pandas_ods_reader/parser.py | 7 ++++--- pandas_ods_reader/tests/test_read_ods.py | 26 ++++++++++++++++-------- setup.py | 2 +- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 59ebada..7c2684b 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,20 @@ Usage from pandas_ods_reader import read_ods path = "path/to/file.ods" + +# load a sheet based on its index (1 based) sheet_idx = 1 -df1 = read_ods(path, sheet_idx) +df = read_ods(path, sheet_idx) + +# load a sheet based on its name sheet_name = "sheet1" -df2 = read_ods(path, sheet_name) +df = read_ods(path, sheet_name) + +# load a file that does not contain a header row +# if no columns are provided they will be numbered +df = read_ods(path, 1, headers=False) + +# load a file and provide custom column names +# if headers is True (the default), the header row will be overwritten +df = read_ods(path, 1, columns=["A", "B", "C"]) ``` diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index d9d11b0..1334eae 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -19,21 +19,22 @@ def load_ods(doc, sheet, headers=True, columns=None): col_index = {} for i, row in enumerate(sheet.rows()): # row is a list of cells - if headers and i == 0: + if headers and i == 0 and not columns: # columns as lists in a dictionary df_dict = {cell.value: [] for cell in row if cell.value} # create index for the column headers col_index = { j: cell.value for j, cell in enumerate(row) if cell.value} continue - elif not headers and i == 0: + elif i == 0: columns = columns if columns else ( ["column_%s" % j for j in range(len(row))]) # columns as lists in a dictionary df_dict = {column: [] for column in columns} # create index for the column headers col_index = {j: column for j, column in enumerate(columns)} - continue + if headers: + continue for j, cell in enumerate(row): if j < len(col_index): # use header instead of column index diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index 56b0235..4b7e843 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -8,30 +8,40 @@ from pandas_ods_reader import read_ods root = os.path.dirname(os.path.abspath(__file__)) rsc = os.path.join(root, "rsc") +header_file = "example_headers.ods" +no_header_file = "example_no_headers.ods" + class TestOdsReader(object): def test_header_file_with_int(self): - example = "example_headers.ods" - path = os.path.join(rsc, example) + path = os.path.join(rsc, header_file) df = read_ods(path, 1) assert isinstance(df, pd.DataFrame) + assert len(df) == 10 def test_header_file_with_str(self): - example = "example_headers.ods" - path = os.path.join(rsc, example) + path = os.path.join(rsc, header_file) df = read_ods(path, "Sheet1") assert isinstance(df, pd.DataFrame) + assert len(df) == 10 + + def test_header_file_with_cols(self): + path = os.path.join(rsc, header_file) + columns = ["One", "Two", "Three", "Four", "Five"] + df = read_ods(path, "Sheet1", columns=columns) + assert list(df.columns) == columns + assert len(df) == 10 def test_no_header_file_no_cols(self): - example = "example_no_headers.ods" - path = os.path.join(rsc, example) + path = os.path.join(rsc, no_header_file) df = read_ods(path, 1, headers=False) assert list(df.columns) == [ "column_%s" % i for i in range(len(df.columns))] + assert len(df) == 10 def test_no_header_file_with_cols(self): - example = "example_headers.ods" - path = os.path.join(rsc, example) + path = os.path.join(rsc, no_header_file) columns = ["A", "B", "C", "D", "E"] df = read_ods(path, 1, headers=False, columns=columns) assert list(df.columns) == columns + assert len(df) == 10 diff --git a/setup.py b/setup.py index 7d4bc71..e0072a9 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages -VERSION = "0.0.5" +VERSION = "0.0.6" with open("README.md", "r") as fh: long_description = fh.read()