diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index 83a3dee..f04063f 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -6,16 +6,16 @@ from collections import OrderedDict from .tools import sanitize_df -def load_ods(doc, sheet, headers=True, columns=None): +def load_ods(doc, sheet_id, headers=True, columns=None): # convert the sheet to a pandas.DataFrame - if isinstance(sheet, int): - sheet = doc.sheets[sheet - 1] - elif isinstance(sheet, str): + if not isinstance(sheet_id, (int, str)): + raise ValueError("Sheet id has to be either `str` or `int`") + if isinstance(sheet_id, str): sheets = [sheet.name for sheet in doc.sheets] - if sheet not in sheets: - raise ValueError("There is no sheet named {}".format(sheet)) - sheet_idx = sheets.index(sheet) - sheet = doc.sheets[sheet_idx] + if sheet_id not in sheets: + raise ValueError("There is no sheet named {}".format(sheet_id)) + sheet_id = sheets.index(sheet_id) + 1 + sheet = doc.sheets[sheet_id - 1] df_dict = OrderedDict() col_index = {} for i, row in enumerate(sheet.rows()): @@ -42,7 +42,7 @@ def load_ods(doc, sheet, headers=True, columns=None): continue elif i == 0: columns = columns if columns else ( - ["column_%s" % j for j in range(len(row))]) + [f"column_{j}" for j in range(len(row))]) # columns as lists in a dictionary df_dict = OrderedDict((column, []) for column in columns) # create index for the column headers @@ -62,8 +62,8 @@ def load_ods(doc, sheet, headers=True, columns=None): def read_ods(file_or_path, sheet, headers=True, columns=None): """ This function reads in the provided ods file and converts it to a - dictionary. The dictionary is converted to a DataFrame. Empty rows and - columns are dropped from the DataFrame, before it is returned. + dictionary. The dictionary is converted to a DataFrame. Trailing empty rows + and columns are dropped from the DataFrame, before it is returned. :param file_or_path: str the path to the ODS file diff --git a/pandas_ods_reader/tests/rsc/example_col_lengths.ods b/pandas_ods_reader/tests/rsc/example_col_lengths.ods new file mode 100644 index 0000000..9dd3fa7 Binary files /dev/null and b/pandas_ods_reader/tests/rsc/example_col_lengths.ods differ diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index 321e423..2d79b81 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -11,6 +11,7 @@ rsc = os.path.join(root, "rsc") header_file = "example_headers.ods" no_header_file = "example_no_headers.ods" duplicated_column_names_file = "example_duplicated_column_names.ods" +col_len_file = "example_col_lengths.ods" class TestOdsReader(object): @@ -19,12 +20,14 @@ class TestOdsReader(object): df = read_ods(path, 1) assert isinstance(df, pd.DataFrame) assert len(df) == 10 + assert (len(df.columns) == 5) def test_header_file_with_str(self): path = os.path.join(rsc, header_file) df = read_ods(path, "Sheet1") assert isinstance(df, pd.DataFrame) assert len(df) == 10 + assert (len(df.columns) == 5) def test_header_file_with_cols(self): path = os.path.join(rsc, header_file) @@ -32,13 +35,15 @@ class TestOdsReader(object): df = read_ods(path, "Sheet1", columns=columns) assert list(df.columns) == columns assert len(df) == 10 + assert (len(df.columns) == 5) def test_no_header_file_no_cols(self): path = os.path.join(rsc, no_header_file) df = read_ods(path, 1, headers=False) assert list(df.columns) == [ - "column_%s" % i for i in range(len(df.columns))] + f"column_{i}" for i in range(len(df.columns))] assert len(df) == 10 + assert (len(df.columns) == 5) def test_no_header_file_with_cols(self): path = os.path.join(rsc, no_header_file) @@ -53,3 +58,10 @@ class TestOdsReader(object): assert isinstance(df, pd.DataFrame) assert len(df.columns) == 4 assert "website.1" in df.columns + + def test_header_file_col_len(self): + path = os.path.join(rsc, col_len_file) + df = read_ods(path, 1) + assert isinstance(df, pd.DataFrame) + assert len(df) == 10 + assert (len(df.columns) == 5)