diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index 771c0d6..b5132b5 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -24,15 +24,15 @@ def load_ods(doc, sheet_id, headers=True, columns=None): # columns as lists in a dictionary columns = [] for cell in row: - if cell.value: - if cell.value not in columns: - columns.append(cell.value) - else: - # add count to column name - idx = 1 - while "{}.{}".format(cell.value, idx) in columns: - idx += 1 - columns.append("{}.{}".format(cell.value, idx)) + if cell.value and cell.value not in columns: + columns.append(cell.value) + else: + column_name = cell.value if cell.value else "unnamed" + # add count to column name + idx = 1 + while "{}.{}".format(column_name, idx) in columns: + idx += 1 + columns.append("{}.{}".format(column_name, idx)) df_dict = OrderedDict((column, []) for column in columns) # create index for the column headers diff --git a/pandas_ods_reader/tests/rsc/example_missing_header.ods b/pandas_ods_reader/tests/rsc/example_missing_header.ods new file mode 100644 index 0000000..2b104d9 Binary files /dev/null and b/pandas_ods_reader/tests/rsc/example_missing_header.ods differ diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index 01bc1f8..a31841c 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -13,6 +13,7 @@ header_file = "example_headers.ods" no_header_file = "example_no_headers.ods" duplicated_column_names_file = "example_duplicated_column_names.ods" col_len_file = "example_col_lengths.ods" +missing_header_file = "example_missing_header.ods" class TestOdsReader(object): @@ -79,3 +80,11 @@ class TestOdsReader(object): with pytest.raises(ValueError) as e_info: read_ods(path, sheet_name) assert e_info.match(f"There is no sheet named {sheet_name}") + + def test_missing_header(self): + path = os.path.join(rsc, missing_header_file) + df = read_ods(path, 1) + assert isinstance(df, pd.DataFrame) + assert len(df) == 10 + assert (len(df.columns) == 5) + assert df.columns[2] == "unnamed.1"