Handle missing header cells: name empty header columns "unnamed.N"

This commit is contained in:
iuvbio 2019-06-08 15:46:38 +02:00
parent 1dc86970ae
commit 78edd09076
3 changed files with 18 additions and 9 deletions

View File

@ -24,15 +24,15 @@ def load_ods(doc, sheet_id, headers=True, columns=None):
# columns as lists in a dictionary
columns = []
for cell in row:
if cell.value:
if cell.value not in columns:
columns.append(cell.value)
else:
# add count to column name
idx = 1
while "{}.{}".format(cell.value, idx) in columns:
idx += 1
columns.append("{}.{}".format(cell.value, idx))
if cell.value and cell.value not in columns:
columns.append(cell.value)
else:
column_name = cell.value if cell.value else "unnamed"
# add count to column name
idx = 1
while "{}.{}".format(column_name, idx) in columns:
idx += 1
columns.append("{}.{}".format(column_name, idx))
df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers

Binary file not shown.

View File

@ -13,6 +13,7 @@ header_file = "example_headers.ods"
# File names of the .ods fixtures used by the tests; test_missing_header joins
# missing_header_file onto `rsc` to build the path it reads.
no_header_file = "example_no_headers.ods"
duplicated_column_names_file = "example_duplicated_column_names.ods"
col_len_file = "example_col_lengths.ods"
missing_header_file = "example_missing_header.ods"
class TestOdsReader(object):
@ -79,3 +80,11 @@ class TestOdsReader(object):
with pytest.raises(ValueError) as e_info:
read_ods(path, sheet_name)
assert e_info.match(f"There is no sheet named {sheet_name}")
def test_missing_header(self):
    """Read the missing-header fixture and check its shape and column name.

    Loads sheet identifier 1 from ``missing_header_file`` and expects a
    DataFrame with 10 rows and 5 columns whose third column is labelled
    "unnamed.1".
    """
    frame = read_ods(os.path.join(rsc, missing_header_file), 1)
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 10
    header = frame.columns
    assert len(header) == 5
    # The third column is expected to carry the generated placeholder name.
    assert header[2] == "unnamed.1"