Improve logic in `parse_data`.

This commit is contained in:
ljnsn 2022-11-10 00:44:46 +01:00
parent 5b9ea785e2
commit 9bf4415a9f
1 changed file with 32 additions and 28 deletions

View File

@@ -12,40 +12,44 @@ def parse_data(backend, rows, headers=True, columns=None, skiprows=0):
next(rows) next(rows)
for i, row in enumerate(rows): for i, row in enumerate(rows):
# row is a list of cells # row is a list of cells
if headers and i == 0 and not columns:
repeat_until = -1
repeat_value = None
# columns as lists in a dictionary
columns = []
# parse the first row as column names
for k, cell in enumerate(row):
value, n_repeated = backend.get_value(cell)
if n_repeated > 0:
repeat_value = value
repeat_until = n_repeated + k
if not value and k <= repeat_until:
value = repeat_value
if k == repeat_until:
# reset to allow for more than one repeated column
repeat_until = -1
if value and value not in columns:
columns.append(value)
else:
column_name = value if value else "unnamed"
# add count to column name
idx = 1
while f"{column_name}.{idx}" in columns:
idx += 1
columns.append(f"{column_name}.{idx}")
elif i == 0:
# without headers, assign generic numbered column names
columns = columns if columns else [f"column.{j}" for j in range(len(row))]
if i == 0: if i == 0:
if not columns:
if headers:
repeat_until = -1
repeat_value = None
# columns as lists in a dictionary
columns = []
# parse the first row as column names
for k, cell in enumerate(row):
value, n_repeated = backend.get_value(cell)
if n_repeated > 0:
repeat_value = value
repeat_until = n_repeated + k
if not value and k <= repeat_until:
value = repeat_value
if k == repeat_until:
# reset to allow for more than one repeated column
repeat_until = -1
if value and value not in columns:
columns.append(value)
else:
column_name = value if value else "unnamed"
# add count to column name
idx = 1
while f"{column_name}.{idx}" in columns:
idx += 1
columns.append(f"{column_name}.{idx}")
else:
# without headers, assign generic numbered column names
columns = [f"column.{j}" for j in range(len(row))]
df_dict = OrderedDict((column, []) for column in columns) df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers # create index for the column headers
col_index = {j: column for j, column in enumerate(columns)} col_index = {j: column for j, column in enumerate(columns)}
if headers: if headers:
continue continue
for j, cell in enumerate(row): for j, cell in enumerate(row):
if j < len(col_index): if j < len(col_index):
value, _ = backend.get_value(cell, parsed=True) value, _ = backend.get_value(cell, parsed=True)