Generate the DataFrame from pd.Series built from the dict of lists, to allow for columns with different lengths
This commit is contained in:
fatz 2019-06-03 17:57:55 +02:00
parent d95de80f58
commit 89f5d5c30a
1 changed file with 9 additions and 4 deletions

View File

@ -21,7 +21,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
# row is a list of cells # row is a list of cells
if headers and i == 0 and not columns: if headers and i == 0 and not columns:
# columns as lists in a dictionary # columns as lists in a dictionary
df_dict = {cell.value: pd.Series() for cell in row if cell.value} df_dict = {cell.value: [] for cell in row if cell.value}
# create index for the column headers # create index for the column headers
col_index = { col_index = {
j: cell.value for j, cell in enumerate(row) if cell.value} j: cell.value for j, cell in enumerate(row) if cell.value}
@ -30,7 +30,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
columns = columns if columns else ( columns = columns if columns else (
["column_%s" % j for j in range(len(row))]) ["column_%s" % j for j in range(len(row))])
# columns as lists in a dictionary # columns as lists in a dictionary
df_dict = {column: pd.Series() for column in columns} df_dict = {column: [] for column in columns}
# create index for the column headers # create index for the column headers
col_index = {j: column for j, column in enumerate(columns)} col_index = {j: column for j, column in enumerate(columns)}
if headers: if headers:
@ -38,11 +38,16 @@ def load_ods(doc, sheet, headers=True, columns=None):
for j, cell in enumerate(row): for j, cell in enumerate(row):
if j < len(col_index): if j < len(col_index):
# use header instead of column index # use header instead of column index
df_dict[col_index[j]].append(pd.Series([cell.value])) print(cell.value)
df_dict[col_index[j]].append(cell.value)
else: else:
continue continue
# convert lists to pd.Series
df_series = {}
for col in df_dict.keys():
df_series[col] = pd.Series(df_dict[col])
# and convert to a DataFrame # and convert to a DataFrame
df = pd.DataFrame(df_dict) df = pd.DataFrame(df_series)
return df return df