Build the DataFrame from a dict of Series (converted from the per-column lists) to allow for columns with

different lengths
2019-06-03 17:57:55 +02:00 · 2019-06-03 17:57:55 +02:00 · 89f5d5c30a
parent d95de80f58
commit 89f5d5c30a
1 changed files with 9 additions and 4 deletions
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@ -21,7 +21,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
        # row is a list of cells
        if headers and i == 0 and not columns:
            # columns as lists in a dictionary
-            df_dict = {cell.value: pd.Series() for cell in row if cell.value}
+            df_dict = {cell.value: [] for cell in row if cell.value}
            # create index for the column headers
            col_index = {
                j: cell.value for j, cell in enumerate(row) if cell.value}
@ -30,7 +30,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
            columns = columns if columns else (
                ["column_%s" % j for j in range(len(row))])
            # columns as lists in a dictionary
-            df_dict = {column: pd.Series() for column in columns}
+            df_dict = {column: [] for column in columns}
            # create index for the column headers
            col_index = {j: column for j, column in enumerate(columns)}
            if headers:
@ -38,11 +38,16 @@ def load_ods(doc, sheet, headers=True, columns=None):
        for j, cell in enumerate(row):
            if j < len(col_index):
                # use header instead of column index
-                df_dict[col_index[j]].append(pd.Series([cell.value]))
+                print(cell.value)
                df_dict[col_index[j]].append(cell.value)
            else:
                continue
    # convert lists to pd.Series
    df_series = {}
    for col  in df_dict.keys():
        df_series[col] = pd.Series(df_dict[col])
    # and convert to a DataFrame
-    df = pd.DataFrame(df_dict)
+    df = pd.DataFrame(df_series)
    return df