From aeec0fa66a97a882db1017ef3771bd0c5018646c Mon Sep 17 00:00:00 2001 From: fatz Date: Mon, 3 Jun 2019 17:27:39 +0200 Subject: [PATCH 1/5] using Series to initialize dataframe --- pandas_ods_reader/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index 1334eae..c4d600d 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -30,7 +30,7 @@ def load_ods(doc, sheet, headers=True, columns=None): columns = columns if columns else ( ["column_%s" % j for j in range(len(row))]) # columns as lists in a dictionary - df_dict = {column: [] for column in columns} + df_dict = {column: pd.Series([] for column in columns)} # create index for the column headers col_index = {j: column for j, column in enumerate(columns)} if headers: From d95de80f5841cae92842bce0953b3cc4d56db0b2 Mon Sep 17 00:00:00 2001 From: fatz Date: Mon, 3 Jun 2019 17:40:30 +0200 Subject: [PATCH 2/5] append Series --- pandas_ods_reader/parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index c4d600d..ec9d49f 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -21,7 +21,7 @@ def load_ods(doc, sheet, headers=True, columns=None): # row is a list of cells if headers and i == 0 and not columns: # columns as lists in a dictionary - df_dict = {cell.value: [] for cell in row if cell.value} + df_dict = {cell.value: pd.Series() for cell in row if cell.value} # create index for the column headers col_index = { j: cell.value for j, cell in enumerate(row) if cell.value} @@ -30,7 +30,7 @@ def load_ods(doc, sheet, headers=True, columns=None): columns = columns if columns else ( ["column_%s" % j for j in range(len(row))]) # columns as lists in a dictionary - df_dict = {column: pd.Series([] for column in columns)} + df_dict = {column: pd.Series() for column in columns} # create index for the column headers 
col_index = {j: column for j, column in enumerate(columns)} if headers: @@ -38,7 +38,7 @@ def load_ods(doc, sheet, headers=True, columns=None): for j, cell in enumerate(row): if j < len(col_index): # use header instead of column index - df_dict[col_index[j]].append(cell.value) + df_dict[col_index[j]].append(pd.Series([cell.value])) else: continue # and convert to a DataFrame From 89f5d5c30a57eb5dcfef7b85a8a12c028424d856 Mon Sep 17 00:00:00 2001 From: fatz Date: Mon, 3 Jun 2019 17:57:55 +0200 Subject: [PATCH 3/5] generate a Series dataframe from the dict list to allow for columns with different lengths --- pandas_ods_reader/parser.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index ec9d49f..f40702e 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -21,7 +21,7 @@ def load_ods(doc, sheet, headers=True, columns=None): # row is a list of cells if headers and i == 0 and not columns: # columns as lists in a dictionary - df_dict = {cell.value: pd.Series() for cell in row if cell.value} + df_dict = {cell.value: [] for cell in row if cell.value} # create index for the column headers col_index = { j: cell.value for j, cell in enumerate(row) if cell.value} @@ -30,7 +30,7 @@ def load_ods(doc, sheet, headers=True, columns=None): columns = columns if columns else ( ["column_%s" % j for j in range(len(row))]) # columns as lists in a dictionary - df_dict = {column: pd.Series() for column in columns} + df_dict = {column: [] for column in columns} # create index for the column headers col_index = {j: column for j, column in enumerate(columns)} if headers: @@ -38,11 +38,16 @@ def load_ods(doc, sheet, headers=True, columns=None): for j, cell in enumerate(row): if j < len(col_index): # use header instead of column index - df_dict[col_index[j]].append(pd.Series([cell.value])) + print(cell.value) + df_dict[col_index[j]].append(cell.value) else: continue + # convert 
lists to pd.Series + df_series = {} + for col in df_dict.keys(): + df_series[col] = pd.Series(df_dict[col]) # and convert to a DataFrame - df = pd.DataFrame(df_dict) + df = pd.DataFrame(df_series) return df From 580c6951daf4628167dfe64b8a1158e5462c7061 Mon Sep 17 00:00:00 2001 From: fatz Date: Mon, 3 Jun 2019 17:58:34 +0200 Subject: [PATCH 4/5] removed logging --- pandas_ods_reader/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index f40702e..c533eb5 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -38,7 +38,6 @@ def load_ods(doc, sheet, headers=True, columns=None): for j, cell in enumerate(row): if j < len(col_index): # use header instead of column index - print(cell.value) df_dict[col_index[j]].append(cell.value) else: continue From 9240a1b3aee0a6ade55b09200b4cb2f97d8aa8ab Mon Sep 17 00:00:00 2001 From: fatzh Date: Thu, 6 Jun 2019 15:27:11 +0200 Subject: [PATCH 5/5] Replaced the column dict with an OrderedDict, support multiple columns with same names by appending a numbered suffix automatically. Column order is preserved, fixes test_header_file_with_cols that was failing. Added test case for duplicated column names. 
- fixed test_header_file_with_cols, the order was wrong --- pandas_ods_reader/parser.py | 29 ++++++++++++------ .../rsc/example_duplicated_column_names.ods | Bin 0 -> 9949 bytes pandas_ods_reader/tests/test_read_ods.py | 8 +++++ 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 pandas_ods_reader/tests/rsc/example_duplicated_column_names.ods diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index c533eb5..83a3dee 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -1,6 +1,7 @@ """Imports an ods file into a DataFrame object""" import ezodf import pandas as pd +from collections import OrderedDict from .tools import sanitize_df @@ -15,22 +16,35 @@ def load_ods(doc, sheet, headers=True, columns=None): raise ValueError("There is no sheet named {}".format(sheet)) sheet_idx = sheets.index(sheet) sheet = doc.sheets[sheet_idx] - df_dict = {} + df_dict = OrderedDict() col_index = {} for i, row in enumerate(sheet.rows()): # row is a list of cells if headers and i == 0 and not columns: # columns as lists in a dictionary - df_dict = {cell.value: [] for cell in row if cell.value} + columns = [] + for cell in row: + if cell.value: + if cell.value not in columns: + columns.append(cell.value) + else: + # add count to column name + idx = 1 + while "{}.{}".format(cell.value, idx) in columns: + idx +=1 + columns.append("{}.{}".format(cell.value, idx)) + + df_dict = OrderedDict((column, []) for column in columns) # create index for the column headers col_index = { - j: cell.value for j, cell in enumerate(row) if cell.value} + j: column for j, column in enumerate(columns) + } continue elif i == 0: columns = columns if columns else ( ["column_%s" % j for j in range(len(row))]) # columns as lists in a dictionary - df_dict = {column: [] for column in columns} + df_dict = OrderedDict((column, []) for column in columns) # create index for the column headers col_index = {j: column for j, column in enumerate(columns)} if 
headers: @@ -41,12 +55,7 @@ def load_ods(doc, sheet, headers=True, columns=None): df_dict[col_index[j]].append(cell.value) else: continue - # convert lists to pd.Series - df_series = {} - for col in df_dict.keys(): - df_series[col] = pd.Series(df_dict[col]) - # and convert to a DataFrame - df = pd.DataFrame(df_series) + df = pd.DataFrame(df_dict) return df diff --git a/pandas_ods_reader/tests/rsc/example_duplicated_column_names.ods b/pandas_ods_reader/tests/rsc/example_duplicated_column_names.ods new file mode 100644 index 0000000000000000000000000000000000000000..01c48d80ddee644b8eaa9998245def914d320edf GIT binary patch literal 9949 zcmds-byQr-wy&E6cZUR*5ZocSd*c=~Xyee(Gz51D?hssqy9W&roS?xW5FkMC;0_ng zx%(x1-{ib^|9!K^8f$frp1)N!Ypq#TUzIZOF)S_sfCvE4f_fAKZFobO0RX_masLy* z*3uRPb+rc>+1uM#nixSX!FDXpcBaf=BS%X|X0ScT&J=9oWDBx`GCSHsKt`sH79bE* z`EQzCLw9Hf@BqNWL3ppJZsBBWY-eO?yp{t&OAg}Q5p^TDf0ba&}3yKRPWF2 z000aC@$voDczOue{png+UR{z9=MmYH$4u04tTb?3^l-e)@B*yx)YQ}r3=Eu{oB{#@ zqMQg4+=x;6x0ET3cH~AP{$VcONqhUkeN$bIbrM%wSv0a0jedC#*zQ>{L(eOdp)w0Nlbic%`AZ z^|APk3HWWv1RbdaooR%h-VsieJYA?FTCO8nYb4%oCEjZ%IqD+49wfURBKP#~swjx7_U^uNY!9$IwLj8I`RDT+#hWP2Y zDUVqBmv>)oZw<`&Svl$w2~C0ZS?kl@m|2JUSDRi}eT()5gPAtq@39@LI2Xd!yFyIm z7^WI^-;5lbn!yV?fMK^6s;!5WiYR>_fCt;2qD!kO!<3ccDW;Y`&U&#`^*tQiznqTi zet^*tKQl~ci?+^8BWj%EFZ3)X5$fmacb8WFAT&d!S1WN*P9ZlHeZp>j1%+>*VWl4+iIMgZ&XOMt`fGZS$AG<|{|i%~m9r($YAwoM}5vz6gE? 
zt!FPAa#r8(hZGM+deAMsZi?V8dS51&`MAlr*TDcLn6Y7P`(xziifEP>0^KMgyTcc& z8rXO~I{Q+k>CV%Sd4{$UsKdl#^mr%p&zVKsGj|%u-np&TKvf7*C)@hGSuomS7Fut$ z^|$t3D1zTfmATKx>KNx%;eN#$At`M@KENDtr`$8-At$A{Ig^ZoF*xY^8iF8Q1#g>_YfCv5Dx?ph0CTiDZ*{3hV+g;K7`bngm zNj2M`M`F~ixRa2A5NTEkUD<>3X(s>Wfj$L4R02-kgqUu9DCXirUd>5>;eBF3rf` z`)Wsr<0nve)cYqcSe45J`huN=Ij44k&yv6C(7cl??rkZ*&IPm?lLQKciDh~-^U)Z< zAbK#>2RfA?bNUynD?}=I=Ii#(O2Y&&HuTEb1vwY1mJ!0F6t5{2NDJ|^O_y284=Lx0 z<$n_`+~_6ad~#at^V}0t%g8Rz7Z3ODS^Zjt1#lq$ph;t>lr3E-%764}O^jfw*9^6n zN`Ad90)KsY+b8r(`B~=2&JI+$6=pX0Z==K#-w}Zc(FmqBG#q@q^-9wiNeD6}L(~me z(8if`Lte~#NUgf)a9R+LY5+jE%0rwU&n6k`-t&(*<`G9%OU#>IaRHSWfoo{v(xFC) zkIl?VduV`S!qrKFWIV-hN&xaRrgtT))Lef3^67}{0d%#vS#zhdhSe_Ul{pu(#*$w4 z?Vo18A)0bfu=BRdNsmgUpn{y(dS3dDpA$>)=>gpfo z-=+*&CQ$fFCQuSdXrySY1HRam?3I_pcXl!`_-(G{)yIz);Ui~kq`MS64`)Hnlgm>@ z+_QU|5By##q6w?+uuk1*44BtO z%r&B3?TLC)qA%qN3Z!#i16UGcQr@=VjbH?h%=h)1Sal$mda8Vt%v7nAt_rKbP$6uw zwpIdCG*bF#9&IXLM&}EC&&Z&9hkBTiQ_(;O1Zea5c%!{V>A8|OEcz}S!hWh{s>{RXSeajDZ$|_r1u;0Zc z2-w!?o&NAn(4wZ%oyJ;$f?`^}YC434 z#nws5yqBN;^3t+4RWz*R2E_(OB=b{a74}YgmKc~7ChxW zcanb8MnY+ZoCbFpcAVbZyU&)42jwQ+&*dgG5@0WhA5j`yV?mm|+0x=*l|w+vYqb7M zam44tI0*})qzFb`Uvkqw!U!YE=^J-0sI-PJ-SJqP zgf8baMuw-w%)6I$g0=2*+-kEd6y{yuC^Albkis5(%><%a-ha@;Q>W2@iwkyd}zcNTLI{61m(*ioO_XQ|L*{N=0oN(RnRoP{_0qDu(In*G(q3I>r_c9PUG>2%u7A=L zTeuOmrH_l@O0WRIb{HcbSX;9x&`qw7$)RvNqFEkyG)@PNK;$-j0dZ;o>sVdWLJd~an8=!cs z1`Rz8P7z^0YL$7bR&;b-8cCy7LSt)}G3x$311IE~>?dE!ThgzllcCNIM~k2 z(%cF1uc?6}JBvL8Yz_fAIvN{6SpMw!Fb(*pClm~}`OjWYf8%8jws*3>pMN<0hr5ye z#?R8$$QV;IYOBsre;HNLw5eGm;w6&fuP|T5f(T& z2<ojyAPJA z;tkW-_msAp`CaceZkb2!o~%^VI!oAy4Ck6(MsL&}l4(d+JiIxzXBOjdFaUu6{iOBJ zBMy}NC(;CL2Yr|~-w!y3w3op1+?e+R4x{(&i>5mDpSyHoDXoZa67WbTzdaZwGB7LX;C-z(*OBTv; zbGsYn$aMp~8sx$d-@;+}>V+RC!7HSMd!6RvOxY^t#xW$Z;=ovRj7^NQNw>60Cri&X zBIVxmvWjvtBSQ0qp{{O>IhUrkiEfh%|6&cy{G4z5g%&6xFl%m$f~@44F&PIE?3QLM z$U)C#rMycs!m-pvPeVK3~ zR!@?hQ3?7q&KN8{B!B)^5x;kOg?y~b=j$l5Iui@d^kyH+#DNSJ)P7k6$k0T@IYeUP zjgZ)D@sbA8@;6-Uh>ElCR#OJ9O>Zi 
zAe&vI(G2nrRgYR7$5(D0PwQ@ReQd-5(!$faWw<3eUoB!D9Pv0djk;nwGdZKTvaG3| ztewsq$P5yD+IDs;yH)oDb|2TLpoP97S82uU4F4K1^ITEHg9cm?Mry#K6uO^sHgv40 zj9Lf3+wiC>emB)gR|A+9OkJsBfxup%syJqJl*yK>E9y?n8T-2FTyA-Q1CHIbD`dyv zgh8?Ky3C&eU;2Fm7M+HW9+DDaPVUGE!-PEM^CcW)f0ASY0d&arqu%IwpybL~EoIq6p8=RwR?Z#ZX`rTE_;4$7VTL*4Cf|`@w)%Q#z zJ+?x+Ws$4(d_)c1J1d20q1kRY0{fW$c&Qo`<6-<_xg?x7lRL&RZ5T&G)DAAi7U9Vz z3=AB=_8u$qE!#2K2C z*_R750Q-OQok5-QaX+%De*tQ@DIq#7-cZtTSg3JuvV?hhI|>Xz9;)rl-PEb7M4=K}$huwi-f+=-w@T?3Y~*GuvN>Uei$#!(?wQW+dfe@sjaJ7>bx zGrXr)alN~!uct|`H@beuv$oneJBJcf2=IVF6*ZU!nzm#DLj)nsOhVMJq zKPU1BSm_9LwE_Km;;6R`UggHV^YRYNh6}}iUHwYYxVg%I?^VNVkr!IfhWAl$R4VK$ zvbeN!ikH)5WU=ys3iDNQ^Ii?5NIRt zd-e^4Q`A=NE*f-+gZkba0j{=T^Cv!Ss-YfXnJ6AL?&uL(#}2dgsR${FK04w(+2HZF z?B{%0v|&s;OBt|PjbfaT+9_Q;_{q|`<*TT-sc0z#0oKMTnV0FKItZhR`N)Jcm4e$d z&D19+2tSHV?oN-8L9V6|WZN)*Fhyj7tvzhRT_XX`t27t`S!1IVw>KD~UKlp6g_D%L zQy_Ad)aF{_oV4#$0LCIYMD;N6 zSVr#;Wmg=Lmx8zHI+Fwik$YOOsZ!thXFM%%T(P=#DBTMYTMD|Ld1F zvg!&56t4<=<@u4|JwB0(wxD$R*}jU)l9CUP!qomm5gMRlB+1HLQ^sDThT3Bmn@S53 z4XSDE)E~SSIJ0=m&~|HfAQU%3UuuWHN*h;X%{Z9M(PcA*>WeW1xO8Z1Nx-YHBJH;> z|JHdPLf%g4iP;4C?jJn)v9Cgos~<&cKo{ zytsTc-eOX?FA{V_Mg*-^F>y(t6rV7MgFGhW6KFasq!A&$$)Kzs@Xb{+aC5T8-l9E= zS4&FE`C?u#bB;J}al9s8VT?RfvuS$LZ8^qF;E}fngWAEzoamePO-x$FSe~q>N+s6Z z$0yjw24u!KLY^I5Mgk(pHB+@WgQqPZUmKL^B{6u`k4~HM%2q1dLa8#)g_Of%irl!B z6%S2_fi|A?#>*AVDVbCn>0gV~mPNM~PwibckRfI`d$8QblvqJ+d_MEU(PfknLC&eiwJ6>OCie z%F3Z+L~Jn)c}WfpTC^S~SfnLr>F$TVgtwkLDU5bcsJ{z%O28ReWUpb}Vfs6RP)=-D zlC9lhVD``}k2EkJ&jODBM}97z1q%CxL4-GF#A{#Wr3AxzTX6)1E~W&88tr*qhC0}I z*?96FcThy&i$vHltrdSqQXx*gUPS&e8)+;3*}{7)rNqsB!u}R1idz$ ziGAbGNs#bP7i>a^sQ~~QA*(eZh_!Z+4PO;~az|4+@KTA)C=|>K=GUT=GZ*y=X3zs({d<;vMD<*w=Yk47$uDV?A~x5IE{&U_t@?V$KjinVOM}844g`4#0g@ zNn{Fo@zJ9>-$q4O54kfCYn9WA&t-8pgcve;1*^rR&pDB*9@C3Wa{?1cqo~m0OC5gk zF&DF|vARNQnXCUK-ZNtAm443?PpR#9%mj>h@5=QzJcx8GQTB&y^bFNIF7}^sFGZ;Z9leNz_02C z3ix4RNI{MXaK0+C7uU*0Vj_bYxDKg0qXKaXGqIUa60n-i5O{S$|xB-m@}M8P(rCocvZ!u`gY zi-x=}Gb`y8kGo3NB(B00uY=hXG7)yXnvoHqJ7Tz9ecnM69>#8ct1jl8;TyyO2)yz! 
z{hdnR5rI#!*aWF{aeu^F8Y(zoNM@?`ik$~OY8oC%SpHy0O#G}_c{_>t%rj<{ugGG4 zdYa_qIIYM9V>+HzI+7VyPURdbl03wWKHzSDK)Y;j@o42Ly=IRW<^qF!vx=dghPGm$ zG83ue+aX@IN922gSmhQ)zi*XwCrWK2jGoC%A7_W2&alb&QN2Kgv*_&-^X&>WULHN3 z>6ogSq&4KXD{y#ZgHQ@Kz~u&Ga_^5bB&2IOs0AYc_Y~ye*%x7_%ba-UMKQQvLWMAeD7| zk@`Uk%Nw;A3!a0`iVT=P+_#yAbesLHGG`MoiyX-p$9=-WWdLx~XII&O$e++4tCi~j@6 z$zj+t{cRfjhdh-~ zrE_k5K6GPtoi8Nn*c&&yhm-;dqkrssU61O%BM;1vJgFb@rnkUFj%Q8Mp%`aBs_Pxy zhPCz<&~3#l{w^c(Dx`2rbFA;#JOXZ@jf7sz6=tu6QKlXWQ( zlE5iI_OkO{*JWsK+Dw_-sCusVdOa?Io~GI43}HSUxwI}-ZsQwp1JmGz@T@yTwvTyPPg_4v09E~{?qF%Gml+`mQ;q3 z#uf2T-Cz*(CE?Vrd*ckFpji>>%IAr}S$R;jt$8S@q}fgD%hJ2r>wBVsfVeKoERll* z0EEc?r;**iR=+_|qlfuv&D#)=zwc7xZqiEnenI%C{dzLldw526IUU_S+k4bD)p zCLb@o-H5NPDeUSw^Lpc0&hqVSzo5t6zrLhwmf|VqH(wRa>Gk*GBUdb5L3?3Jjb+6; zzN4Ba&sm3sed89%xzdw#`6n8rv}PyMSr1{;?d4`Tk~ApmYm>s)2i(I9D9M_cK#d z_Z+$AMU_nrmBGMscZ!Q`zPr=>xQ6aJc%F)ap;EQDN0{4Jz*-Eq3ExT9r5q3d$)xs@ zIExPsMoNKAGo#yx@8)gG?yJKlq9e)>jF6Ov1HpvIh~>sV<9$^%iwE5e>b`0M7re$> zI*a#2Ba1z@bhvnPPOJVw{jok)&w;`9c+w^nA>p@rrnX5CO4R4$Bsk7szaK~b>QPE8 zZ{CKJXzkbre{8H17U4}}V`7jR2*oTk?RlrTf{TqFV6B;GSxmU0ng9$EcCNWOq1szu z72mL0Q*e4d2=4)MUk&QcYAnpMEU&Ytn}d9+N<`|Ej1=C%T)H#}U3zHkYXiZg_xC`? 
z_CEp{y!&yyf~2}AldPf?%l`pe_f21>CTjPQ9J6Hu4}W@KbX`50hgV_?WgJ1dhDXLl zv{IsfKdg+$K|?Y$Z$ZmBDLEZ4^-JUHNhITHQlit2C2hDFH`^-fnX~1RThr!UtIrIz z@YwyE$_cNN>_8tO5}O*;vN>B6vQ~K*K64F?2-Ft@EYf+pTpX2VFReC~1rB-?6(xGi zCqj`g>0vhjW9Uq4?_TuOlbfUFjyr37xnCF%)&F_`d0|-X=%fs_NpktVP5Q{9#HtC_ zA_=);)|r!9vG5`ZCNpYSEjSCx_XraQSV|n(cqar+_D ze-JzO*oxcRQ72B(A>p`>I^#V=RtCa6!Ug;6XRCl>#+$^*syX)(W1`4yA@S?2+i|Fr(!===|C{%5fVaQ@Ta z(f((Ueq!|hR~3QZsQkq0e^&W>fFQq7`4zMOS?BK!U;G=Lzhn15EB(Dh-+rU?D~A8K z&R-AluZ;P=jeap=^!r5j3zq-uA^ywrSE%v8pnsY!#@~qaKOKMVGas18Pout1oj(c4 zKmC5qz7IX>PrD_(FY15yumAM=HG4c{=AX7h{)arR41~LXfdK%l`w!?o=@{RyTmb<8 E1@{|*{{R30 literal 0 HcmV?d00001 diff --git a/pandas_ods_reader/tests/test_read_ods.py b/pandas_ods_reader/tests/test_read_ods.py index 4b7e843..321e423 100644 --- a/pandas_ods_reader/tests/test_read_ods.py +++ b/pandas_ods_reader/tests/test_read_ods.py @@ -10,6 +10,7 @@ rsc = os.path.join(root, "rsc") header_file = "example_headers.ods" no_header_file = "example_no_headers.ods" +duplicated_column_names_file = "example_duplicated_column_names.ods" class TestOdsReader(object): @@ -45,3 +46,10 @@ class TestOdsReader(object): df = read_ods(path, 1, headers=False, columns=columns) assert list(df.columns) == columns assert len(df) == 10 + + def test_duplicated_column_names(self): + path = os.path.join(rsc, duplicated_column_names_file) + df = read_ods(path, 1) + assert isinstance(df, pd.DataFrame) + assert len(df.columns) == 4 + assert "website.1" in df.columns