# Build DataFrames from plain Python lists.
tmp = [1, 2, 3, 4]

# A flat list becomes a single column; pandas supplies a default integer
# index and column label.
print("create with default index and column")
print(pd.DataFrame(tmp))

# The same data with explicit row and column labels.
print("create with own index and column")
print(pd.DataFrame(tmp, columns=["name"], index=["num1", "num2", "num3", "num4"]))

# Create from a two-dimensional list: each inner list is one row.
tmp_dim2 = [[1, 2, 3], [2, 3, 4]]
print("create use dimension 2 list")
# NOTE: this call was disabled in the original (and was missing its closing
# parenthesis); uncomment it to display the 2x3 frame.
# print(pd.DataFrame(tmp_dim2))
1 2 3 4 5 6 7 8 9 10 11 12 13
create with default index and column
   0
0  1
1  2
2  3
3  4
create with own index and column
      name
num1     1
num2     2
num3     3
num4     4
create use dimension 2 list
通过numpy数组创建
1 2 3 4 5 6 7
import numpy as np

# Build DataFrames from numpy arrays.
# A 2-D array maps directly onto rows and columns.
tmp = np.array([[1, 2, 3], [1, 2, 3]])
print(pd.DataFrame(tmp))

# Twelve random floats drawn from [0, 1), arranged as 3 rows by 4 columns.
# (The original also called np.random.randint(12) and discarded the result;
# that dead statement has been removed.)
tmp = np.random.random(12).reshape(3, 4)
print("numpy array use random")
print(pd.DataFrame(tmp))
# Summary statistics for a text column and a numeric column of train_data.
print("字符串信息统计\n%s" % train_data["姓名"].describe())

# Compute the numeric summary once and reuse it below.
age_stats = train_data["年龄"].describe()
print("数值信息统计\n%s" % age_stats)

# Read individual statistics by their row label in the describe() result.
# The original format strings had no placeholder, so the values were never
# shown, and .count()/.max() were applied to the summary table itself
# rather than selecting its "count" and "max" rows.
print("count is %d" % age_stats["count"])
print("max num is %s" % age_stats["max"])
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
字符串信息统计
count      8
unique     8
top       张二
freq       1
Name: 姓名, dtype: object
数值信息统计
count     8.00000
mean     24.50000
std       2.44949
min      21.00000
25%      22.75000
50%      24.50000
75%      26.25000
max      28.00000
Name: 年龄, dtype: float64
count is
max num is