Python Pandas Dataframe
Python Pandas Dataframe
Pandas Dataframe
import pandas as pd
# Dictionary를 이용한 DataFrame 생성
df = pd.DataFrame({'KOSPI': [1915, 1961, 2026, 2467, 2041],
'KOSDAQ': [542, 682, 631, 798, 675]})
print(df)
# KOSPI KOSDAQ
# 0 1915 542
# 1 1961 682
# 2 2026 631
# 3 2467 798
# 4 2041 675
df = pd.DataFrame({'KOSPI': [1915, 1961, 2026, 2467, 2041],
'KOSDAQ': [542, 682, 631, 798, 675]},
index=[2014, 2015, 2016, 2017, 2018])
print(df)
# KOSPI KOSDAQ
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
print(df.describe())
# KOSPI KOSDAQ
# count 5.000000 5.000000
# mean 2082.000000 665.600000
# std 221.117616 92.683871
# min 1915.000000 542.000000
# 25% 1961.000000 631.000000
# 50% 2026.000000 675.000000
# 75% 2041.000000 682.000000
# max 2467.000000 798.000000
print(df.info())
# <class 'pandas.core.frame.DataFrame'>
# Int64Index: 5 entries, 2014 to 2018
# Data columns (total 2 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 KOSPI 5 non-null int64
# 1 KOSDAQ 5 non-null int64
# dtypes: int64(2)
# memory usage: 120.0 bytes
# None
# Series를 이용한 DataFrame 생성
kospi = pd.Series([1915, 1961, 2026, 2467, 2041],
index=[2014, 2015, 2016, 2017, 2018],
name='KOSPI')
print(kospi)
# 2014 1915
# 2015 1961
# 2016 2026
# 2017 2467
# 2018 2041
# Name: KOSPI, dtype: int64
kosdaq = pd.Series([542, 682, 631, 798, 675],
index=[2014, 2015, 2016, 2017, 2018],
name='KOSDAQ')
print(kosdaq)
# 2014 542
# 2015 682
# 2016 631
# 2017 798
# 2018 675
# Name: KOSDAQ, dtype: int64
df = pd.DataFrame({kospi.name: kospi, kosdaq.name: kosdaq})
print(df)
# KOSPI KOSDAQ
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
# List를 이용한 DataFrame 생성
colums = ['KOSPI', 'KOSDAQ']
index = [2014, 2015, 2016, 2017, 2018]
rows = []
rows.append([1915, 542])
rows.append([1961, 682])
rows.append([2026, 631])
rows.append([2467, 798])
rows.append([2041, 675])
print(df)
# KOSPI KOSDAQ
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
# DataFrame Loop
for i in df.index:
print(i, df['KOSPI'][i], df['KOSDAQ'][i])
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
for row in df.itertuples(name='KRX'):
print(row)
# KRX(Index=2014, KOSPI=1915, KOSDAQ=542)
# KRX(Index=2015, KOSPI=1961, KOSDAQ=682)
# KRX(Index=2016, KOSPI=2026, KOSDAQ=631)
# KRX(Index=2017, KOSPI=2467, KOSDAQ=798)
# KRX(Index=2018, KOSPI=2041, KOSDAQ=675)
# itertuples()메소드가 iterrows()보다 더 빠르다.
for row in df.itertuples():
print(row[0], row[1], row[2])
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
for idx, row in df.iterrows():
print(idx, row[0], row[1])
# 2014 1915 542
# 2015 1961 682
# 2016 2026 631
# 2017 2467 798
# 2018 2041 675
댓글남기기