import pandas as pd
import requests
import io

from IPython.display import display
    
# Base location of the chapter-1 sample CSV files used throughout this notebook.
DATA_BASE_URL = "https://raw.githubusercontent.com/wikibook/pyda100/master/1%EC%9E%A5/"


def _read_remote_csv(file_name, timeout=30):
    """Download one CSV file below ``DATA_BASE_URL`` and parse it.

    Args:
        file_name: File name relative to ``DATA_BASE_URL``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The file contents parsed into a ``pandas.DataFrame``.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not answer within ``timeout``.
    """
    response = requests.get(DATA_BASE_URL + file_name, timeout=timeout)
    # Fail loudly on an error status instead of handing the error page to
    # the CSV parser, which would silently produce a garbage DataFrame.
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


df_raw_item_master = _read_remote_csv("item_master.csv")
display(df_raw_item_master.head())


df_raw_customer_master = _read_remote_csv("customer_master.csv")
display(df_raw_customer_master.head())


df_raw_transaction_1 = _read_remote_csv("transaction_1.csv")
display(df_raw_transaction_1.head())

df_raw_transaction_2 = _read_remote_csv("transaction_2.csv")
display(df_raw_transaction_2.head())


# Distinct transaction totals in the first transaction file.
df_raw_transaction_1['price'].unique()

array([210000,  50000, 120000, 170000, 180000,  85000, 150000, 100000,
       295000, 205000, 480000, 240000, 200000, 220000, 255000, 265000,
       390000, 360000, 420000, 440000, 380000, 570000, 280000, 320000,
       230000, 235000, 270000, 750000, 135000, 260000, 345000, 630000,
       330000, 185000, 310000, 350000, 290000, 300000, 355000, 325000,
       460000, 675000, 465000, 470000, 410000, 340000, 445000])


# Download the two transaction-detail CSV files. Each request gets a timeout
# and an explicit status check so that a failed download raises immediately
# instead of an error page being parsed as CSV.
url = "https://raw.githubusercontent.com/wikibook/pyda100/master/1%EC%9E%A5/transaction_detail_1.csv"
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content
df_raw_transaction_detail_1 = pd.read_csv(io.StringIO(download.decode('utf-8')))
display(df_raw_transaction_detail_1.head())

url = "https://raw.githubusercontent.com/wikibook/pyda100/master/1%EC%9E%A5/transaction_detail_2.csv"
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content
df_raw_transaction_detail_2 = pd.read_csv(io.StringIO(download.decode('utf-8')))
display(df_raw_transaction_detail_2.head())


# Stack the two halves of each table into one frame. Both halves are read
# with their own 0-based row labels, so ignore_index=True is needed to
# renumber the rows and keep the labels of the combined frame unique.
df_transaction = pd.concat(
    [df_raw_transaction_1, df_raw_transaction_2],
    ignore_index=True,
)
df_transaction_detail = pd.concat(
    [df_raw_transaction_detail_1, df_raw_transaction_detail_2],
    ignore_index=True,
)
display(df_transaction.head())
display(df_transaction_detail.head())


# Left join on transaction_id: every detail row is kept and gains the
# columns of its matching transaction row.
df_transaction_combined = df_transaction_detail.merge(
    df_transaction,
    how="left",
    on="transaction_id",
)
df_transaction_combined.head()


# Enrich each detail row with the customer master columns (left join on customer_id) ...
df_transaction_plus_customer = df_transaction_combined.merge(
    df_raw_customer_master,
    how="left",
    on="customer_id",
)
display(df_transaction_plus_customer.head())
# ... and then with the item master columns (left join on item_id).
df_transaction_master = df_transaction_plus_customer.merge(
    df_raw_item_master,
    how="left",
    on="item_id",
)
df_transaction_master


# Convert the payment timestamps from strings to datetime64 values.
df_transaction_master['payment_date'] = pd.to_datetime(df_transaction_master['payment_date'])
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appends a stray "None".
df_transaction_master.info()
df_transaction_master.head(2)


Int64Index: 7144 entries, 0 to 7143
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   detail_id          7144 non-null   int64         
 1   transaction_id     7144 non-null   object        
 2   item_id            7144 non-null   object        
 3   quantity           7144 non-null   int64         
 4   price              7144 non-null   int64         
 5   payment_date       7144 non-null   datetime64[ns]
 6   customer_id        7144 non-null   object        
 7   customer_name      7144 non-null   object        
 8   registration_date  7144 non-null   object        
 9   email              7144 non-null   object        
 10  gender             7144 non-null   object        
 11  age                7144 non-null   int64         
 12  birth              7144 non-null   object        
 13  pref               7144 non-null   object        
 14  item_name          7144 non-null   object        
 15  item_price         7144 non-null   int64         
dtypes: datetime64[ns](1), int64(5), object(10)
memory usage: 948.8+ KB
None


# Split the payment timestamp into calendar month and year columns
# (month is added first, so it precedes year in the column order).
payment_dates = df_transaction_master['payment_date']
df_transaction_master['payment_month'] = payment_dates.dt.month
df_transaction_master['payment_year'] = payment_dates.dt.year
df_transaction_master.head()


# Current column order as a plain Python list.
cols = df_transaction_master.columns.tolist()
cols

['detail_id',
 'transaction_id',
 'item_id',
 'quantity',
 'price',
 'payment_date',
 'customer_id',
 'customer_name',
 'registration_date',
 'email',
 'gender',
 'age',
 'birth',
 'pref',
 'item_name',
 'item_price',
 'payment_month',
 'payment_year']


# Desired column order: payment_year / payment_month are moved in front of
# payment_date; every other column keeps its relative position.
ordered_columns = [
    'detail_id',
    'transaction_id',
    'item_id',
    'quantity',
    'price',
    'payment_year',
    'payment_month',
    'payment_date',
    'customer_id',
    'customer_name',
    'registration_date',
    'email',
    'gender',
    'age',
    'birth',
    'pref',
    'item_name',
    'item_price',
]
df_transaction_master = df_transaction_master[ordered_columns]
df_transaction_master.head(2)


# Number of detail rows per payment year.
df_transaction_master['payment_year'].value_counts()

2019    7144
Name: payment_year, dtype: int64


# Number of detail rows per payment month, most frequent month first.
payment_month_counts = df_transaction_master['payment_month'].value_counts()
payment_month_counts

7    1243
6    1202
4    1184
3    1181
5    1170
2    1164
Name: payment_month, dtype: int64


# Revenue has to be computed per detail line. The merged 'price' column is the
# total of the whole transaction, repeated on every detail row of that
# transaction, so summing it counts multi-item transactions once per line and
# credits every item with the full basket value. quantity * item_price is the
# amount that belongs to the line itself; it is stored under the same 'price'
# name so the result frames keep their original layout.
df_line_revenue = df_transaction_master.assign(
    price=df_transaction_master['quantity'] * df_transaction_master['item_price']
)

# Monthly revenue: index payment_month, single column 'price'.
df_revenue_month = df_line_revenue.groupby(by=['payment_month'])[['price']].sum()
df_revenue_month


df_revenue_month.plot(figsize=(10,4), grid=True)


# Revenue per (payment_month, item_name) pair, single column 'price'.
df_revenue_detail = df_line_revenue.groupby(by=["payment_month", "item_name"])[['price']].sum()
df_revenue_detail.head()


# Pivot item_name from the row index into the columns: one row per month,
# one ('price', item) column per item.
df_revenue_pivot = df_revenue_detail.unstack()
df_revenue_pivot


# Name of the second column level (the pivoted item_name level) ...
idx = df_revenue_pivot.columns.names[1]
print(idx)
# ... and the item labels found on that level, in column order.
cols = [col[1] for col in df_revenue_pivot.columns]
print(cols)

item_name
['PC-A', 'PC-B', 'PC-C', 'PC-D', 'PC-E']


import matplotlib.pyplot as plt

# One line per item, with the payment months along the x-axis.
revenue_by_item = df_revenue_detail.unstack()
ax = revenue_by_item.plot(kind='line', figsize=(10,4), grid=True)
# The x-axis carries the pivot's row index (payment_month). `idx` is the name
# of the pivoted column level (item_name): it describes the legend entries,
# not the axis, so it is used as the legend title instead of the x label.
ax.set_xlabel(revenue_by_item.index.name)
ax.set_ylabel("price")
ax.ticklabel_format(style="plain")  # full amounts instead of scientific notation
ax.legend(cols, title=idx)
plt.show()

	customer_id	customer_name	registration_date	email	gender	age	birth	pref
0	IK152942	김서준	2019-01-01 0:25	hirata_yuujirou@example.com	M	29	1990-06-10	대전광역시
1	TS808488	김예준	2019-01-01 1:13	tamura_shiori@example.com	F	33	1986-05-20	인천광역시
2	AS834628	김도윤	2019-01-01 2:00	hisano_yuki@example.com	F	63	1956-01-02	광주광역시
3	AS345469	김시우	2019-01-01 4:48	tsuruoka_kaoru@example.com	M	74	1945-03-25	인천광역시
4	GD892565	김주원	2019-01-01 4:54	oouchi_takashi@example.com	M	54	1965-08-05	울산광역시

	transaction_id	price	payment_date	customer_id
0	T0000000113	210000	2019-02-01 01:36:57	PL563502
1	T0000000114	50000	2019-02-01 01:37:23	HD678019
2	T0000000115	120000	2019-02-01 02:34:19	HD298120
3	T0000000116	210000	2019-02-01 02:47:23	IK452215
4	T0000000117	170000	2019-02-01 04:33:46	PL542865

	transaction_id	price	payment_date	customer_id
0	T0000005113	295000	2019-06-15 07:20:27	TS169261
1	T0000005114	50000	2019-06-15 07:35:47	HI599892
2	T0000005115	85000	2019-06-15 07:56:36	HI421757
3	T0000005116	50000	2019-06-15 08:40:55	OA386378
4	T0000005117	120000	2019-06-15 08:44:23	TS506913

	detail_id	transaction_id	item_id	quantity
0	0	T0000000113	S005	1
1	1	T0000000114	S001	1
2	2	T0000000115	S003	1
3	3	T0000000116	S005	1
4	4	T0000000117	S002	2

	detail_id	transaction_id	item_id	quantity
0	5000	T0000004870	S002	3
1	5001	T0000004871	S003	1
2	5002	T0000004872	S001	2
3	5003	T0000004873	S004	1
4	5004	T0000004874	S003	2

	item_id	item_name	item_price
0	S001	PC-A	50000
1	S002	PC-B	85000
2	S003	PC-C	120000
3	S004	PC-D	180000
4	S005	PC-E	210000

	price
payment_month
2	179190000
3	175375000
4	176720000
5	171385000
6	183395000
7	188685000

		price
payment_month	item_name
2	PC-A	29270000
	PC-B	27785000
	PC-C	22905000
	PC-D	35010000
	PC-E	64220000

	detail_id	transaction_id	item_id	quantity	price	payment_date	customer_id	customer_name	registration_date	email	gender	age	birth	pref	item_name	item_price
0	0	T0000000113	S005	1	210000	2019-02-01 01:36:57	PL563502	김태경	2019-01-07 14:34	imoto_yoshimasa@example.com	M	30	1989-07-15	대전광역시	PC-E	210000
1	1	T0000000114	S001	1	50000	2019-02-01 01:37:23	HD678019	김영웅	2019-01-27 18:00	mifune_rokurou@example.com	M	73	1945-11-29	서울특별시	PC-A	50000
2	2	T0000000115	S003	1	120000	2019-02-01 02:34:19	HD298120	김강현	2019-01-11 8:16	yamane_kogan@example.com	M	42	1977-05-17	광주광역시	PC-C	120000
3	3	T0000000116	S005	1	210000	2019-02-01 02:47:23	IK452215	김주한	2019-01-10 5:07	ikeda_natsumi@example.com	F	47	1972-03-17	인천광역시	PC-E	210000
4	4	T0000000117	S002	2	170000	2019-02-01 04:33:46	PL542865	김영빈	2019-01-25 6:46	kurita_kenichi@example.com	M	74	1944-12-17	광주광역시	PC-B	85000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
7139	7139	T0000006894	S004	1	180000	2019-07-31 21:20:44	HI400734	김윤성	2019-01-04 13:24	shishido_akira@example.com	M	64	1955-01-13	대구광역시	PC-D	180000
7140	7140	T0000006895	S002	1	85000	2019-07-31 21:52:48	AS339451	김무경	2019-02-11 19:34	aihara_miki@example.com	F	74	1945-02-03	대구광역시	PC-B	85000
7141	7141	T0000006896	S001	2	100000	2019-07-31 23:35:25	OA027325	박준석	2019-04-17 9:23	matsuda_saki@example.com	F	40	1979-05-25	서울특별시	PC-A	50000
7142	7142	T0000006897	S002	1	85000	2019-07-31 23:39:35	TS624738	이가빈	2019-02-20 18:15	shinndou_masatoshi@example.com	M	56	1963-02-21	인천광역시	PC-B	85000
7143	7143	T0000006898	S002	1	85000	2019-07-31 23:41:38	AS834214	이승채	2019-04-07 3:20	tahara_yuuko@example.com	F	74	1944-12-18	대전광역시	PC-B	85000

	price
item_name	PC-A	PC-B	PC-C	PC-D	PC-E
payment_month
2	29270000	27785000	22905000	35010000	64220000
3	30440000	28540000	20275000	27205000	68915000
4	31600000	26850000	23665000	26045000	68560000
5	29575000	28660000	21885000	27485000	63780000
6	30650000	28020000	24865000	32550000	67310000
7	30225000	31350000	21715000	28170000	77225000

티스토리툴바