import pandas as pd
import requests
import io

from IPython.display import display
pd.set_option('mode.chained_assignment',  None) # <==== turn off the SettingWithCopyWarning warning
    
# Download the Chipotle order data (TSV) and save it in the working directory.
url = "https://raw.githubusercontent.com/yoonkt200/python-data-analysis/master/data/chipotle.tsv"
file_name = "chipotle.tsv"

# Fetch BEFORE opening the output file, so that a failed request cannot leave
# an empty or truncated chipotle.tsv behind.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently saving an error page as data.
response.raise_for_status()
with open(file_name, "wb") as dfile:
    dfile.write(response.content)


!head "./chipotle.tsv" -n 5

==> ./chipotle.tsv <==
order_id	quantity	item_name	choice_description	item_price
1	1	Chips and Fresh Tomato Salsa	NULL	$2.39 
1	1	Izze	[Clementine]	$3.39 
1	1	Nantucket Nectar	[Apple]	$3.39 
1	1	Chips and Tomatillo-Green Chili Salsa	NULL	$2.39 
2	2	Chicken Bowl	[Tomatillo-Red Chili Salsa (Hot), [Black Beans, Rice, Cheese, Sour Cream]]	$16.98 
3	1	Chicken Bowl	[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sour Cream, Guacamole, Lettuce]]	$10.98 
3	1	Side of Chips	NULL	$1.69 
4	1	Steak Burrito	[Tomatillo Red Chili Salsa, [Fajita Vegetables, Black Beans, Pinto Beans, Cheese, Sour Cream, Guacamole, Lettuce]]	$11.75 
4	1	Steak Soft Tacos	[Tomatillo Green Chili Salsa, [Pinto Beans, Cheese, Sour Cream, Lettuce]]	$9.25 
head: -n: No such file or directory
head: 5: No such file or directory


import pandas as pd

# Load the tab-separated file into a DataFrame (read_table splits on "\t"
# by default) and preview the first rows.
df_raw = pd.read_table(file_name)
df_raw.head()


# Work on a copy so that df_raw stays untouched, then show the 10 item names
# that appear in the most rows (value_counts sorts in descending order).
df_data = df_raw.copy()
item_counts = df_data["item_name"].value_counts()
item_counts.head(10)

Chicken Bowl           726
Chicken Burrito        553
Chips and Guacamole    479
Steak Burrito          368
Canned Soft Drink      301
Steak Bowl             211
Chips                  211
Bottled Water          162
Chicken Soft Tacos     115
Chicken Salad Bowl     110
Name: item_name, dtype: int64


# Sum the ordered quantity per item, then show the 10 largest totals.
quantity_per_item = df_data.groupby(["item_name"])["quantity"].sum()
quantity_per_item.nlargest(10)

item_name
Chicken Bowl                    761
Chicken Burrito                 591
Chips and Guacamole             506
Steak Burrito                   386
Canned Soft Drink               351
Chips                           230
Steak Bowl                      221
Bottled Water                   211
Chips and Fresh Tomato Salsa    130
Canned Soda                     126
Name: quantity, dtype: int64


# Print a summary of the DataFrame: columns, non-null counts and dtypes.
df_data.info()


RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


# Convert item_price from text such as "$2.39 " to float.
# Vectorised string methods replace the per-element lambda. The leading
# astype(str) makes the cell safe to re-run after the column is already
# numeric (the original raised AttributeError on a second run).
df_data["item_price"] = (
    df_data["item_price"]
    .astype(str)
    .str.replace("$", "", regex=False)  # literal "$", not the regex end anchor
    .str.strip()  # the raw values carry trailing whitespace
    .astype(float)
)


# Print the summary again to check the dtype of item_price after the conversion.
df_data.info()


RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   order_id            4622 non-null   int64  
 1   quantity            4622 non-null   int64  
 2   item_name           4622 non-null   object 
 3   choice_description  3376 non-null   object 
 4   item_price          4622 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 180.7+ KB


# Average amount per order: sum item_price within each order_id, then take
# the mean of those per-order sums.
order_totals = df_data.groupby("order_id")["item_price"].sum()
order_totals.mean()

18.811428571428568

	order_id	quantity	item_name	choice_description	item_price
0	1	1	Chips and Fresh Tomato Salsa	NaN	$2.39
1	1	1	Izze	[Clementine]	$3.39
2	1	1	Nantucket Nectar	[Apple]	$3.39
3	1	1	Chips and Tomatillo-Green Chili Salsa	NaN	$2.39
4	2	2	Chicken Bowl	[Tomatillo-Red Chili Salsa (Hot), [Black Beans...	$16.98

티스토리툴바