## With Statement and Context Manager

In [66]:
import time

class TestContextManager:
    """Minimal context manager that announces entering and leaving a ``with`` block."""

    def __enter__(self):
        """Print a marker and return this instance.

        The return value is what ``with TestContextManager() as t`` binds to
        ``t``; without an explicit return, ``t`` would be None.
        """
        print("doing __enter__")
        return self

    def __exit__(self, type, value, traceback):
        """Print a marker when the ``with`` block is left.

        The three arguments describe an exception raised inside the block and
        are all None when the block finishes normally. Nothing is returned
        (i.e. None), so an exception raised in the block keeps propagating.
        """
        print("doing __exit__")

# __enter__ runs before the indented body and __exit__ after it,
# which is the order of the two printed lines below.
with TestContextManager() as t:
    time.sleep(2)

doing __enter__
doing __exit__


## Series Operations

In [67]:
import pandas as pd

In [68]:
# no index passed: pandas supplies the default 0..3 RangeIndex
s = pd.Series([1,4,9,16])

0     1
1     4
2     9
3    16
dtype: int64

In [69]:
# the row labels (a RangeIndex here)
s.index

RangeIndex(start=0, stop=4, step=1)

In [70]:
# the underlying data as a NumPy array
s.values

array([ 1,  4,  9, 16])

In [71]:
# same values, this time with explicit string labels
s = pd.Series([1,4,9,16],index=['a','b','c','d'])

a     1
b     4
c     9
d    16
dtype: int64

In [72]:
# now an Index of strings rather than a RangeIndex
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [73]:
# from a dict: keys become the index labels, values the data
states = pd.Series({'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000})

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [74]:
# None is stored as NaN, which makes the whole Series float64
states2 = pd.Series({'California': None, 'Ohio': 35000, 'Oregon': 16000, 'Texas': 71000})

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [75]:
# addition aligns on the index labels; a label that is missing (Utah) or NaN
# (California) on either side gives NaN in the result
states_sum = states + states2

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [76]:
# fill_value=0 stands in for a label absent from one side (Utah -> 5000.0);
# California is still NaN because it has no value on either side
states.add(states2,fill_value=0)

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah            5000.0
dtype: float64

In [77]:
# name the index and the Series itself; both names show up in the repr
states_sum.index.name = 'state'
states_sum.name = 'population'

In [78]:
# the repr now shows the index name on top and the Series name at the bottom
states_sum

state
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
Name: population, dtype: float64

In [79]:
import numpy as np

In [80]:
# float data with a descending integer index, so labels and positions disagree
s = pd.Series(np.arange(4.), index=[4,3,2,1])

4    0.0
3    1.0
2    2.0
1    3.0
dtype: float64

In [81]:
# with an integer index, [] looks up the LABEL 3 (value 1.0), not position 3
s[3]

1.0

In [82]:
# .loc is label-based: label 3 -> 1.0
s.loc[3]

1.0

In [83]:
# .iloc is position-based: the fourth element -> 3.0
s.iloc[3]

3.0

In [84]:
# integer data with string labels
s2 = pd.Series(np.arange(4), index=['a','b','c','d'])

a    0
b    1
c    2
d    3
dtype: int64

In [85]:
# Positional access on a string-labelled Series: .iloc states that explicitly.
# Plain s2[3] only works through []'s integer-position fallback, which newer
# pandas releases deprecate.
s2.iloc[3]

3

In [86]:
# drop returns a new Series without label 'a'; s2 itself is left untouched
s3 = s2.drop('a')

b    1
c    2
d    3
dtype: int64

In [87]:
# still contains 'a': drop did not modify s2 in place
s2

a    0
b    1
c    2
d    3
dtype: int64

## Food Inspections

In [88]:
# read the dataset using pandas
# (the path is relative, so the CSV must sit in the notebook's working directory)
import pandas as pd
df = pd.read_csv("Food_Inspections.csv")

Unnamed: 0,Inspection ID,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
0,2528320,LOS AMANTES,LOS AMANTES,67265.0,Restaurant,Risk 1 (High),4753 W 47TH ST,CHICAGO,IL,60632.0,08/20/2021,Non-Inspection,No Entry,,41.807533,-87.742906,"(-87.74290644979058, 41.80753274427624)"
1,2528300,LAKELAND INC,ALDEN LAKELAND NURSING HOME,2204175.0,Long Term Care,Risk 1 (High),820 W LAWRENCE AVE,CHICAGO,IL,60640.0,08/20/2021,Canvass,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.969249,-87.651102,"(-87.65110161256266, 41.96924936435865)"
2,2528284,NOBLE THAI,NOBLE THAI,2802728.0,Restaurant,Risk 1 (High),1371 W CHICAGO AVE,CHICAGO,IL,60642.0,08/19/2021,License,No Entry,,41.896044,-87.662187,"(-87.6621873276828, 41.89604365076047)"
3,2528277,FAMILY DOLLAR STORE #7078,FAMILY DOLLAR STORE #7078,1682197.0,Grocery Store,Risk 3 (Low),3916 W FULLERTON AVE,CHICAGO,IL,60647.0,08/19/2021,Complaint Re-Inspection,Pass,,41.924618,-87.725020,"(-87.72502013417774, 41.92461775381773)"
4,2528273,EL PADRE NUESTRO INC,EL PADRE NUESTRO,2703927.0,Restaurant,Risk 1 (High),4959 N KEDZIE AVE,CHICAGO,IL,60625.0,08/19/2021,Canvass,Out of Business,,41.972002,-87.708429,"(-87.70842939666973, 41.972002099134194)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224582,98385,SUBWAY 26199,SUBWAY,1488911.0,Restaurant,Risk 1 (High),2851 N MILWAUKEE AVE,CHICAGO,IL,60618.0,02/18/2010,Canvass,Pass,"35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTR...",41.933099,-87.713780,"(-87.71377975845846, 41.9330986774443)"
224583,68070,JAI YEN,JAIYEN RESTAURANT,1678980.0,Restaurant,Risk 1 (High),3734-3736 N BROADWAY,CHICAGO,IL,60613.0,01/28/2010,Canvass,Pass w/ Conditions,4. SOURCE OF CROSS CONTAMINATION CONTROLLED I....,41.950232,-87.649181,"(-87.64918094440476, 41.950232386786)"
224584,197228,"ITALIAN VILLAGE RESTAURANT, INC.",ITALIAN VILLAGE / VIVERE,1237.0,Restaurant,Risk 1 (High),71 W MONROE ST,CHICAGO,IL,60603.0,01/21/2010,Complaint Re-Inspection,Pass,,41.880592,-87.630442,"(-87.63044232703336, 41.880591895360816)"
224585,67842,STAR EAST,STAR EAST,1248852.0,Restaurant,Risk 1 (High),5712 W FULLERTON AVE,CHICAGO,IL,60639.0,01/12/2010,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.924043,-87.769032,"(-87.769032210279, 41.92404294400852)"


In [89]:
# just the beginning of the dataset (first 5 rows)
df.head(5)

Unnamed: 0,Inspection ID,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
0,2528320,LOS AMANTES,LOS AMANTES,67265.0,Restaurant,Risk 1 (High),4753 W 47TH ST,CHICAGO,IL,60632.0,08/20/2021,Non-Inspection,No Entry,,41.807533,-87.742906,"(-87.74290644979058, 41.80753274427624)"
1,2528300,LAKELAND INC,ALDEN LAKELAND NURSING HOME,2204175.0,Long Term Care,Risk 1 (High),820 W LAWRENCE AVE,CHICAGO,IL,60640.0,08/20/2021,Canvass,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.969249,-87.651102,"(-87.65110161256266, 41.96924936435865)"
2,2528284,NOBLE THAI,NOBLE THAI,2802728.0,Restaurant,Risk 1 (High),1371 W CHICAGO AVE,CHICAGO,IL,60642.0,08/19/2021,License,No Entry,,41.896044,-87.662187,"(-87.6621873276828, 41.89604365076047)"
3,2528277,FAMILY DOLLAR STORE #7078,FAMILY DOLLAR STORE #7078,1682197.0,Grocery Store,Risk 3 (Low),3916 W FULLERTON AVE,CHICAGO,IL,60647.0,08/19/2021,Complaint Re-Inspection,Pass,,41.924618,-87.72502,"(-87.72502013417774, 41.92461775381773)"
4,2528273,EL PADRE NUESTRO INC,EL PADRE NUESTRO,2703927.0,Restaurant,Risk 1 (High),4959 N KEDZIE AVE,CHICAGO,IL,60625.0,08/19/2021,Canvass,Out of Business,,41.972002,-87.708429,"(-87.70842939666973, 41.972002099134194)"


In [90]:
# number of records (rows)
len(df)

224587

### Accessing Records & Columns

In [91]:
# a single record by index label; with the default RangeIndex the labels are
# 0..n-1, so label 0 is also the first row
df.loc[0]

Inspection ID                                      2528320
DBA Name                                       LOS AMANTES
AKA Name                                       LOS AMANTES
License #                                          67265.0
Facility Type                                   Restaurant
Risk                                         Risk 1 (High)
Address                                    4753 W 47TH ST 
City                                               CHICAGO
State                                                   IL
Zip                                                60632.0
Inspection Date                                 08/20/2021
Inspection Type                             Non-Inspection
Results                                           No Entry
Violations                                             NaN
Latitude                                         41.807533
Longitude                                       -87.742906
Location           (-87.74290644979058, 41.80753274427624)

In [94]:
# Inspection ID looks to be unique: the number of distinct values equals len(df)
df["Inspection ID"].nunique()

224587

In [96]:
# data about the data frame: per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224587 entries, 0 to 224586
Data columns (total 17 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Inspection ID    224587 non-null  int64  
 1   DBA Name         224585 non-null  object 
 2   AKA Name         222111 non-null  object 
 3   License #        224570 non-null  float64
 4   Facility Type    219652 non-null  object 
 5   Risk             224516 non-null  object 
 6   Address          224587 non-null  object 
 7   City             224419 non-null  object 
 8   State            224529 non-null  object 
 9   Zip              224533 non-null  float64
 10  Inspection Date  224587 non-null  object 
 11  Inspection Type  224586 non-null  object 
 12  Results          224587 non-null  object 
 13  Violations       164414 non-null  object 
 14  Latitude         223825 non-null  float64
 15  Longitude        223825 non-null  float64
 16  Location         223825 non-null  object

In [97]:
# set a new index
# set_index returns a new DataFrame, so the result is assigned back to df
# rather than passing inplace=True
# NOTE: not re-runnable as is - after the first run "Inspection ID" is the index,
# so a second run would look for a column that no longer exists
df = df.set_index("Inspection ID")

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2528320,LOS AMANTES,LOS AMANTES,67265.0,Restaurant,Risk 1 (High),4753 W 47TH ST,CHICAGO,IL,60632.0,08/20/2021,Non-Inspection,No Entry,,41.807533,-87.742906,"(-87.74290644979058, 41.80753274427624)"
2528300,LAKELAND INC,ALDEN LAKELAND NURSING HOME,2204175.0,Long Term Care,Risk 1 (High),820 W LAWRENCE AVE,CHICAGO,IL,60640.0,08/20/2021,Canvass,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.969249,-87.651102,"(-87.65110161256266, 41.96924936435865)"
2528284,NOBLE THAI,NOBLE THAI,2802728.0,Restaurant,Risk 1 (High),1371 W CHICAGO AVE,CHICAGO,IL,60642.0,08/19/2021,License,No Entry,,41.896044,-87.662187,"(-87.6621873276828, 41.89604365076047)"
2528277,FAMILY DOLLAR STORE #7078,FAMILY DOLLAR STORE #7078,1682197.0,Grocery Store,Risk 3 (Low),3916 W FULLERTON AVE,CHICAGO,IL,60647.0,08/19/2021,Complaint Re-Inspection,Pass,,41.924618,-87.725020,"(-87.72502013417774, 41.92461775381773)"
2528273,EL PADRE NUESTRO INC,EL PADRE NUESTRO,2703927.0,Restaurant,Risk 1 (High),4959 N KEDZIE AVE,CHICAGO,IL,60625.0,08/19/2021,Canvass,Out of Business,,41.972002,-87.708429,"(-87.70842939666973, 41.972002099134194)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98385,SUBWAY 26199,SUBWAY,1488911.0,Restaurant,Risk 1 (High),2851 N MILWAUKEE AVE,CHICAGO,IL,60618.0,02/18/2010,Canvass,Pass,"35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTR...",41.933099,-87.713780,"(-87.71377975845846, 41.9330986774443)"
68070,JAI YEN,JAIYEN RESTAURANT,1678980.0,Restaurant,Risk 1 (High),3734-3736 N BROADWAY,CHICAGO,IL,60613.0,01/28/2010,Canvass,Pass w/ Conditions,4. SOURCE OF CROSS CONTAMINATION CONTROLLED I....,41.950232,-87.649181,"(-87.64918094440476, 41.950232386786)"
197228,"ITALIAN VILLAGE RESTAURANT, INC.",ITALIAN VILLAGE / VIVERE,1237.0,Restaurant,Risk 1 (High),71 W MONROE ST,CHICAGO,IL,60603.0,01/21/2010,Complaint Re-Inspection,Pass,,41.880592,-87.630442,"(-87.63044232703336, 41.880591895360816)"
67842,STAR EAST,STAR EAST,1248852.0,Restaurant,Risk 1 (High),5712 W FULLERTON AVE,CHICAGO,IL,60639.0,01/12/2010,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.924043,-87.769032,"(-87.769032210279, 41.92404294400852)"


In [99]:
# slicing by index label with .loc: both end labels are included, and the rows
# returned are those lying between the two labels in the frame's current order
df.loc[98385:197241]
# positional alternative: df.iloc[0:2]

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
98385,SUBWAY 26199,SUBWAY,1488911.0,Restaurant,Risk 1 (High),2851 N MILWAUKEE AVE,CHICAGO,IL,60618.0,02/18/2010,Canvass,Pass,"35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTR...",41.933099,-87.71378,"(-87.71377975845846, 41.9330986774443)"
68070,JAI YEN,JAIYEN RESTAURANT,1678980.0,Restaurant,Risk 1 (High),3734-3736 N BROADWAY,CHICAGO,IL,60613.0,01/28/2010,Canvass,Pass w/ Conditions,4. SOURCE OF CROSS CONTAMINATION CONTROLLED I....,41.950232,-87.649181,"(-87.64918094440476, 41.950232386786)"
197228,"ITALIAN VILLAGE RESTAURANT, INC.",ITALIAN VILLAGE / VIVERE,1237.0,Restaurant,Risk 1 (High),71 W MONROE ST,CHICAGO,IL,60603.0,01/21/2010,Complaint Re-Inspection,Pass,,41.880592,-87.630442,"(-87.63044232703336, 41.880591895360816)"
67842,STAR EAST,STAR EAST,1248852.0,Restaurant,Risk 1 (High),5712 W FULLERTON AVE,CHICAGO,IL,60639.0,01/12/2010,Canvass,Pass,32. FOOD AND NON-FOOD CONTACT SURFACES PROPERL...,41.924043,-87.769032,"(-87.769032210279, 41.92404294400852)"
197241,COSI,COSI,1597223.0,Restaurant,Risk 1 (High),33 N DEARBORN ST,CHICAGO,IL,60602.0,02/01/2010,Canvass,Pass,"35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTR...",41.882806,-87.629281,"(-87.62928149568309, 41.88280622661767)"


In [102]:
# a single column -> a Series (it keeps the frame's index)
df['Longitude']

Inspection ID
2528320   -87.742906
2528300   -87.651102
2528284   -87.662187
2528277   -87.725020
2528273   -87.708429
             ...    
98385     -87.713780
68070     -87.649181
197228    -87.630442
67842     -87.769032
197241    -87.629281
Name: Longitude, Length: 224587, dtype: float64

In [103]:
# multiple columns (a list inside the brackets) -> a data frame,
# with the columns in the order listed
df[['Longitude','Latitude']]

Unnamed: 0_level_0,Longitude,Latitude
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1
2528320,-87.742906,41.807533
2528300,-87.651102,41.969249
2528284,-87.662187,41.896044
2528277,-87.725020,41.924618
2528273,-87.708429,41.972002
...,...,...
98385,-87.713780,41.933099
68070,-87.649181,41.950232
197228,-87.630442,41.880592
67842,-87.769032,41.924043


In [104]:
# the possible values for a result (distinct values of the Results column)
df["Results"].unique()

array(['No Entry', 'Pass w/ Conditions', 'Pass', 'Out of Business',
       'Fail', 'Not Ready', 'Business Not Located'], dtype=object)

In [105]:
# boolean series of whether an inspection failed or not (one True/False per row)
df['Results'] == "Fail"

Inspection ID
2528320    False
2528300    False
2528284    False
2528277    False
2528273    False
           ...  
98385      False
68070      False
197228     False
67842      False
197241     False
Name: Results, Length: 224587, dtype: bool

In [106]:
# show only those rows: indexing with the boolean Series keeps the rows where it is True
df[df['Results'] == "Fail"]

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2528252,FURAMA RESTAURANT INC,FURAMA RESTAURANT,9821.0,Restaurant,Risk 1 (High),4936 N BROADWAY,CHICAGO,IL,60640.0,08/19/2021,Canvass,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.972613,-87.659913,"(-87.65991334912181, 41.97261317872583)"
2528232,TANUKI,TANUKI,2220756.0,Restaurant,Risk 1 (High),3006 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/18/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.936489,-87.654137,"(-87.65413668138018, 41.93648903398478)"
2528213,LAS ISLAS MARIAS,LAS ISLAS MARIAS,1095992.0,Restaurant,Risk 1 (High),4770 W GRAND AVE,CHICAGO,IL,60639.0,08/18/2021,Canvass,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.913999,-87.745864,"(-87.74586369218258, 41.91399920930635)"
2524155,THE ROOST CAROLINA KITCHEN,THE ROOST CHICKEN AND BISCUITS,2609334.0,Restaurant,Risk 1 (High),3474 N CLARK ST,CHICAGO,IL,60657.0,08/17/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.945380,-87.655220,"(-87.65522014489368, 41.94537989344811)"
2524012,BAR COCINA,BAR COCINA,2334704.0,Restaurant,Risk 1 (High),2901 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/12/2021,Complaint,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.934501,-87.653775,"(-87.65377549058545, 41.93450053757158)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231210,HONG KONG MARKET,HONG KONG MARKET,2009448.0,Grocery Store,Risk 3 (Low),520 W 24TH PL,CHICAGO,IL,60616.0,02/09/2010,No Entry,Fail,,41.848237,-87.640956,"(-87.64095552751985, 41.848237165925426)"
67968,MCDONALDS,MCDONALD'S,84425.0,Restaurant,Risk 2 (Medium),4038 W Belmont AVE,CHICAGO,IL,60641.0,01/21/2010,Canvass,Fail,24. DISH WASHING FACILITIES: PROPERLY DESIGNED...,41.939183,-87.728760,"(-87.72876043531848, 41.93918260682479)"
197276,DUNKIN' DONUTS,DUNKIN' DONUTS,32957.0,Restaurant,Risk 2 (Medium),100 W RANDOLPH ST,CHICAGO,IL,60601.0,02/17/2010,Out of Business,Fail,,41.884586,-87.631010,"(-87.63101044588599, 41.88458626715456)"
114290,Eggsperience,Eggsperience,2009133.0,Restaurant,Risk 1 (High),33 W ONTARIO ST,CHICAGO,IL,60654.0,01/21/2010,License,Fail,2. FACILITIES TO MAINTAIN PROPER TEMPERATURE -...,41.893142,-87.629531,"(-87.62953060218635, 41.8931417574988)"


In [107]:
# another way to query failed inspections: the same filter written as a query string
df.query('Results == "Fail"')

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2528252,FURAMA RESTAURANT INC,FURAMA RESTAURANT,9821.0,Restaurant,Risk 1 (High),4936 N BROADWAY,CHICAGO,IL,60640.0,08/19/2021,Canvass,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.972613,-87.659913,"(-87.65991334912181, 41.97261317872583)"
2528232,TANUKI,TANUKI,2220756.0,Restaurant,Risk 1 (High),3006 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/18/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.936489,-87.654137,"(-87.65413668138018, 41.93648903398478)"
2528213,LAS ISLAS MARIAS,LAS ISLAS MARIAS,1095992.0,Restaurant,Risk 1 (High),4770 W GRAND AVE,CHICAGO,IL,60639.0,08/18/2021,Canvass,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.913999,-87.745864,"(-87.74586369218258, 41.91399920930635)"
2524155,THE ROOST CAROLINA KITCHEN,THE ROOST CHICKEN AND BISCUITS,2609334.0,Restaurant,Risk 1 (High),3474 N CLARK ST,CHICAGO,IL,60657.0,08/17/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.945380,-87.655220,"(-87.65522014489368, 41.94537989344811)"
2524012,BAR COCINA,BAR COCINA,2334704.0,Restaurant,Risk 1 (High),2901 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/12/2021,Complaint,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.934501,-87.653775,"(-87.65377549058545, 41.93450053757158)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231210,HONG KONG MARKET,HONG KONG MARKET,2009448.0,Grocery Store,Risk 3 (Low),520 W 24TH PL,CHICAGO,IL,60616.0,02/09/2010,No Entry,Fail,,41.848237,-87.640956,"(-87.64095552751985, 41.848237165925426)"
67968,MCDONALDS,MCDONALD'S,84425.0,Restaurant,Risk 2 (Medium),4038 W Belmont AVE,CHICAGO,IL,60641.0,01/21/2010,Canvass,Fail,24. DISH WASHING FACILITIES: PROPERLY DESIGNED...,41.939183,-87.728760,"(-87.72876043531848, 41.93918260682479)"
197276,DUNKIN' DONUTS,DUNKIN' DONUTS,32957.0,Restaurant,Risk 2 (Medium),100 W RANDOLPH ST,CHICAGO,IL,60601.0,02/17/2010,Out of Business,Fail,,41.884586,-87.631010,"(-87.63101044588599, 41.88458626715456)"
114290,Eggsperience,Eggsperience,2009133.0,Restaurant,Risk 1 (High),33 W ONTARIO ST,CHICAGO,IL,60654.0,01/21/2010,License,Fail,2. FACILITIES TO MAINTAIN PROPER TEMPERATURE -...,41.893142,-87.629531,"(-87.62953060218635, 41.8931417574988)"


In [108]:
# all inspections that failed
# .copy() gives an independent frame, so the later "DBA Name" clean-up edits
# `failed` itself rather than a slice of df
failed = df[df['Results'] == "Fail"].copy()

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2528252,FURAMA RESTAURANT INC,FURAMA RESTAURANT,9821.0,Restaurant,Risk 1 (High),4936 N BROADWAY,CHICAGO,IL,60640.0,08/19/2021,Canvass,Fail,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.972613,-87.659913,"(-87.65991334912181, 41.97261317872583)"
2528232,TANUKI,TANUKI,2220756.0,Restaurant,Risk 1 (High),3006 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/18/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.936489,-87.654137,"(-87.65413668138018, 41.93648903398478)"
2528213,LAS ISLAS MARIAS,LAS ISLAS MARIAS,1095992.0,Restaurant,Risk 1 (High),4770 W GRAND AVE,CHICAGO,IL,60639.0,08/18/2021,Canvass,Fail,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.913999,-87.745864,"(-87.74586369218258, 41.91399920930635)"
2524155,THE ROOST CAROLINA KITCHEN,THE ROOST CHICKEN AND BISCUITS,2609334.0,Restaurant,Risk 1 (High),3474 N CLARK ST,CHICAGO,IL,60657.0,08/17/2021,Canvass,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.945380,-87.655220,"(-87.65522014489368, 41.94537989344811)"
2524012,BAR COCINA,BAR COCINA,2334704.0,Restaurant,Risk 1 (High),2901 N SHEFFIELD AVE,CHICAGO,IL,60657.0,08/12/2021,Complaint,Fail,"1. PERSON IN CHARGE PRESENT, DEMONSTRATES KNOW...",41.934501,-87.653775,"(-87.65377549058545, 41.93450053757158)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231210,HONG KONG MARKET,HONG KONG MARKET,2009448.0,Grocery Store,Risk 3 (Low),520 W 24TH PL,CHICAGO,IL,60616.0,02/09/2010,No Entry,Fail,,41.848237,-87.640956,"(-87.64095552751985, 41.848237165925426)"
67968,MCDONALDS,MCDONALD'S,84425.0,Restaurant,Risk 2 (Medium),4038 W Belmont AVE,CHICAGO,IL,60641.0,01/21/2010,Canvass,Fail,24. DISH WASHING FACILITIES: PROPERLY DESIGNED...,41.939183,-87.728760,"(-87.72876043531848, 41.93918260682479)"
197276,DUNKIN' DONUTS,DUNKIN' DONUTS,32957.0,Restaurant,Risk 2 (Medium),100 W RANDOLPH ST,CHICAGO,IL,60601.0,02/17/2010,Out of Business,Fail,,41.884586,-87.631010,"(-87.63101044588599, 41.88458626715456)"
114290,Eggsperience,Eggsperience,2009133.0,Restaurant,Risk 1 (High),33 W ONTARIO ST,CHICAGO,IL,60654.0,01/21/2010,License,Fail,2. FACILITIES TO MAINTAIN PROPER TEMPERATURE -...,41.893142,-87.629531,"(-87.62953060218635, 41.8931417574988)"


In [109]:
# business names that are most frequent in failed inspections
# (value_counts sorts by count, largest first)
failed["DBA Name"].value_counts()

SUBWAY                    383
DUNKIN DONUTS             238
MCDONALD'S                120
7-ELEVEN                   71
MCDONALDS                  60
                         ... 
NITECAP COFFEE BAR LLC      1
REDMOND'S PUB               1
MI LINDO SAN JOSE           1
MARISCOS EL KORA, INC.      1
YOUR KITCHEN                1
Name: DBA Name, Length: 16981, dtype: int64

In [110]:
# remove single quote and convert to uppercase, so spelling variants such as
# MCDONALD'S / MCDONALDS collapse into one name
# regex=False matches the apostrophe as a plain literal; the default value of
# `regex` differs between pandas versions, so it is passed explicitly
failed["DBA Name"] = failed["DBA Name"].str.replace("'", "", regex=False).str.upper()

In [111]:
# updated value counts: spelling variants are now merged, so there are fewer distinct names
failed["DBA Name"].value_counts()

SUBWAY                 412
DUNKIN DONUTS          259
MCDONALDS              232
7-ELEVEN                80
JIMMY JOHNS             72
                      ... 
BURGER KING #7674        1
ARMITAGE FOOD            1
TASTE OF TRINIDAD        1
PALERMOS RESTAURANT      1
YOUR KITCHEN             1
Name: DBA Name, Length: 16758, dtype: int64

In [113]:
# count inspections per (business name, result) pair
# this gives a Series with a multiindex (an index with multiple levels)
# NOTE(review): this groups the raw df names, not the cleaned-up `failed` names,
# so spelling variants stay separate here - confirm that is intended
results = df.groupby(["DBA Name", "Results"]).size()

DBA Name                Results             
#1 CHINA EXPRESS, LTD.  Out of Business          1
                        Pass                     1
#1 CHOP SUEY            Fail                     8
                        Out of Business          1
                        Pass                    21
                                                ..
subway restaurant 1     Pass w/ Conditions       1
tien giang restaurant   Fail                     3
                        Pass                     1
unknown                 Business Not Located     1
vitino pizzeria         Out of Business          1
Length: 76396, dtype: int64

In [114]:
# accessing a multiindex by the outer level only
# -> a Series of that business's counts, indexed by Results
results.loc['#1 CHINA EXPRESS, LTD.']

Results
Out of Business    1
Pass               1
dtype: int64

In [115]:
# accessing a multiindex with a full (name, result) tuple -> a single count
results.loc[('#1 CHINA EXPRESS, LTD.','Pass')]

1

### Inspecting Results by Business

In [116]:
# reorganize table to show inspection result counts in table:
# unstack moves the inner index level (Results) into columns, and
# (name, result) pairs that never occur are filled with 0
r = results.unstack(fill_value=0)
r

Results,Business Not Located,Fail,No Entry,Not Ready,Out of Business,Pass,Pass w/ Conditions
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"#1 CHINA EXPRESS, LTD.",0,0,0,0,1,1,0
#1 CHOP SUEY,0,8,0,0,1,21,3
#1 CHOP SUEY RESTAURANT,0,0,0,0,1,0,0
"#1 CHOP SUEY RESTAURANT, INC",0,0,1,0,0,1,0
#1 DELI,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...
stockton,0,3,0,0,1,4,0
subway restaurant 1,0,0,0,0,1,2,1
tien giang restaurant,0,3,0,0,0,1,0
unknown,1,0,0,0,0,0,0


In [117]:
# Fold the four outcomes this notebook treats as "not inspected" into one count.
r['NotInspected'] = r[
    ['Business Not Located', 'No Entry', 'Not Ready', 'Out of Business']
].sum(axis=1)

In [118]:
# display the table with the new NotInspected column
r

Results,Business Not Located,Fail,No Entry,Not Ready,Out of Business,Pass,Pass w/ Conditions,NotInspected
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"#1 CHINA EXPRESS, LTD.",0,0,0,0,1,1,0,1
#1 CHOP SUEY,0,8,0,0,1,21,3,1
#1 CHOP SUEY RESTAURANT,0,0,0,0,1,0,0,1
"#1 CHOP SUEY RESTAURANT, INC",0,0,1,0,0,1,0,1
#1 DELI,0,0,0,0,0,0,2,0
...,...,...,...,...,...,...,...,...
stockton,0,3,0,0,1,4,0,1
subway restaurant 1,0,0,0,0,1,2,1,1
tien giang restaurant,0,3,0,0,0,1,0,0
unknown,1,0,0,0,0,0,0,1


In [119]:
# remove the four columns that were combined into NotInspected
# (drop returns a new frame, which replaces r)
r = r.drop(columns=['Business Not Located','No Entry','Not Ready','Out of Business'])

Results,Fail,Pass,Pass w/ Conditions,NotInspected
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"#1 CHINA EXPRESS, LTD.",0,1,0,1
#1 CHOP SUEY,8,21,3,1
#1 CHOP SUEY RESTAURANT,0,0,0,1
"#1 CHOP SUEY RESTAURANT, INC",0,1,0,1
#1 DELI,0,0,2,0
...,...,...,...,...
stockton,3,4,0,1
subway restaurant 1,0,2,1,1
tien giang restaurant,3,1,0,0
unknown,0,0,0,1


In [120]:
# create a total inspections column
# Sum only the outcome-count columns, named explicitly. A bare r.sum(axis=1)
# also adds any derived column already on the frame, so re-running this cell
# (or running it once PctFail exists) would inflate Total.
r["Total"] = r[["Fail", "Pass", "Pass w/ Conditions", "NotInspected"]].sum(axis=1)
r

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"#1 CHINA EXPRESS, LTD.",0,1,0,1,2
#1 CHOP SUEY,8,21,3,1,33
#1 CHOP SUEY RESTAURANT,0,0,0,1,1
"#1 CHOP SUEY RESTAURANT, INC",0,1,0,1,2
#1 DELI,0,0,2,0,2
...,...,...,...,...,...
stockton,3,4,0,1,8
subway restaurant 1,0,2,1,1,4
tien giang restaurant,3,1,0,0,4
unknown,0,0,0,1,1


In [121]:
# compute the share of inspections that failed and put that in a new column
# (despite the name, PctFail is a fraction between 0 and 1, not a 0-100 percentage)
r["PctFail"] = r.Fail / r.Total

In [122]:
# display the table with the new PctFail column
r

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total,PctFail
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"#1 CHINA EXPRESS, LTD.",0,1,0,1,2,0.000000
#1 CHOP SUEY,8,21,3,1,33,0.242424
#1 CHOP SUEY RESTAURANT,0,0,0,1,1,0.000000
"#1 CHOP SUEY RESTAURANT, INC",0,1,0,1,2,0.000000
#1 DELI,0,0,2,0,2,0.000000
...,...,...,...,...,...,...
stockton,3,4,0,1,8,0.375000
subway restaurant 1,0,2,1,1,4,0.000000
tien giang restaurant,3,1,0,0,4,0.750000
unknown,0,0,0,1,1,0.000000


In [123]:
# summary statistics for the number of inspections per business name
r['Total'].describe()

count    29380.000000
mean         7.644146
std         23.173338
min          1.000000
25%          2.000000
50%          5.000000
75%         10.000000
max       3035.000000
Name: Total, dtype: float64

In [124]:
# which have high failure rates?
# (the top of this list is dominated by names with only one or two inspections)
r.sort_values(by="PctFail",  ascending=False)

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total,PctFail
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"HOT RED GRILL, INC",1,0,0,0,1,1.0
CONGRESS THEATRE,2,0,0,0,2,1.0
VILLA D'ORO,1,0,0,0,1,1.0
COMMONWEALTH OF ISRAEL DOLLAR STORE & GROCERIES,1,0,0,0,1,1.0
GREAT HUNNAN RESTAURANT,1,0,0,0,1,1.0
...,...,...,...,...,...,...
JORDAN STEAK AND LEMONADE,0,0,0,3,3,0.0
JORDAN'S GROCERY & HOUSEHOLD GOODS,0,1,0,0,1,0.0
JORDAN'S GYROS,0,1,0,1,2,0.0
JORGE'S PLACE,0,1,0,1,2,0.0


In [125]:
# Restrict to business names with more than 100 inspections, rank them by
# failure rate (highest first) and keep the first 20.
top20_fail = (
    r.loc[r["Total"] > 100]
    .sort_values("PctFail", ascending=False)
    .head(20)
)

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total,PctFail
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAS ISLAS MARIAS,50,62,37,5,154,0.324675
CITGO,57,70,30,28,185,0.308108
PAPA JOHN'S PIZZA,38,61,24,15,138,0.275362
McDONALD'S,30,68,9,5,112,0.267857
TREASURE ISLAND FOODS,27,48,22,6,103,0.262136
HAROLD'S CHICKEN SHACK,49,96,25,18,188,0.260638
DUNKIN DONUTS / BASKIN ROBBINS,35,87,20,3,145,0.241379
JIMMY JOHNS,41,99,35,3,178,0.230337
"WENDY'S PROPERTIES, LLC",27,66,24,2,119,0.226891
POPEYES,33,86,24,6,149,0.221477


In [126]:
# fraction of each business's records that fall in the NotInspected bucket
r['PctNotInspected'] = r.NotInspected / r.Total
r

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total,PctFail,PctNotInspected
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"#1 CHINA EXPRESS, LTD.",0,1,0,1,2,0.000000,0.500000
#1 CHOP SUEY,8,21,3,1,33,0.242424,0.030303
#1 CHOP SUEY RESTAURANT,0,0,0,1,1,0.000000,1.000000
"#1 CHOP SUEY RESTAURANT, INC",0,1,0,1,2,0.000000,0.500000
#1 DELI,0,0,2,0,2,0.000000,0.000000
...,...,...,...,...,...,...,...
stockton,3,4,0,1,8,0.375000,0.125000
subway restaurant 1,0,2,1,1,4,0.000000,0.250000
tien giang restaurant,3,1,0,0,4,0.750000,0.000000
unknown,0,0,0,1,1,0.000000,1.000000


In [127]:
# Same >100-inspection filter as for the failure ranking, ordered instead by
# the share of records that were not inspected; keep the top 20.
top20_not_inspected = (
    r.loc[r["Total"] > 100]
    .sort_values("PctNotInspected", ascending=False)
    .head(20)
)

Results,Fail,Pass,Pass w/ Conditions,NotInspected,Total,PctFail,PctNotInspected
DBA Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SPORTSERVICE SOLDIER FIELD,1,117,1,57,176,0.005682,0.323864
PIZZA HUT,30,80,35,32,177,0.169492,0.180791
CHARTWELLS,26,67,26,24,143,0.181818,0.167832
KENTUCKY FRIED CHICKEN,33,88,19,25,165,0.2,0.151515
CITGO,57,70,30,28,185,0.308108,0.151351
ARAMARK,23,55,22,16,116,0.198276,0.137931
SHARKS FISH & CHICKEN,31,86,41,22,180,0.172222,0.122222
7-ELEVEN,71,253,103,56,483,0.146998,0.115942
KFC,35,110,30,22,197,0.177665,0.111675
PAPA JOHN'S PIZZA,38,61,24,15,138,0.275362,0.108696


In [128]:
# `+` aligns the two Series on their index; a business name present in only
# one of the two top-20 lists has no partner value and comes out as NaN.
top20_fail.Fail + top20_not_inspected.NotInspected

DBA Name
7-ELEVEN                           NaN
ARAMARK                           39.0
ARGO TEA                           NaN
AU BON PAIN                        NaN
CERMAK PRODUCE                     NaN
CHARTWELLS                        50.0
CITGO                             85.0
Chipotle Mexican Grill             NaN
DOMINO'S PIZZA                     NaN
DUNKIN DONUTS / BASKIN ROBBINS     NaN
DUNKIN DONUTS/BASKIN ROBBINS       NaN
FRESHII                            NaN
HALSTED STREET DELI                NaN
HAROLD'S CHICKEN SHACK            67.0
J & J FISH                         NaN
JIMMY JOHNS                        NaN
KENTUCKY FRIED CHICKEN            58.0
KFC                                NaN
LAS ISLAS MARIAS                   NaN
MCDONALD'S                         NaN
McDONALD'S                         NaN
PAPA JOHN'S PIZZA                 53.0
PIZZA HUT                          NaN
POPEYES                            NaN
POTBELLY SANDWICH WORKS LLC        NaN
PRET A MANGER   

In [129]:
# Same index-aligned sum, but a label missing from one side is treated as 0
# instead of producing NaN.
top20_fail.Fail.add(top20_not_inspected.NotInspected, fill_value=0)

DBA Name
7-ELEVEN                           56.0
ARAMARK                            39.0
ARGO TEA                           13.0
AU BON PAIN                        18.0
CERMAK PRODUCE                     21.0
CHARTWELLS                         50.0
CITGO                              85.0
Chipotle Mexican Grill             22.0
DOMINO'S PIZZA                     18.0
DUNKIN DONUTS / BASKIN ROBBINS     35.0
DUNKIN DONUTS/BASKIN ROBBINS       50.0
FRESHII                            20.0
HALSTED STREET DELI                 9.0
HAROLD'S CHICKEN SHACK             67.0
J & J FISH                         13.0
JIMMY JOHNS                        41.0
KENTUCKY FRIED CHICKEN             58.0
KFC                                22.0
LAS ISLAS MARIAS                   50.0
MCDONALD'S                        120.0
McDONALD'S                         30.0
PAPA JOHN'S PIZZA                  53.0
PIZZA HUT                          32.0
POPEYES                            33.0
POTBELLY SANDWICH WORKS LLC    

### Date Conversion

In [131]:
df['Inspection Date'] # still plain strings at this point (dtype: object), not datetimes

Inspection ID
2528320    08/20/2021
2528300    08/20/2021
2528284    08/19/2021
2528277    08/19/2021
2528273    08/19/2021
              ...    
98385      02/18/2010
68070      01/28/2010
197228     01/21/2010
67842      01/12/2010
197241     02/01/2010
Name: Inspection Date, Length: 224587, dtype: object

In [132]:
# Comparing strings is lexicographic, not chronological: the column holds
# MM/DD/YYYY text, so every value starts with '0' or '1', which sorts before
# the '2' of '2020' -- the filter therefore matches no rows at all.
df[df['Inspection Date'] > '2020'] # can't do nice comparisons

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1


In [134]:
# Convert the MM/DD/YYYY strings to datetime64.
# The format is given explicitly so parsing does not depend on pandas guessing
# it (no day/month ambiguity, and no slow per-value fallback parsing on a
# column of this size).
df['Inspection Date'] = pd.to_datetime(df['Inspection Date'], format='%m/%d/%Y')

In [135]:
# Peek at the first five converted values (head() defaults to 5 rows).
df['Inspection Date'].head()

Inspection ID
2528320   2021-08-20
2528300   2021-08-20
2528284   2021-08-19
2528277   2021-08-19
2528273   2021-08-19
Name: Inspection Date, dtype: datetime64[ns]

In [136]:
# Confirm the column is now a datetime dtype rather than object.
df.dtypes['Inspection Date']

dtype('<M8[ns]')

In [138]:
# With a datetime column the comparison is chronological, and pandas will
# parse a date-like string such as '2020-02' on the right-hand side for us.
df.loc[df['Inspection Date'] > '2020-02']

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2528320,LOS AMANTES,LOS AMANTES,67265.0,Restaurant,Risk 1 (High),4753 W 47TH ST,CHICAGO,IL,60632.0,2021-08-20,Non-Inspection,No Entry,,41.807533,-87.742906,"(-87.74290644979058, 41.80753274427624)"
2528300,LAKELAND INC,ALDEN LAKELAND NURSING HOME,2204175.0,Long Term Care,Risk 1 (High),820 W LAWRENCE AVE,CHICAGO,IL,60640.0,2021-08-20,Canvass,Pass w/ Conditions,"3. MANAGEMENT, FOOD EMPLOYEE AND CONDITIONAL E...",41.969249,-87.651102,"(-87.65110161256266, 41.96924936435865)"
2528284,NOBLE THAI,NOBLE THAI,2802728.0,Restaurant,Risk 1 (High),1371 W CHICAGO AVE,CHICAGO,IL,60642.0,2021-08-19,License,No Entry,,41.896044,-87.662187,"(-87.6621873276828, 41.89604365076047)"
2528277,FAMILY DOLLAR STORE #7078,FAMILY DOLLAR STORE #7078,1682197.0,Grocery Store,Risk 3 (Low),3916 W FULLERTON AVE,CHICAGO,IL,60647.0,2021-08-19,Complaint Re-Inspection,Pass,,41.924618,-87.725020,"(-87.72502013417774, 41.92461775381773)"
2528273,EL PADRE NUESTRO INC,EL PADRE NUESTRO,2703927.0,Restaurant,Risk 1 (High),4959 N KEDZIE AVE,CHICAGO,IL,60625.0,2021-08-19,Canvass,Out of Business,,41.972002,-87.708429,"(-87.70842939666973, 41.972002099134194)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2363098,BROOKDALE LAKE SHORE DRIVE,BROOKDALE LAKE SHORE DRIVE,2294196.0,Restaurant,Risk 1 (High),2960 N LAKE SHORE DR,CHICAGO,IL,60657.0,2020-02-28,Suspected Food Poisoning,Pass w/ Conditions,10. ADEQUATE HANDWASHING SINKS PROPERLY SUPPLI...,41.936172,-87.636939,"(-87.63693905225722, 41.936171648587916)"
2359806,Jenner Academy,Jenner Academy for the Arts,23951.0,School,Risk 1 (High),1119 N Cleveland AVE,CHICAGO,IL,60610.0,2020-02-04,Canvass,Pass,10. ADEQUATE HANDWASHING SINKS PROPERLY SUPPLI...,41.902058,-87.641019,"(-87.64101894094065, 41.90205805089518)"
2365878,PALETERIA OSO POLAR II,PALETERIA OSO POLAR,2245951.0,Mobile Frozen Desserts Vendor,Risk 3 (Low),6538 N CLARK ST,CHICAGO,IL,60626.0,2020-03-17,Canvass,Out of Business,,42.001027,-87.671773,"(-87.67177268428709, 42.00102743599076)"
2362965,TACO BELL CANTINA,TACO BELL CANTINA,2657738.0,Restaurant,Risk 1 (High),2432 N MILWAUKEE AVE,CHICAGO,IL,60647.0,2020-02-27,License,Pass,,41.925332,-87.701541,"(-87.70154132847459, 41.92533164821854)"


### Null Zipcodes (which are otherwise integers)

In [139]:
# Zip codes show up as floats (60632.0, ...) rather than integers -- why?
df['Zip'].head()

Inspection ID
2528320    60632.0
2528300    60640.0
2528284    60642.0
2528277    60647.0
2528273    60625.0
Name: Zip, dtype: float64

In [140]:
# Look at a few rows whose zip code is missing.
df.loc[df['Zip'].isna()].head()

Unnamed: 0_level_0,DBA Name,AKA Name,License #,Facility Type,Risk,Address,City,State,Zip,Inspection Date,Inspection Type,Results,Violations,Latitude,Longitude,Location
Inspection ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2200428,LEO'S FOOD AND LIQUOR,LEO'S LIQUOR,2535068.0,Liquor,Risk 3 (Low),4471 W LAWRENCE AVE,,IL,,2018-08-02,License,Not Ready,,41.967993,-87.740726,"(-87.74072634914224, 41.96799335620968)"
1396156,THINK SIMPLE FOODS,,2308286.0,Restaurant,Risk 2 (Medium),141 W CHICAGO AVE,,IL,,2014-02-11,License Re-Inspection,Pass,,41.896504,-87.632784,"(-87.6327844082446, 41.89650357363852)"
2528256,PETER RUBI MARKET,PETER RUBI MARKET,2802402.0,Grocery Store,Risk 1 (High),804 W MONTROSE AVE,,IL,,2021-08-19,License,Pass w/ Conditions,5. PROCEDURES FOR RESPONDING TO VOMITING AND D...,41.961951,-87.650132,"(-87.6501320506605, 41.96195055112621)"
2523149,PETER RUBI MARKET,PETER RUBI MARKET,2802402.0,Grocery Store,Risk 1 (High),804 W MONTROSE AVE,,IL,,2021-07-26,License,Not Ready,,41.961951,-87.650132,"(-87.6501320506605, 41.96195055112621)"
2509619,47th NUTRITION,,2786354.0,,,1942 W 47TH ST,,IL,,2021-05-12,License,Not Ready,,41.808627,-87.673955,"(-87.67395513726541, 41.808626641462254)"


In [141]:
# Use -1 as a sentinel for missing zip codes so the column has no NaN left.
df['Zip'] = df['Zip'].fillna(-1)

In [142]:
# With the NaNs replaced, the float column can be cast to integers.
df['Zip'] = df.Zip.astype(int)

In [143]:
# Verify the result: the first five zip codes should now be integers.
df['Zip'].head()

Inspection ID
2528320    60632
2528300    60640
2528284    60642
2528277    60647
2528273    60625
Name: Zip, dtype: int64