### Transpose and Stacking

In [4]:
import numpy as np

# Build a 3x3 matrix holding 0..8, filled row by row.
arr = np.reshape(np.arange(9), (3, 3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [5]:
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [6]:
# .T gives the transpose of the 2-D array: rows become columns.
arr.T

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [7]:
# Two 1-D vectors of length 3 used by the stacking examples: 0..2 and 3..5.
a1, a2 = np.arange(0, 3), np.arange(3, 6)
(a1, a2)

(array([0, 1, 2]), array([3, 4, 5]))

In [8]:
# vstack treats each 1-D array as one row and stacks them vertically -> shape (2, 3).
np.vstack([a1, a2])

array([[0, 1, 2],
       [3, 4, 5]])

In [10]:
# Both vectors are 1-D with shape (3,): there is no separate row/column axis yet.
a1.shape, a2.shape

((3,), (3,))

In [11]:
# Transposing a 1-D array changes nothing: it stays a flat vector, it does not become a column.
a1.T

array([0, 1, 2])

In [9]:
# Because .T is a no-op on 1-D arrays, hstack just concatenates the two vectors end to end.
np.hstack([a1.T, a2.T])

array([0, 1, 2, 3, 4, 5])

In [12]:
# reshape(-1, 1) makes an explicit column vector; -1 lets NumPy infer the row count (3 here).
a1.reshape(-1,1)

array([[0],
       [1],
       [2]])

In [13]:
# With explicit column vectors, hstack places them side by side -> shape (3, 2).
np.hstack([a1.reshape(-1,1), a2.reshape(-1,1)])

array([[0, 3],
       [1, 4],
       [2, 5]])

In [14]:
# Another route to the same (3, 2) result: stack the vectors as rows, then transpose the 2-D array.
np.vstack([a1, a2]).T

array([[0, 3],
       [1, 4],
       [2, 5]])

In [None]:
# Shape of the "transposed" 1-D vector. This cell has no recorded output; given that
# a1.T displays unchanged above, the shape is expected to still be (3,) - TODO run to confirm.
a1.T.shape

### Boolean Indexing

In [15]:
# Sample vector mixing positive and negative integers for the masking examples.
d = np.array((1, 2, -1, 3, -3))

array([ 1,  2, -1,  3, -3])

In [16]:
# Indexing with a boolean array keeps the positions where the mask is True (here the 1st and 4th).
d[np.array([True, False, False, True, False])]

array([1, 3])

In [17]:
# An elementwise comparison yields a boolean array of the same length, usable as a mask.
(d < 0)

array([False, False,  True, False,  True])

In [18]:
# Build the mask and apply it in one step: select only the negative entries.
d[d < 0]

array([-1, -3])

In [19]:
# Assigning through a boolean mask overwrites the selected entries in place: negatives become 0.
d[d < 0] = 0

In [20]:
d

array([1, 2, 0, 3, 0])

In [21]:
# Start again from the original values, then flip the sign of the negative
# entries in place, so every element ends up non-negative.
d = np.array((1, 2, -1, 3, -3))
d[d < 0] = -d[d < 0]
d

array([1, 2, 1, 3, 3])

## Series Operations

In [22]:
import pandas as pd

In [30]:
# Series of the first four perfect squares; with no index given, labels default to 0..3.
s = pd.Series([n * n for n in range(1, 5)])

0     1
1     4
2     9
3    16
dtype: int64

In [31]:
# With the default 0..n-1 index, label 1 is also position 1, so this returns the second value.
s[1]

4

In [24]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [25]:
list(s.index)

[0, 1, 2, 3]

In [26]:
# Underlying data as a NumPy array. to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
s.to_numpy()

array([ 1,  4,  9, 16])

In [27]:
# Same values as before, now with explicit string labels (label -> value, in insertion order).
s = pd.Series({'d': 1, 'b': 4, 'c': 9, 'a': 16})

d     1
b     4
c     9
a    16
dtype: int64

In [28]:
# Index labels as a NumPy array (to_numpy() is the recommended replacement for .values).
s.index.to_numpy()

array(['d', 'b', 'c', 'a'], dtype=object)

In [32]:
# Index labels need not be unique: 'd' is used for both the first and the last value.
s = pd.Series([1, 4, 9, 16], index=list('dbcd'))

d     1
b     4
c     9
d    16
dtype: int64

In [33]:
s['b']

4

In [34]:
# 'd' labels two rows, so the lookup returns a Series with both matches rather than a scalar.
s['d']

d     1
d    16
dtype: int64

In [35]:
# Per-state figures keyed by state name; the labels keep the order given here.
states = pd.Series(
    [35000, 71000, 16000, 5000],
    index=['Ohio', 'Texas', 'Oregon', 'Utah'],
)

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [36]:
states['Ohio']

35000

In [37]:
# Positional access must go through .iloc. Plain states[1] on a string-labelled
# Series relies on a deprecated integer-position fallback that emits a FutureWarning.
states.iloc[1]

  states[1]


71000

In [38]:
# Integer labels that deliberately differ from positions: label 3 -> 3, label 2 -> 4, label 1 -> 1.
s = pd.Series({3: 3, 2: 4, 1: 1})

3    3
2    4
1    1
dtype: int64

In [39]:
# Integer keys are matched against the index labels: label 2 maps to 4, even though it sits at position 1.
s[2]

4

In [40]:
# Integer keys on an integer-labelled Series are looked up as labels, not
# positions. No row is labelled 0, so the lookup raises KeyError; catch it and
# print the message so the notebook still runs from top to bottom.
try:
    s[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [42]:
states

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [41]:
# The None value is stored as NaN, which makes the whole Series float64.
# 'Utah' is not included here (a variant adding 'Utah': None was left commented
# out), so that label exists only in `states`.
states2 = pd.Series({'California': None, 'Ohio': 35000, 'Oregon': 16000, 'Texas': 71000})

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [43]:
states2.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [44]:
# Use the null mask as a row selector to keep only the entries with missing values.
states2.loc[states2.isna()]

California   NaN
dtype: float64

In [46]:
states

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [47]:
states2

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [45]:
# Addition aligns on index labels and returns the union of both indexes.
# A label missing from one side (Utah) or holding NaN (California) yields NaN.
states_sum = states + states2

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [48]:
# fill_value=0 stands in for a value that is missing on one side only, so Utah
# becomes 5000. California stays NaN because it has no valid value on either side.
states.add(states2,fill_value=0)

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah            5000.0
dtype: float64

In [49]:
# Small Series with a repeated label: two 'Texas' rows followed by one 'Ohio' row.
states4 = pd.Series([3000, 5000, 8000], index=["Texas"] * 2 + ["Ohio"])

Texas    3000
Texas    5000
Ohio     8000
dtype: int64

In [50]:
# Three 'Texas' rows (the middle one missing) plus one 'Ohio' row; the None
# becomes NaN and the Series is stored as float64.
states3 = pd.Series([3000, None, 7000, 8000], index=["Texas"] * 3 + ["Ohio"])

Texas    3000.0
Texas       NaN
Texas    7000.0
Ohio     8000.0
dtype: float64

In [51]:
# Arithmetic aligns on labels. 'Texas' occurs once in states and three times in
# states3, so it yields three rows; labels present on only one side (Oregon, Utah) become NaN.
states + states3

Ohio      43000.0
Oregon        NaN
Texas     74000.0
Texas         NaN
Texas     78000.0
Utah          NaN
dtype: float64

In [52]:
# Duplicate labels on both sides are paired in every combination:
# 3 'Texas' rows x 2 'Texas' rows -> 6 result rows. Any pair involving NaN is NaN.
states3 + states4

Ohio     16000.0
Texas     6000.0
Texas     8000.0
Texas        NaN
Texas        NaN
Texas    10000.0
Texas    12000.0
dtype: float64

In [53]:
# Label the index and the Series itself; both names show up when the Series is displayed.
# These assignments modify states_sum in place.
states_sum.index.name = 'state'
states_sum.name = 'population'

In [54]:
states_sum

state
California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
Name: population, dtype: float64

## Data Frames

In [55]:
# Small example frame: each dict key becomes a column, each list supplies that column's values.
df = pd.DataFrame(
    {
        'state': ['Ohio', 'Ohio', 'Ohio', 'California'],
        'count': [35, 40, 54, 102],
        'year': [1980, 1990, 2000, 2000],
    }
)

Unnamed: 0,state,count,year
0,Ohio,35,1980
1,Ohio,40,1990
2,Ohio,54,2000
3,California,102,2000


In [56]:
# Whole frame as one 2-D NumPy array. Mixed column types force dtype=object.
# to_numpy() is the accessor the pandas documentation recommends over .values.
df.to_numpy()

array([['Ohio', 35, 1980],
       ['Ohio', 40, 1990],
       ['Ohio', 54, 2000],
       ['California', 102, 2000]], dtype=object)

In [57]:
df

Unnamed: 0,state,count,year
0,Ohio,35,1980
1,Ohio,40,1990
2,Ohio,54,2000
3,California,102,2000


In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   state   4 non-null      object
 1   count   4 non-null      int64 
 2   year    4 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 228.0+ bytes


In [59]:
df

Unnamed: 0,state,count,year
0,Ohio,35,1980
1,Ohio,40,1990
2,Ohio,54,2000
3,California,102,2000


In [62]:
pd.Series([3,2,1,0],index=[3,2,1,0])

3    3
2    2
1    1
0    0
dtype: int64

In [60]:
# Assigning a Series aligns on index labels, not on position: label 3 -> 3,
# label 0 -> 0, so the new column reads 0, 1, 2, 3 from top to bottom.
# A plain NumPy array would instead be assigned positionally, e.g.:
# df['count2'] = np.array([3,2,1,0])
df['count2'] = pd.Series([3,2,1,0],index=[3,2,1,0])

In [61]:
df

Unnamed: 0,state,count,year,count2
0,Ohio,35,1980,0
1,Ohio,40,1990,1
2,Ohio,54,2000,2
3,California,102,2000,3


In [64]:
df['year']

0    1980
1    1990
2    2000
3    2000
Name: year, dtype: int64

In [65]:
# Attribute access is shorthand for df['year']. It only works when the column
# name is a valid Python identifier that does not clash with an existing
# DataFrame attribute (e.g. df.count is the method, not the 'count' column).
df.year

0    1980
1    1990
2    2000
3    2000
Name: year, dtype: int64

### Penguins Data

In [67]:
import pandas as pd  # already imported earlier in this notebook (harmless repeat)
# NOTE: this rebinds `df`, replacing the small example frame built above.
# The CSV path is relative to the notebook's working directory.
df = pd.read_csv('penguins_lter.csv')

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
0,PAL0708,1,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186.0,3800.0,FEMALE,8.94956,-24.69454,
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
3,PAL0708,4,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A2,Yes,11/16/07,,,,,,,,Adult not sampled.
4,PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193.0,3450.0,FEMALE,8.76651,-25.32426,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,PAL0910,120,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A2,No,12/1/09,,,,,,,,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [68]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   studyName            344 non-null    object 
 1   Sample Number        344 non-null    int64  
 2   Species              344 non-null    object 
 3   Region               344 non-null    object 
 4   Island               344 non-null    object 
 5   Stage                344 non-null    object 
 6   Individual ID        344 non-null    object 
 7   Clutch Completion    344 non-null    object 
 8   Date Egg             344 non-null    object 
 9   Culmen Length (mm)   342 non-null    float64
 10  Culmen Depth (mm)    342 non-null    float64
 11  Flipper Length (mm)  342 non-null    float64
 12  Body Mass (g)        342 non-null    float64
 13  Sex                  334 non-null    object 
 14  Delta 15 N (o/oo)    330 non-null    float64
 15  Delta 13 C (o/oo)    331 non-null    flo

In [69]:
# Boolean-mask row selection: keep only the rows whose Island is "Biscoe".
df.loc[df["Island"] == "Biscoe"]

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
20,PAL0708,21,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N11A1,Yes,11/12/07,37.8,18.3,174.0,3400.0,FEMALE,8.73762,-25.09383,
21,PAL0708,22,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N11A2,Yes,11/12/07,37.7,18.7,180.0,3600.0,MALE,8.66271,-25.06390,
22,PAL0708,23,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N12A1,Yes,11/12/07,35.9,19.2,189.0,3800.0,FEMALE,9.22286,-25.03474,
23,PAL0708,24,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N12A2,Yes,11/12/07,38.2,18.1,185.0,3950.0,MALE,8.43423,-25.22664,
24,PAL0708,25,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N13A1,Yes,11/10/07,38.8,17.2,180.0,3800.0,MALE,9.63954,-25.29856,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,PAL0910,120,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A2,No,12/1/09,,,,,,,,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [70]:
# Keep the rows whose culmen length is strictly greater than 40 mm.
df.loc[df["Culmen Length (mm)"].gt(40)]

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
9,PAL0708,10,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N5A2,Yes,11/9/07,42.0,20.2,190.0,4250.0,,9.13362,-25.09368,No blood sample obtained for sexing.
12,PAL0708,13,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N7A1,Yes,11/15/07,41.1,17.6,182.0,3200.0,FEMALE,,,Not enough blood for isotopes.
17,PAL0708,18,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N9A2,Yes,11/12/07,42.5,20.7,197.0,4500.0,MALE,8.67538,-25.13993,
19,PAL0708,20,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N10A2,Yes,11/16/07,46.0,21.5,194.0,4200.0,MALE,9.11616,-24.77227,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,PAL0910,119,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A1,No,12/1/09,47.2,13.7,214.0,4925.0,FEMALE,7.99184,-26.20538,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [71]:
# Re-key the frame by "Sample Number" and order the rows by that key.
# The column leaves the data columns and becomes the index, which is not unique.
sdf = (
    df
    .set_index("Sample Number")
    .sort_index()
)

Unnamed: 0_level_0,studyName,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
Sample Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,PAL0708,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,PAL0708,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N31A1,Yes,11/27/07,46.1,13.2,211.0,4500.0,FEMALE,7.99300,-25.51390,
1,PAL0708,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N61A1,No,11/19/07,46.5,17.9,192.0,3500.0,FEMALE,9.03935,-24.30229,
2,PAL0708,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N61A2,No,11/19/07,50.0,19.5,196.0,3900.0,MALE,8.92069,-24.23592,
2,PAL0708,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N31A2,Yes,11/27/07,50.0,16.3,230.0,5700.0,MALE,8.14756,-25.39369,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148,PAL0910,Adelie Penguin (Pygoscelis adeliae),Anvers,Dream,"Adult, 1 Egg Stage",N83A2,Yes,11/13/09,36.6,18.4,184.0,3475.0,FEMALE,8.68744,-25.83060,
149,PAL0910,Adelie Penguin (Pygoscelis adeliae),Anvers,Dream,"Adult, 1 Egg Stage",N84A1,Yes,11/17/09,36.0,17.8,195.0,3450.0,FEMALE,8.94332,-25.79189,
150,PAL0910,Adelie Penguin (Pygoscelis adeliae),Anvers,Dream,"Adult, 1 Egg Stage",N84A2,Yes,11/17/09,37.8,18.1,193.0,3750.0,MALE,8.97533,-26.03495,
151,PAL0910,Adelie Penguin (Pygoscelis adeliae),Anvers,Dream,"Adult, 1 Egg Stage",N85A1,Yes,11/17/09,36.0,17.1,187.0,3700.0,FEMALE,8.93465,-26.07081,


In [72]:
# Distinct labels in the index; comparing their count with len(sdf) shows whether labels repeat.
sdf.index.unique()

Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
       ...
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152],
      dtype='int64', name='Sample Number', length=152)

In [74]:
len(sdf)

344

In [76]:
# Label-based slice on the default integer index: unlike Python slicing,
# both endpoints (1 and 2) are included.
df.loc[1:2]

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
1,PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186.0,3800.0,FEMALE,8.94956,-24.69454,
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,


In [79]:
# .loc selects by index label: every row whose Sample Number is 1 is returned,
# so the result is a DataFrame (three rows here) rather than a single row.
sdf.loc[1]

Unnamed: 0_level_0,studyName,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
Sample Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,PAL0708,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,PAL0708,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N31A1,Yes,11/27/07,46.1,13.2,211.0,4500.0,FEMALE,7.993,-25.5139,
1,PAL0708,Chinstrap penguin (Pygoscelis antarctica),Anvers,Dream,"Adult, 1 Egg Stage",N61A1,No,11/19/07,46.5,17.9,192.0,3500.0,FEMALE,9.03935,-24.30229,


In [80]:
# .iloc selects by position: this is the single row at position 1 (the second row),
# returned as a Series whose name is that row's index label.
sdf.iloc[1]

studyName                                        PAL0708
Species                Gentoo penguin (Pygoscelis papua)
Region                                            Anvers
Island                                            Biscoe
Stage                                 Adult, 1 Egg Stage
Individual ID                                      N31A1
Clutch Completion                                    Yes
Date Egg                                        11/27/07
Culmen Length (mm)                                  46.1
Culmen Depth (mm)                                   13.2
Flipper Length (mm)                                211.0
Body Mass (g)                                     4500.0
Sex                                               FEMALE
Delta 15 N (o/oo)                                  7.993
Delta 13 C (o/oo)                               -25.5139
Comments                                             NaN
Name: 1, dtype: object