### Array Transformations

In [1]:
import numpy as np

In [2]:
# Build the integers 0..15 and arrange them as a 4x4 array.
arr = np.arange(16).reshape(4,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [3]:
# resize() changes the shape of arr itself (in place) to 8 rows x 2 columns.
arr.resize(8,2)

In [4]:
# Confirm that arr itself now has shape (8, 2).
arr

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [5]:
# reshape() returns an array instead of modifying arr; the following cells
# show that barr shares arr's data.
barr = arr.reshape(8,2)

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [6]:
# Overwrite the first three rows of barr with -1.
barr[:3,:] = -1

In [7]:
# barr now starts with three rows of -1.
barr

array([[-1, -1],
       [-1, -1],
       [-1, -1],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [8]:
# arr changed as well: the array returned by reshape() is a view onto arr's
# data, not an independent copy.
arr

array([[-1, -1],
       [-1, -1],
       [-1, -1],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [9]:
# Start over from a fresh 4x4 array for the flatten()/ravel() comparison.
arr = np.arange(16).reshape(4,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [10]:
# flatten() collapses arr to 1-D; the following cells show the result is
# independent of arr.
flat_arr = arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [11]:
# Overwrite the first four elements of the flattened array.
flat_arr[:4] = -1

In [12]:
# The flattened array now starts with four -1 values.
flat_arr

array([-1, -1, -1, -1,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [13]:
# arr is unchanged, so flatten() returned a copy rather than a view.
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [14]:
# ravel() also collapses arr to 1-D, but the following cells show that here
# the result shares arr's data.
carr = arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [15]:
# Write -1 into the first four elements of carr, then display it.
carr[:4] = -1
carr

array([-1, -1, -1, -1,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [16]:
# arr's first row is now -1 too: ravel() returned a view of arr in this case,
# unlike flatten().
arr

array([[-1, -1, -1, -1],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

### Transpose and Stacking

In [17]:
import numpy as np

# Fresh 3x3 array (0..8) for the transpose examples.
arr = np.arange(9).reshape(3,3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [18]:
# Display the original layout for comparison with the transpose below.
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [19]:
# .T is the transpose: rows and columns are swapped.
arr.T

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [20]:
# Two length-3 integer ranges, 0..2 and 3..5, displayed together as a tuple.
a1, a2 = np.arange(0, 3), np.arange(3, 6)
(a1, a2)

(array([0, 1, 2]), array([3, 4, 5]))

In [21]:
# vstack stacks the two 1-D arrays as rows, giving a 2x3 array.
np.vstack([a1, a2])

array([[0, 1, 2],
       [3, 4, 5]])

In [22]:
# Both inputs are 1-D with shape (3,): there is no row/column orientation.
a1.shape, a2.shape

((3,), (3,))

In [24]:
# Transposing a 1-D array has no effect: the result is still 1-D with the
# same values.
a1.T

array([0, 1, 2])

In [23]:
# Because .T does nothing to 1-D arrays, hstack simply joins them end to end
# instead of producing two columns.
np.hstack([a1.T, a2.T])

array([0, 1, 2, 3, 4, 5])

In [25]:
# reshape(-1, 1) turns a1 into a single column; -1 lets numpy infer the row
# count (3 here).
a1.reshape(-1,1)

array([[0],
       [1],
       [2]])

In [26]:
# hstack of two (3, 1) columns places them side by side, giving shape (3, 2).
np.hstack([a1.reshape(-1,1), a2.reshape(-1,1)])

array([[0, 3],
       [1, 4],
       [2, 5]])

In [27]:
# Same result another way: stack as rows first, then transpose the 2-D result.
np.vstack([a1, a2]).T

array([[0, 3],
       [1, 4],
       [2, 5]])

In [29]:
# The shape is (3,) both before and after .T for a 1-D array.
a1.shape, a1.T.shape

((3,), (3,))

### Boolean Indexing

In [30]:
# Sample data with both positive and negative values for boolean indexing.
d = np.array([1,2,-1,3,-3])

array([ 1,  2, -1,  3, -3])

In [31]:
# A boolean array used as an index keeps the positions where the mask is True
# (positions 0 and 3 here).
d[np.array([True, False, False, True, False])]

array([1, 3])

In [32]:
# Comparing an array with a scalar yields an elementwise boolean mask.
(d < 0)

array([False, False,  True, False,  True])

In [33]:
# Use the comparison directly as the index to keep only the negative entries.
d[d < 0]

array([-1, -3])

In [34]:
# Assigning through a boolean mask updates only the selected (negative)
# entries of d, in place.
d[d < 0] = 0

In [35]:
# The negative entries have been replaced by 0.
d

array([1, 2, 0, 3, 0])

In [36]:
# Reset the sample data, then flip the sign of the negative entries only,
# writing the result back into d.
d = np.array([1, 2, -1, 3, -3])
np.negative(d, out=d, where=d < 0)
d

array([1, 2, 1, 3, 3])

### Series

##### polars

In [37]:
import polars as pl

# A polars Series built from a plain list, with no name given.
s = pl.Series([1,2,3])

1
2
3


In [38]:
# The first positional argument is the Series name; the dtype is inferred
# (i64 in the output below).
s = pl.Series('num', [1,2,3])

num
i64
1
2
3


In [39]:
# Pass dtype explicitly to store the integers as 64-bit floats.
s = pl.Series('num', [1,2,3], dtype=pl.Float64)

num
f64
1.0
2.0
3.0


In [40]:
# Integer indexing returns the element at that position.
s[0]

1.0

##### pandas

In [41]:
import pandas as pd

# A pandas Series built from a list gets a default integer index (0, 1, 2).
t = pd.Series([1,2,3])

0    1
1    2
2    3
dtype: int64

In [42]:
# In pandas the data comes first and the name is passed via the name= keyword
# (polars takes the name as the first positional argument).
t = pd.Series([1,2,3], name='num')

0    1
1    2
2    3
Name: num, dtype: int64

In [43]:
# dtype='float' resolves to float64 (see the dtype in the output).
t = pd.Series([1,2,3], name='num', dtype='float')

0    1.0
1    2.0
2    3.0
Name: num, dtype: float64

In [44]:
# Spelling out 'float64' gives the same dtype as 'float' in the previous cell.
t = pd.Series([1,2,3], name='num', dtype='float64')

0    1.0
1    2.0
2    3.0
Name: num, dtype: float64

In [45]:
# With the default integer index, 0 is an index label; the value comes back
# as a numpy scalar.
t[0]

np.float64(1.0)

In [46]:
# The default index is a RangeIndex (start/stop/step), not an explicit list
# of labels.
t.index

RangeIndex(start=0, stop=3, step=1)

In [47]:
# Materialize the index labels as a Python list.
list(t.index)

[0, 1, 2]

In [48]:
# The second positional argument is the index: labels 'a', 'b', 'c'.
t = pd.Series([1,2,3],['a','b','c'])

a    1
b    2
c    3
dtype: int64

In [49]:
# Same Series as the previous cell, with the index passed by keyword.
t = pd.Series([1,2,3],index=['a','b','c'])

a    1
b    2
c    3
dtype: int64

In [50]:
# A dict also works: keys become the index labels, values the data.
t = pd.Series({'a': 1, 'b': 2, 'c': 3})

a    1
b    2
c    3
dtype: int64

In [51]:
# Look up a value by its index label.
t['a']

np.int64(1)

In [52]:
# Integer key on a string-labelled index: here pandas falls back to a
# positional lookup and still returns 1, but it emits a warning with this
# call. NOTE(review): in pandas 2.x this fallback is deprecated
# (FutureWarning) - prefer the explicit t.iloc[0] shown in the next cell.
t[0]

  t[0]


np.int64(1)

In [53]:
# .iloc is the explicit position-based accessor.
t.iloc[0]

np.int64(1)

#### Operations

##### polars

In [54]:
# Elementwise addition; s is the f64 'num' series, and the result keeps that
# name and dtype.
s + pl.Series([1,2,3])

num
f64
2.0
4.0
6.0


In [55]:
# Intentional failure: polars refuses to add series of different lengths
# (InvalidOperationError).
s + pl.Series([1,2,3,4])

InvalidOperationError: cannot do arithmetic operation on series of different lengths: got 3 and 4

In [56]:
# A scalar is applied to every element.
s + 4

num
f64
5.0
6.0
7.0


In [57]:
# For string series, + concatenates elementwise.
pl.Series(['a','b']) + pl.Series(['c','d'])

"""ac"""
"""bd"""


##### pandas

In [59]:
# Elementwise addition of two pandas Series that share the default 0..2 index.
pd.Series([1,2,3]) + pd.Series([1,2,3])

0    2
1    4
2    6
dtype: int64

In [60]:
# Reminder: t is indexed by the labels 'a', 'b', 'c'.
t

a    1
b    2
c    3
dtype: int64

In [58]:
# pandas aligns on index labels, not positions: no label occurs in both
# operands, so every result is NaN and the index is the union of both.
t + pd.Series([1,2,3])

a   NaN
b   NaN
c   NaN
0   NaN
1   NaN
2   NaN
dtype: float64

In [64]:
# Same labels as t, but listed in reverse order.
pd.Series([1,2,3],index=list('cba'))

c    1
b    2
a    3
dtype: int64

In [62]:
# Alignment by label: a = 1 + 3, b = 2 + 2, c = 3 + 1, so every sum is 4
# regardless of the order in which the labels were given.
t + pd.Series([1,2,3],index=list('cba'))

a    4
b    4
c    4
dtype: int64

In [65]:
# As in polars, + on string Series concatenates elementwise.
pd.Series(['a','b']) + pd.Series(['c','d'])

0    ac
1    bd
dtype: object

### Data Frames

In [66]:
# Build a polars DataFrame from a dict mapping column name -> list of values.
df = pl.DataFrame({'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada'],
                'year': [2000, 2001, 2002, 2001],
                'pop': [1.5, 1.7, 3.6, 2.4]})

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7
"""Ohio""",2002,3.6
"""Nevada""",2001,2.4


In [67]:
# Equivalent construction from a list of named Series; each Series becomes a
# column.
df = pl.DataFrame([pl.Series('state', ['Ohio', 'Ohio', 'Ohio', 'Nevada']),
                pl.Series('year', [2000, 2001, 2002, 2001]),
                pl.Series('pop', [1.5, 1.7, 3.6, 2.4])])

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7
"""Ohio""",2002,3.6
"""Nevada""",2001,2.4


In [68]:
# pandas accepts the same dict of lists; the result additionally carries a
# default integer row index (0..3).
dfa = pd.DataFrame({'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada'],
                  'year': [2000, 2001, 2002, 2001],
                  'pop': [1.5, 1.7, 3.6, 2.4]})

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4


In [69]:
# Unlike polars, pandas interprets a list of Series row-wise: each Series
# becomes a row labelled by its name.
dfa = pd.DataFrame([pd.Series(['Ohio', 'Ohio', 'Ohio', 'Nevada'], name='state'),
                pd.Series([2000, 2001, 2002, 2001], name='year'),
                pd.Series([1.5, 1.7, 3.6, 2.4], name='pop')])

Unnamed: 0,0,1,2,3
state,Ohio,Ohio,Ohio,Nevada
year,2000,2001,2002,2001
pop,1.5,1.7,3.6,2.4


In [70]:
# Transpose to get one column per Series; the result is only displayed, dfa
# itself is not rebound.
dfa.T

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4


In [71]:
# A dict of Series is interpreted column-wise, matching the dict-of-lists
# result.
dfa = pd.DataFrame({'state': pd.Series(['Ohio', 'Ohio', 'Ohio', 'Nevada']),
                'year': pd.Series([2000, 2001, 2002, 2001]),
                'pop': pd.Series([1.5, 1.7, 3.6, 2.4])})

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4


In [72]:
# Build the frame with 'state' as an ordinary column, then display a
# state-indexed version of it. set_index returns a new DataFrame, so the
# result is shown without rebinding dfa: later cells still rely on
# dfa['state'] and dfa.state being available as a column.
dfa = pd.DataFrame({'state': pd.Series(['Ohio', 'Ohio', 'Ohio', 'Nevada']),
                    'year': pd.Series([2000, 2001, 2002, 2001]),
                    'pop': pd.Series([1.5, 1.7, 3.6, 2.4])})
dfa.set_index('state')

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7
Ohio,2002,3.6
Nevada,2001,2.4


In [76]:
# Rebuild dfa with 'state' as a regular column and the default integer index;
# the column-access cells below use dfa['state'].
dfa = pd.DataFrame({'state': pd.Series(['Ohio', 'Ohio', 'Ohio', 'Nevada']),
                'year': pd.Series([2000, 2001, 2002, 2001]),
                'pop': pd.Series([1.5, 1.7, 3.6, 2.4])})

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4


#### Accessing a Column

##### polars

In [73]:
# Square brackets with a column name return that column as a Series.
df['state']

state
str
"""Ohio"""
"""Ohio"""
"""Ohio"""
"""Nevada"""


In [74]:
# Intentional failure: polars does not expose columns as attributes
# (AttributeError).
df.state

AttributeError: 'DataFrame' object has no attribute 'state'

##### pandas

In [77]:
# Bracket access by column name works the same way in pandas.
dfa['state']

0      Ohio
1      Ohio
2      Ohio
3    Nevada
Name: state, dtype: object

In [78]:
# pandas additionally exposes the column as an attribute.
dfa.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
Name: state, dtype: object

#### Assigning to a Column

##### polars

In [79]:
# Intentional failure: polars DataFrames do not support column assignment
# through [] (TypeError); the error message points to with_columns.
df['state'] = ['Ohio', 'Ohio', 'Texas','Nevada']

TypeError: DataFrame object does not support `Series` assignment by index

Use `DataFrame.with_columns`.

In [80]:
# with_columns returns a new DataFrame in which the 'state' column is replaced
# by the given Series (matched by name).
df.with_columns(pl.Series('state',['Ohio', 'Ohio', 'Texas','Nevada']))

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7
"""Texas""",2002,3.6
"""Nevada""",2001,2.4


In [81]:
df # does not change! with_columns returned a new frame and df was never rebound

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7
"""Ohio""",2002,3.6
"""Nevada""",2001,2.4


In [82]:
# select returns a new DataFrame containing only the named columns.
df.select('state','year')

state,year
str,i64
"""Ohio""",2000
"""Ohio""",2001
"""Ohio""",2002
"""Nevada""",2001


##### pandas

In [83]:
# In-place column assignment mutates dfa itself; the later cells in this
# section keep showing 'Texas' in row 2 because of it.
dfa['state'] = ['Ohio', 'Ohio', 'Texas','Nevada'] # not recommended!
dfa

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Texas,2002,3.6
3,Nevada,2001,2.4


In [84]:
# assign returns a new frame with 'state' replaced; the next cell shows dfa
# itself is left untouched.
dfa.assign(state=['Ohio','Ohio','Illinois','Nevada'])

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Illinois,2002,3.6
3,Nevada,2001,2.4


In [85]:
dfa # does not change! row 2 is still 'Texas' (from the in-place assignment), not 'Illinois' (from assign)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Texas,2002,3.6
3,Nevada,2001,2.4


In [86]:
# A list of column names inside [] selects several columns and returns a
# DataFrame.
dfa[['state','year']]

Unnamed: 0,state,year
0,Ohio,2000
1,Ohio,2001
2,Texas,2002
3,Nevada,2001


#### Indexing

##### polars

In [87]:
# In polars an integer inside [] selects a row, returned as a one-row
# DataFrame.
df[0]

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5


In [89]:
# The slice 0:1 yields the same single row as df[0].
df[0:1]

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5


In [88]:
# Row slice with an exclusive end: rows 0 and 1.
df[0:2]

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7


##### pandas

In [90]:
# Intentional failure: in pandas a scalar inside [] is looked up as a column
# label, and there is no column named 0 (KeyError).
dfa[0]

KeyError: 0

In [91]:
# .loc looks up a row by index label; the row comes back as a Series keyed by
# column name.
dfa.loc[0]

state    Ohio
year     2000
pop       1.5
Name: 0, dtype: object

In [92]:
# A slice inside [] is the exception: it selects rows (0 and 1), not columns.
dfa[0:2]

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7


In [93]:
# set_index returns a new frame with 'state' moved from the columns into the
# row index; it is bound to dfi so dfa keeps its original layout.
dfi = dfa.set_index('state')

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7
Texas,2002,3.6
Nevada,2001,2.4


In [94]:
# Display the state-indexed frame used in the label-lookup examples below.
dfi

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7
Texas,2002,3.6
Nevada,2001,2.4


In [96]:
# A label that occurs more than once returns all matching rows as a DataFrame.
dfi.loc['Ohio']

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7


In [97]:
# Position-based slice: the first two rows, regardless of their labels.
dfi.iloc[:2]

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7


In [98]:
# A label that occurs exactly once returns the row as a Series; the int and
# float values share one dtype there, so year is shown as 2002.0 (float64).
dfi.loc['Texas']

year    2002.0
pop        3.6
Name: Texas, dtype: float64

In [99]:
dfi.loc['Ohio':'Texas'] # inclusive! label slices keep the end label, so the 'Texas' row is returned

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7
Texas,2002,3.6


In [100]:
dfi.iloc[:2] # exclusive: position slices stop before position 2, so only rows 0 and 1

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Ohio,2000,1.5
Ohio,2001,1.7


#### Filtering

In [101]:
# Comparing a column with a scalar gives a boolean Series, one value per row.
df['pop'] > 2

pop
bool
False
False
True
True


In [102]:
# Intentional failure: polars does not filter rows with a boolean mask in [];
# the mask is treated as a column selector, and its length (4) does not match
# the 3 columns (ValueError).
df[df['pop'] > 2]

ValueError: expected 3 values when selecting columns by boolean mask, got 4

In [103]:
# The polars way to filter rows: filter() with an expression built from pl.col.
df.filter(pl.col('pop') > 2)

state,year,pop
str,i64,f64
"""Ohio""",2002,3.6
"""Nevada""",2001,2.4


In [104]:
# The same comparison in pandas gives a boolean Series carrying dfa's index.
dfa['pop'] > 2

0    False
1    False
2     True
3     True
Name: pop, dtype: bool

In [105]:
# In pandas a boolean Series inside [] filters rows.
dfa[dfa['pop'] > 2]

Unnamed: 0,state,year,pop
2,Texas,2002,3.6
3,Nevada,2001,2.4


In [106]:
# query() expresses the same row filter as a string that refers to column
# names.
dfa.query('pop > 2')

Unnamed: 0,state,year,pop
2,Texas,2002,3.6
3,Nevada,2001,2.4


#### Sorting

In [107]:
# sort returns the rows ordered by 'pop', ascending.
df.sort('pop')

state,year,pop
str,i64,f64
"""Ohio""",2000,1.5
"""Ohio""",2001,1.7
"""Nevada""",2001,2.4
"""Ohio""",2002,3.6


In [108]:
# pandas equivalent; the original index labels travel with their rows
# (label 3 appears before label 2 in the output).
dfa.sort_values('pop')

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
3,Nevada,2001,2.4
2,Texas,2002,3.6


In [110]:
# Sort rows by index label (the state name) instead of by a column.
dfi.sort_index()

Unnamed: 0_level_0,year,pop
state,Unnamed: 1_level_1,Unnamed: 2_level_1
Nevada,2001,2.4
Ohio,2000,1.5
Ohio,2001,1.7
Texas,2002,3.6


#### Describe

In [111]:
# Summary statistics for every column, including the string column 'state'.
df.describe()

statistic,state,year,pop
str,str,f64,f64
"""count""","""4""",4.0,4.0
"""null_count""","""0""",0.0,0.0
"""mean""",,2001.0,2.3
"""std""",,0.816497,0.948683
"""min""","""Nevada""",2000.0,1.5
"""25%""",,2001.0,1.7
"""50%""",,2001.0,2.4
"""75%""",,2001.0,2.4
"""max""","""Ohio""",2002.0,3.6


In [112]:
# pandas summarizes only the numeric columns here. Note the quantiles differ
# from the polars output (e.g. 50% of pop is 2.05 vs 2.4) - presumably the two
# libraries use different default quantile interpolation; TODO confirm.
dfa.describe()

Unnamed: 0,year,pop
count,4.0,4.0
mean,2001.0,2.3
std,0.816497,0.948683
min,2000.0,1.5
25%,2000.75,1.65
50%,2001.0,2.05
75%,2001.25,2.7
max,2002.0,3.6


In [113]:
# describe() on a string column reports count/unique/top/freq instead of
# numeric statistics.
dfa.state.describe()

count        4
unique       3
top       Ohio
freq         2
Name: state, dtype: object

### Penguins Data

##### polars

In [114]:
import polars as pl
# Load the penguins CSV with polars; the path is relative to the notebook's
# working directory. This rebinds df, replacing the small example frame.
df = pl.read_csv('penguins_lter.csv')

studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
str,i64,str,str,str,str,str,str,str,f64,f64,i64,i64,str,f64,f64,str
"""PAL0708""",1,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N1A1""","""Yes""","""11/11/07""",39.1,18.7,181,3750,"""MALE""",,,"""Not enough blood for isotopes."""
"""PAL0708""",2,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N1A2""","""Yes""","""11/11/07""",39.5,17.4,186,3800,"""FEMALE""",8.94956,-24.69454,
"""PAL0708""",3,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N2A1""","""Yes""","""11/16/07""",40.3,18.0,195,3250,"""FEMALE""",8.36821,-25.33302,
"""PAL0708""",4,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N2A2""","""Yes""","""11/16/07""",,,,,,,,"""Adult not sampled."""
"""PAL0708""",5,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N3A1""","""Yes""","""11/16/07""",36.7,19.3,193,3450,"""FEMALE""",8.76651,-25.32426,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""PAL0910""",120,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N38A2""","""No""","""12/1/09""",,,,,,,,
"""PAL0910""",121,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A1""","""Yes""","""11/22/09""",46.8,14.3,215,4850,"""FEMALE""",8.41151,-26.13832,
"""PAL0910""",122,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A2""","""Yes""","""11/22/09""",50.4,15.7,222,5750,"""MALE""",8.30166,-26.04117,
"""PAL0910""",123,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N43A1""","""Yes""","""11/22/09""",45.2,14.8,212,5200,"""FEMALE""",8.24246,-26.11969,


In [116]:
# Keep only the four identifying columns.
df.select(
 'Sample Number',
 'Species',
 'Region',
 'Island')

Sample Number,Species,Region,Island
i64,str,str,str
1,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen"""
2,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen"""
3,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen"""
4,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen"""
5,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen"""
…,…,…,…
120,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe"""
121,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe"""
122,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe"""
123,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe"""


In [117]:
# Column first, then row position 341; the scalar shows the full string that
# the table display truncates.
df['Species'][341]

'Gentoo penguin (Pygoscelis papua)'

In [118]:
# Column names paired with their inferred polars dtypes.
df.schema

Schema([('studyName', String),
        ('Sample Number', Int64),
        ('Species', String),
        ('Region', String),
        ('Island', String),
        ('Stage', String),
        ('Individual ID', String),
        ('Clutch Completion', String),
        ('Date Egg', String),
        ('Culmen Length (mm)', Float64),
        ('Culmen Depth (mm)', Float64),
        ('Flipper Length (mm)', Int64),
        ('Body Mass (g)', Int64),
        ('Sex', String),
        ('Delta 15 N (o/oo)', Float64),
        ('Delta 13 C (o/oo)', Float64),
        ('Comments', String)])

In [119]:
# Keep the rows whose Island equals "Biscoe".
df.filter(pl.col('Island') == "Biscoe")

studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
str,i64,str,str,str,str,str,str,str,f64,f64,i64,i64,str,f64,f64,str
"""PAL0708""",21,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N11A1""","""Yes""","""11/12/07""",37.8,18.3,174,3400,"""FEMALE""",8.73762,-25.09383,
"""PAL0708""",22,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N11A2""","""Yes""","""11/12/07""",37.7,18.7,180,3600,"""MALE""",8.66271,-25.0639,
"""PAL0708""",23,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N12A1""","""Yes""","""11/12/07""",35.9,19.2,189,3800,"""FEMALE""",9.22286,-25.03474,
"""PAL0708""",24,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N12A2""","""Yes""","""11/12/07""",38.2,18.1,185,3950,"""MALE""",8.43423,-25.22664,
"""PAL0708""",25,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N13A1""","""Yes""","""11/10/07""",38.8,17.2,180,3800,"""MALE""",9.63954,-25.29856,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""PAL0910""",120,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N38A2""","""No""","""12/1/09""",,,,,,,,
"""PAL0910""",121,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A1""","""Yes""","""11/22/09""",46.8,14.3,215,4850,"""FEMALE""",8.41151,-26.13832,
"""PAL0910""",122,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A2""","""Yes""","""11/22/09""",50.4,15.7,222,5750,"""MALE""",8.30166,-26.04117,
"""PAL0910""",123,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N43A1""","""Yes""","""11/22/09""",45.2,14.8,212,5200,"""FEMALE""",8.24246,-26.11969,


In [120]:
# Keep the rows with a culmen length above 40 mm.
df.filter(pl.col("Culmen Length (mm)") > 40)

studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
str,i64,str,str,str,str,str,str,str,f64,f64,i64,i64,str,f64,f64,str
"""PAL0708""",3,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N2A1""","""Yes""","""11/16/07""",40.3,18.0,195,3250,"""FEMALE""",8.36821,-25.33302,
"""PAL0708""",10,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N5A2""","""Yes""","""11/9/07""",42.0,20.2,190,4250,,9.13362,-25.09368,"""No blood sample obtained for s…"
"""PAL0708""",13,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N7A1""","""Yes""","""11/15/07""",41.1,17.6,182,3200,"""FEMALE""",,,"""Not enough blood for isotopes."""
"""PAL0708""",18,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N9A2""","""Yes""","""11/12/07""",42.5,20.7,197,4500,"""MALE""",8.67538,-25.13993,
"""PAL0708""",20,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N10A2""","""Yes""","""11/16/07""",46.0,21.5,194,4200,"""MALE""",9.11616,-24.77227,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""PAL0910""",119,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N38A1""","""No""","""12/1/09""",47.2,13.7,214,4925,"""FEMALE""",7.99184,-26.20538,
"""PAL0910""",121,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A1""","""Yes""","""11/22/09""",46.8,14.3,215,4850,"""FEMALE""",8.41151,-26.13832,
"""PAL0910""",122,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N39A2""","""Yes""","""11/22/09""",50.4,15.7,222,5750,"""MALE""",8.30166,-26.04117,
"""PAL0910""",123,"""Gentoo penguin (Pygoscelis pap…","""Anvers""","""Biscoe""","""Adult, 1 Egg Stage""","""N43A1""","""Yes""","""11/22/09""",45.2,14.8,212,5200,"""FEMALE""",8.24246,-26.11969,


In [121]:
# Distinct island names; note the output is not in order of first appearance
# in the file.
df['Island'].unique()

Island
str
"""Dream"""
"""Biscoe"""
"""Torgersen"""


In [122]:
# Row count per island, returned as a two-column DataFrame (Island, count).
df['Island'].value_counts()

Island,count
str,u32
"""Torgersen""",52
"""Dream""",124
"""Biscoe""",168


In [123]:
# Mean culmen length per island: group by Island, then aggregate with an
# expression.
df.group_by('Island').agg(pl.col('Culmen Length (mm)').mean())

Island,Culmen Length (mm)
str,f64
"""Dream""",44.167742
"""Biscoe""",45.257485
"""Torgersen""",38.95098


##### pandas

In [124]:
import pandas as pd
# Load the same CSV with pandas; this rebinds dfa, replacing the small example
# frame.
dfa = pd.read_csv('penguins_lter.csv')

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
0,PAL0708,1,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A1,Yes,11/11/07,39.1,18.7,181.0,3750.0,MALE,,,Not enough blood for isotopes.
1,PAL0708,2,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N1A2,Yes,11/11/07,39.5,17.4,186.0,3800.0,FEMALE,8.94956,-24.69454,
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
3,PAL0708,4,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A2,Yes,11/16/07,,,,,,,,Adult not sampled.
4,PAL0708,5,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N3A1,Yes,11/16/07,36.7,19.3,193.0,3450.0,FEMALE,8.76651,-25.32426,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,PAL0910,120,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A2,No,12/1/09,,,,,,,,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [125]:
# Per-column non-null counts and dtypes. Flipper Length and Body Mass are
# float64 here, whereas polars read them as Int64 - presumably because pandas
# represents their missing values as NaN; TODO confirm.
dfa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   studyName            344 non-null    object 
 1   Sample Number        344 non-null    int64  
 2   Species              344 non-null    object 
 3   Region               344 non-null    object 
 4   Island               344 non-null    object 
 5   Stage                344 non-null    object 
 6   Individual ID        344 non-null    object 
 7   Clutch Completion    344 non-null    object 
 8   Date Egg             344 non-null    object 
 9   Culmen Length (mm)   342 non-null    float64
 10  Culmen Depth (mm)    342 non-null    float64
 11  Flipper Length (mm)  342 non-null    float64
 12  Body Mass (g)        342 non-null    float64
 13  Sex                  334 non-null    object 
 14  Delta 15 N (o/oo)    330 non-null    float64
 15  Delta 13 C (o/oo)    331 non-null    flo

In [126]:
# Boolean-mask row filter; dfa.Island is attribute access to the 'Island'
# column.
dfa[dfa.Island == 'Biscoe']

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
20,PAL0708,21,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N11A1,Yes,11/12/07,37.8,18.3,174.0,3400.0,FEMALE,8.73762,-25.09383,
21,PAL0708,22,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N11A2,Yes,11/12/07,37.7,18.7,180.0,3600.0,MALE,8.66271,-25.06390,
22,PAL0708,23,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N12A1,Yes,11/12/07,35.9,19.2,189.0,3800.0,FEMALE,9.22286,-25.03474,
23,PAL0708,24,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N12A2,Yes,11/12/07,38.2,18.1,185.0,3950.0,MALE,8.43423,-25.22664,
24,PAL0708,25,Adelie Penguin (Pygoscelis adeliae),Anvers,Biscoe,"Adult, 1 Egg Stage",N13A1,Yes,11/10/07,38.8,17.2,180.0,3800.0,MALE,9.63954,-25.29856,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,PAL0910,120,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A2,No,12/1/09,,,,,,,,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [127]:
# Backticks let query() refer to a column name that contains spaces and
# parentheses.
dfa.query('`Culmen Length (mm)` > 40')

Unnamed: 0,studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
2,PAL0708,3,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N2A1,Yes,11/16/07,40.3,18.0,195.0,3250.0,FEMALE,8.36821,-25.33302,
9,PAL0708,10,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N5A2,Yes,11/9/07,42.0,20.2,190.0,4250.0,,9.13362,-25.09368,No blood sample obtained for sexing.
12,PAL0708,13,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N7A1,Yes,11/15/07,41.1,17.6,182.0,3200.0,FEMALE,,,Not enough blood for isotopes.
17,PAL0708,18,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N9A2,Yes,11/12/07,42.5,20.7,197.0,4500.0,MALE,8.67538,-25.13993,
19,PAL0708,20,Adelie Penguin (Pygoscelis adeliae),Anvers,Torgersen,"Adult, 1 Egg Stage",N10A2,Yes,11/16/07,46.0,21.5,194.0,4200.0,MALE,9.11616,-24.77227,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,PAL0910,119,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N38A1,No,12/1/09,47.2,13.7,214.0,4925.0,FEMALE,7.99184,-26.20538,
340,PAL0910,121,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A1,Yes,11/22/09,46.8,14.3,215.0,4850.0,FEMALE,8.41151,-26.13832,
341,PAL0910,122,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N39A2,Yes,11/22/09,50.4,15.7,222.0,5750.0,MALE,8.30166,-26.04117,
342,PAL0910,123,Gentoo penguin (Pygoscelis papua),Anvers,Biscoe,"Adult, 1 Egg Stage",N43A1,Yes,11/22/09,45.2,14.8,212.0,5200.0,FEMALE,8.24246,-26.11969,


In [128]:
# Distinct island names, returned as a numpy object array.
dfa.Island.unique()

array(['Torgersen', 'Biscoe', 'Dream'], dtype=object)

In [129]:
# Row count per island as a Series, ordered from most to least frequent.
dfa['Island'].value_counts()

Island
Biscoe       168
Dream        124
Torgersen     52
Name: count, dtype: int64

In [130]:
# Mean culmen length per island; the groups come back ordered by island name.
dfa.groupby('Island')['Culmen Length (mm)'].mean()

Island
Biscoe       45.257485
Dream        44.167742
Torgersen    38.950980
Name: Culmen Length (mm), dtype: float64