python - advanced - 17 - Pandas
installing
pip install pandas - look at: https://pandas.pydata.org/docs/getting_started/install.html
note: according to the same page, an alternative (and possibly the best) way to install pandas is to install `anaconda`: https://pandas.pydata.org/docs/getting_started/install.html
list with index next to it
import pandas as pd
numbers = [20,30,40,50]
pandas_series=pd.Series(numbers)
print (pandas_series)
#
0 20
1 30
2 40
3 50
Kullanilabilecek veri websiteleri
index farkli olabilir:
import pandas as pd
numbers = [20,30,40,50]
letters=['a','b','c','d']
panda_series=pd.Series(numbers,letters)
# veya şöyle de yazılabilirdi:
# panda_series=pd.Series(numbers,['a','b','c','d'])
print (panda_series)
#
a 20
b 30
c 40
d 50
data frame from series
import pandas as pd
s1 = pd.Series([3,2,0,1])
s2=pd.Series([0,3,7,2])
data =dict(apples =s1,oranges=s2)
df=pd.DataFrame(data)
print (df)
#
apples oranges
0 3 0
1 2 3
2 0 7
3 1 2
-baska metod ile data frame
import pandas as pd
data = [['Ahmet',60],['Yagmur',70],['Cinar',80]]
df = pd.DataFrame(data, columns =['Name','Grade'])
print(df)
#
Name Grade
0 Ahmet 60
1 Yagmur 70
2 Cinar 80
Dosyadan veri okuma
import pandas as pd
df = pd.read_csv('amazon_product.csv')
print(df)
#
Unnamed: 0 asin ... unit_price unit_count
0 0 B0BQ118F2T ... NaN NaN
1 1 B0CTD47P22 ... NaN NaN
2 2 B0CHH6X6H2 ... NaN NaN
read_csv, read_json, read_excel haricinde database de var
Verilerden seçici okuma
import pandas as pd
# df = pd.read_csv('amazon_product.csv')
# print(df)
from numpy.random import randn
df = pd.DataFrame(randn(3,3),index=["A","B","C"], columns=["Col1","Col2","Col3"])
result1=df
result2=df["Col1"]
result3=df.loc["A"]
print(result1)
print(result2)
print(result3)
# result - 1 -
Col1 Col2 Col3
A 1.072875 -0.249325 0.129506
B 0.448577 -1.024742 0.668504
C 1.608097 0.862942 -1.874221
# result - 2 - column Col1
A 1.072875
B 0.448577
C 1.608097
Name: Col1, dtype: float64
# result - 3 - Row A
Col1 1.072875
Col2 -0.249325
Col3 0.129506
Name: A, dtype: float64
Dataframe filtreleme
ilk 5 kayit
import numpy as np
data = np.random.randint(10,100,75).reshape (15,5)
df=pd.DataFrame(data,columns=["Col1","Col2","Col3","Col4","Col5"])
result1=df
print(result1)
result2 = df.head(5) # ilk 5 kayit
print(result2)
#
Col1 Col2 Col3 Col4 Col5
0 54 53 86 25 84
1 22 95 79 23 39
2 76 62 63 22 53
3 10 20 43 65 67
4 15 58 16 83 30
5 81 15 58 31 32
6 27 55 57 37 14
7 47 47 23 95 24
8 10 14 99 28 16
9 19 75 94 54 85
10 57 26 64 15 69
11 99 88 22 10 76
12 37 73 19 23 73
13 18 47 44 93 60
14 89 85 10 42 32
# result - 2 - first 5 rows
Col1 Col2 Col3 Col4 Col5
0 54 53 86 25 84
1 22 95 79 23 39
2 76 62 63 22 53
3 10 20 43 65 67
4 15 58 16 83 30
result2 = df.tail(2) #son 2 kayit
result3=df["Col1"].head(3) # sadece 'Col1' verileri ve ilk 3 satir
#
0 59
1 54
2 39
tum sutunlari listeleme
result = df.columns
data uzerinden filtreleme
result4=df>50 # sonucu satir ve sutunlarda True/False olarak verir
Col1 Col2 Col3 Col4 Col5
0 True False True False True
1 True True False False True
ornek - 2 -
result5=df[df>50] # datalari True ise sayi, False ise NaN olarak verir
#
Col1 Col2 Col3 Col4 Col5
0 NaN NaN NaN NaN NaN
1 96.0 NaN NaN 74.0 93.0
ornek 3
result6=df["Col1"]>50 #Col1`e odaklanarak baktiriyoruz
# true /false olarak gelir
ornek 4
result6=df[df["Col1"]>50][["Col1","Col2"]]
print(result6)
# hem sadece Col1'deki >50 degerlere bak diyoruz + iki sutunu birden getir diyoruz
# not: Col1'de filtreye uygun satirlar geliyor
Col1 Col2
4 91 69
5 74 62
8 97 67
11 75 15
12 63 41
13 70 66
14 52 34
ornek 5 - 2 filtreli
result7=df[(df["Col1"]>50)&(df["Col1"]<=70)]
print(result7)
# Col1 Col2 Col3 Col4 Col5
4 57 59 51 70 45
5 68 79 37 79 15
6 60 88 50 76 80
7 70 36 71 82 35
8 69 78 51 19 30
10 64 57 80 69 67
11 69 78 22 69 69
13 53 33 67 99 79
Groupby +count
import pandas as pd
df = pd.read_csv('movie_data.csv')
result=df.groupby("director_name")["director_name"].count()
print(result)
sql gibi inner join
Comments
Post a Comment