python - advanced - 17 - Pandas




installing


note: an alternative, and maybe the best, way to install pandas is to install `anaconda`, according to this site: https://pandas.pydata.org/docs/getting_started/install.html

list with index next to it

import pandas as pd

# A Series built from a plain list gets the default integer index 0..n-1.
numbers = [20, 30, 40, 50]
pandas_series = pd.Series(data=numbers)
print(pandas_series)

#
0    20
1    30
2    40
3    50

Kullanilabilecek veri websiteleri

index farkli olabilir:

import pandas as pd

# The index does not have to be 0..n-1: custom labels are passed via ``index``.
numbers = [20, 30, 40, 50]
letters = ["a", "b", "c", "d"]
panda_series = pd.Series(data=numbers, index=letters)
# Equivalent spelling with the labels given inline and positionally:
#   pd.Series(numbers, ["a", "b", "c", "d"])
print(panda_series)

#
a    20
b    30
c    40
d    50

data series

import pandas as pd

# Each Series becomes one column of the DataFrame, named after its dict key.
s1 = pd.Series([3, 2, 0, 1])
s2 = pd.Series([0, 3, 7, 2])
data = {"apples": s1, "oranges": s2}
df = pd.DataFrame(data)
print(df)

#
   apples  oranges
0       3        0
1       2        3
2       0        7
3       1        2

-baska metod ile data frame

import pandas as pd

# Another way to build a DataFrame: a list of rows plus explicit column names.
data = [
    ["Ahmet", 60],
    ["Yagmur", 70],
    ["Cinar", 80],
]
df = pd.DataFrame(data=data, columns=["Name", "Grade"])
print(df)

#
     Name  Grade
0   Ahmet     60
1  Yagmur     70
2   Cinar     80

Dosyadan veri okuma

import pandas as pd

# Load the CSV file (path is relative to the working directory) into a DataFrame.
csv_path = "amazon_product.csv"
df = pd.read_csv(csv_path)
print(df)

#
   Unnamed: 0        asin  ...  unit_price  unit_count
0           0  B0BQ118F2T  ...         NaN         NaN
1           1  B0CTD47P22  ...         NaN         NaN
2           2  B0CHH6X6H2  ...         NaN         NaN

read_csv, read_json, read_excel haricinde database de var

Verilerden seçici okuma

import pandas as pd
from numpy.random import randn

# 3x3 frame of random values with labelled rows (A-C) and columns (Col1-Col3).
row_labels = ["A", "B", "C"]
column_labels = ["Col1", "Col2", "Col3"]
df = pd.DataFrame(randn(3, 3), index=row_labels, columns=column_labels)

result1 = df           # the whole frame
result2 = df["Col1"]   # a single column, selected by its name
result3 = df.loc["A"]  # a single row, selected by its index label

for selection in (result1, result2, result3):
    print(selection)

# result - 1 -
       Col1      Col2      Col3
A  1.072875 -0.249325  0.129506
B  0.448577 -1.024742  0.668504
C  1.608097  0.862942 -1.874221
# result - 2 - column 1
A    1.072875
B    0.448577
C    1.608097
Name: Col1, dtype: float64
# result - 3 - Row A
Col1    1.072875
Col2   -0.249325
Col3    0.129506

Dataframe filtreleme

ilk 5 kayit
import numpy as np

# 75 random integers from [10, 100), arranged as 15 rows x 5 columns.
data = np.random.randint(10, 100, 75).reshape(15, 5)
column_names = [f"Col{number}" for number in range(1, 6)]
df = pd.DataFrame(data, columns=column_names)

result1 = df
print(result1)

# First 5 records of the frame.
result2 = df.head(5)
print(result2)

#
    Col1  Col2  Col3  Col4  Col5
0     54    53    86    25    84
1     22    95    79    23    39
2     76    62    63    22    53
3     10    20    43    65    67
4     15    58    16    83    30
5     81    15    58    31    32
6     27    55    57    37    14
7     47    47    23    95    24
8     10    14    99    28    16
9     19    75    94    54    85
10    57    26    64    15    69
11    99    88    22    10    76
12    37    73    19    23    73
13    18    47    44    93    60
14    89    85    10    42    32

# result - 2 - first 5 rows
   Col1  Col2  Col3  Col4  Col5
0    54    53    86    25    84
1    22    95    79    23    39
2    76    62    63    22    53
3    10    20    43    65    67
4    15    58    16    83    30

# Last 2 records of the frame.
result2 = df.tail(n=2)

# Only the 'Col1' values, limited to the first 3 rows.
col1_values = df["Col1"]
result3 = col1_values.head(3)
#
0    59
1    54
2    39

tum sutunlari listeleme

# The column labels of the frame (lists every column name).
result = df.columns

data uzerinden filtreleme

# Element-wise comparison: gives True/False for every row/column position.
result4 = df > 50

    Col1   Col2   Col3   Col4  Col5
0   True  False   True  False  True
1   True   True  False  False  True

ornek - 2 -
# Indexing with a boolean frame keeps the number where the mask is True
# and yields NaN where it is False.
above_50 = df > 50
result5 = df[above_50]
#
   Col1  Col2  Col3  Col4  Col5
0   NaN   NaN   NaN   NaN   NaN
1  96.0   NaN   NaN  74.0  93.0

ornek 3
# Look at Col1 only: the comparison comes back as True/False values, one per row.
col1_values = df["Col1"]
result6 = col1_values > 50

ornek 4
# Keep only the rows where Col1 > 50, then return just the Col1 and Col2 columns.
# Note: only the rows whose Col1 value passes the filter appear in the result.
col1_above_50 = df["Col1"] > 50
matching_rows = df[col1_above_50]
result6 = matching_rows[["Col1", "Col2"]]
print(result6)

    Col1  Col2
4     91    69
5     74    62
8     97    67
11    75    15
12    63    41
13    70    66
14    52    34

ornek 5 - 2 filtreli
# Two conditions combined with & (each side parenthesised): 50 < Col1 <= 70.
is_above_50 = df["Col1"] > 50
is_at_most_70 = df["Col1"] <= 70
result7 = df[is_above_50 & is_at_most_70]
print(result7)
#
    Col1  Col2  Col3  Col4  Col5
4     57    59    51    70    45
5     68    79    37    79    15
6     60    88    50    76    80
7     70    36    71    82    35
8     69    78    51    19    30
10    64    57    80    69    67
11    69    78    22    69    69
13    53    33    67    99    79

Groupby +count

import pandas as pd

df = pd.read_csv("movie_data.csv")

# Entries per director: group the rows by director_name, then count that
# column's values inside each group.
rows_by_director = df.groupby("director_name")
result = rows_by_director["director_name"].count()

print(result)


sql gibi inner join












Comments

Popular posts from this blog

python - pro - 20 - SQLite

python - pro - 21 - NoSQL

python - pro - 19 - MySQL