python - advanced 13 - 2 - Advanced modules 2
Beginner - Intermediate - Advanced Functions - Iterators -
Advanced modules-1 - Advanced modules-2 - data mining - selenium
Beautiful Soup Module
ilk once install edilmeli
pip install beautifulsoup4
from bs4 import BeautifulSoup
# Sample HTML document used as the parser input for the examples in this file.
# The keywords <meta> tag is explicitly closed with ">" so that the following
# </head> is parsed as an end tag instead of being absorbed into the meta tag.
html_doc = """
<HTML>
<head>
<!-- Created By Oruc Kenan Yildirim -->
<meta name="description" content="This Site is about Architecture and Consulting. It contains Project Designing, Project Management and Business Consulting Systems">
<meta name="keywords" content="mimari,mimar,mimarlik,mimarlık,mimarlar,danismanlik,insaat,ankara,turkiye,istanbul,antalya,kocaeli,gebze,cankaya,gaziosmanpasa,g.o.p.,ayranci,ticaret,boya,sove,dis,cephe,kaplama,izolasyon,mermer,seramik,dekorasyon,tadilat,banyo dolabı,mutfak dolabı,çelik kapı,amerikan kapı,panel,ahşap,PVC kapı,duş kabinleri,küvet,duş teknesi,lavabo,fayans,mermer,seramik,granit uygulamaları,laminat parke,lamine parke,ahşap parke,ahşap doğtama fiti,ferforje demir doğrama,PVC doğrama,su tesisatı,elektrik tesisatı,tasarım, tasarim, avan, proje, uygulama, construction, contractor ,mekanik, mechanical, elektrik, project, atölye, atolye, studio, studyo, concept, design, ev, konut, resident, housing, restoran, restaurant, bina, office, ofis, factory, fabrika, liman, port, akaryakıt, akaryakit, istasyon, station, furniting, gas, mcdonalds, kemer, country, istanbul, kulüp, klüp, clubhouse, club, fritolay, mars, bingazi, beysu, selimiye, marmaris, apart, hotel, sheraton, taşkent, tashkent, pavaletskaya, kızıl, kizil, tepe, red, hill, aktau, kazakistan, kazakhistan, demirbank, türkpetrol, turkpetrol, castrol, toprak, ilaç, pharmacy, eczane, ykk, yapikredikoray, enka, entes, koray, tekfen, akbank, tofaş, tofas, ditas, ditaş, belediye, yenibosna, gün, gülfem, alev, karaer, osman, elliiki,architecture,design,construction,business,Türk, Turkish,Architect,Turkey,Türkiye,Y.Mimar,Yüksek Mimar,Turkey,Camii,Terrace,Rezidans">
</head>
<title>Outside Universe Design & Consulting</title>
<BODY>
<div id="menu">
<ul>
<li>
<h2>TURKCE</h2>
</li>
<li>
<h2>ENGLISH</h2>
</li>
</ul>
</div>
<div id="Layer13">Persembe 18-02-10 - tadilat basladi - Feb 18th Thursday 2010 - Renovation works begun
</div>
<div id="Layer16">
<div align="center" class="text style3">
OUTSIDE UNIVERSE
<br>Design & Consulting <br> Tasarim ve Danismanlik
</div>
</div>
</BODY>
</HTML>
"""
# Parse the sample document with the "html.parser" backend.
soup = BeautifulSoup(html_doc, 'html.parser')

# prettify() renders the parse tree as an indented string.
result = soup.prettify()

# Attribute access (soup.<tag>) yields the first tag with that name.
result = soup.title           # <title>Outside Universe Design & Consulting</title>
result2 = soup.title.string   # Outside Universe Design & Consulting
result2 = soup.h2.string      # TURKCE

# find_all() returns every matching tag; it replaces the legacy camelCase
# alias findAll().
result = soup.find_all('h2')     # [<h2>TURKCE</h2>, <h2>ENGLISH</h2>]
result = soup.find_all('h2')[0]  # <h2>TURKCE</h2>

# From all <div> tags take index 0, step into its first <ul>,
# and from all of that list's <li> tags take index 0.
result = soup.find_all('div')[0].ul.find_all('li')[0]
# Expected output:
# <li>
# <h2>TURKCE</h2>
# </li>
print(result)
Beautiful Soup ile web scraping'e gecis yapilabilir.
Comments
Post a Comment