Skip to content

Instantly share code, notes, and snippets.

@emsi
Last active June 9, 2016 21:28
Show Gist options
  • Save emsi/e17219257a3733e95c4a4c33d21ea5a6 to your computer and use it in GitHub Desktop.
Save emsi/e17219257a3733e95c4a4c33d21ea5a6 to your computer and use it in GitHub Desktop.
Funkcje do googlowania NIPu firmy po nazwie
#!/usr/bin/env python
import urllib2
import re
from bs4 import BeautifulSoup
def most_common(lst):
    """Return the element that occurs most often in *lst*.

    Parameters
    ----------
    lst : iterable of hashable
        Values to tally.

    Returns
    -------
    object
        The value with the highest occurrence count.  When several values
        share the highest count, which of them is returned is unspecified.

    Raises
    ------
    ValueError
        If *lst* is empty.
    """
    # Imported locally so the function is self-contained.
    from collections import Counter

    # A single counting pass instead of calling lst.count() once per
    # distinct value (which rescans the whole list every time).
    counts = Counter(lst)
    if not counts:
        raise ValueError("most_common() arg is an empty sequence")
    return counts.most_common(1)[0][0]
def googleNIP(nazwa_firmy):
    """Search Google for NIP numbers mentioned alongside a company name.

    The company name is URL-quoted and searched for as an exact phrase
    together with the word "NIP"; the raw result page is then scanned for
    the text "NIP" followed by a run of 10-17 digits/dashes.

    Parameters
    ----------
    nazwa_firmy : str
        Company name to search for.

    Returns
    -------
    list of str
        Every match found in the page, with dashes removed.  Duplicates are
        kept, so the caller can pick the most frequent value.

    Notes
    -----
    Errors raised while opening the URL are not caught here and propagate
    to the caller.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import urllib`` being executed before the first call.
    import contextlib
    import urllib

    qtitle = urllib.quote_plus(nazwa_firmy)
    url = "https://www.google.pl/search?num=50&q=NIP+%22" + qtitle + "%22"  # google url
    # NOTE(review): the browser-like User-Agent is presumably needed because
    # the default urllib2 agent gets rejected -- confirm.
    req = urllib2.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # closing() guarantees the connection is released even if read() fails.
    with contextlib.closing(urllib2.urlopen(req)) as response:
        html = response.read()

    # "NIP", any non-digit filler, then the number itself (digits and dashes).
    nip_pattern = re.compile(r"NIP[^0-9]*([\d\-]{10,17})")
    # Normalise the NIP format by dropping the dash separators.
    return [match.replace('-', '') for match in nip_pattern.findall(html)]
# Example usage: collect NIP candidates for "KGHM" and show the most frequent one.
nip = googleNIP("KGHM")
print(most_common(nip))
import urllib
import re
from bs4 import BeautifulSoup
def krsDataFromNIP(nip):
    """Scrape basic company data from www.krs.wp.pl for the given NIP.

    Two requests are made: a search for *nip*, then the detail page linked
    from the first search result.

    Parameters
    ----------
    nip : str
        NIP number to search for (URL-quoted before use).

    Returns
    -------
    dict
        ``"nazwa"``  -- text of the ``<strong>`` inside the first ``<span>``
        of the first search result;
        ``"branza"`` -- full text of that same ``<span>`` (so it includes
        the ``<strong>`` text as well);
        ``"zarzad"`` -- list with the text of every ``<li>`` found in the
        third ``div.opis`` of the detail page.

    Raises
    ------
    IndexError
        If the search returns no ``div.obiekt`` result, the result has no
        link, or the detail page has fewer than three ``div.opis`` sections.

    Notes
    -----
    Errors raised while opening either URL are not caught here and
    propagate to the caller.
    """
    # Imported locally so the function is self-contained.
    import contextlib

    headers = {'User-Agent': 'Mozilla/5.0'}
    data = {}  # structure holding the scraped company data

    # Find the company on www.krs.wp.pl.
    qtitle = urllib.quote_plus(nip)
    url = "http://www.krs.wp.pl/szukaj/wszystko/" + qtitle
    req = urllib2.Request(url, headers=headers)
    # closing() releases the connection once the page has been parsed.
    with contextlib.closing(urllib2.urlopen(req)) as response:
        soup = BeautifulSoup(response, "lxml")

    firma = soup.find_all('div', class_='obiekt')
    if not firma:
        raise IndexError("no company found on www.krs.wp.pl for NIP %r" % (nip,))
    first_hit = firma[0]

    links = first_hit.find_all('a')
    if not links:
        raise IndexError("search result for NIP %r has no detail link" % (nip,))
    krs_url = links[0]['href']

    summary = first_hit.find('span')
    data["nazwa"] = summary.find('strong').get_text()
    data["branza"] = summary.get_text()

    # Read the detailed composition of the management board.
    krs_url = "http://www.krs.wp.pl" + krs_url
    req = urllib2.Request(krs_url, headers=headers)
    with contextlib.closing(urllib2.urlopen(req)) as response:
        soup = BeautifulSoup(response, "lxml")

    opis = soup.find_all('div', class_='opis')
    # NOTE(review): the third div.opis is assumed to be the management-board
    # section -- verify against the current page layout.
    if len(opis) < 3:
        raise IndexError(
            "detail page %s has %d 'opis' sections, expected at least 3"
            % (krs_url, len(opis))
        )
    data["zarzad"] = [person.get_text() for person in opis[2].find_all('li')]
    return data
# Example usage: fetch KRS data for the most frequent NIP found above and
# print the company name, its branch line and the board members (one per line).
krs = krsDataFromNIP(most_common(nip))
print(krs["nazwa"])
print(krs["branza"])
print('\n'.join(krs["zarzad"]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment