How can I fetch multiple URLs and parse their responses at the same time (concurrently) using BeautifulSoup?
Here is my code:
import requests
import time
import sha3
from bs4 import BeautifulSoup
from ecdsa import SigningKey, SECP256k1

start = time.time()
i = 1
while i <= 100:
    t_time = time.time() - start
    # Generating the addresses is fast (about 0.125 s for 100 addresses), but
    # generating, scanning and showing the balance and tokens for 100 addresses
    # takes about 115 seconds. I want to save that time.
    keccak = sha3.keccak_256()
    priv = SigningKey.generate(curve=SECP256k1)
    pub = priv.get_verifying_key().to_string()
    keccak.update(pub)
    address = keccak.hexdigest()[24:]
    # The address is already generated before the balance and token count are shown.
    # After that I want to query multiple URLs at the same time, e.g. scan several
    # addresses concurrently and show their balance and tokens, to reduce the total time.
    url = "https://etherscan.io/address/" + address
    html = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}).text
    soup = BeautifulSoup(html, "html.parser")
    box = soup.find("div", class_="col-md-6 mb-3 mb-md-0")
    data = box.find_all('div', class_='col-md-8')
    try:
        balance = data[0].text.split(' ')[0].strip()
        # only the token count is shown, e.g.
        # 0x**********************************[hidden] : this address has 9 tokens
        token = data[2].text.split(' ')[0].strip()
    except IndexError:
        token = 0
    print(address + " " + balance + " " + str(token))
    print(t_time)
    i = i + 1
You can use multi-threading to make this faster by running the requests concurrently. Python's standard library includes a module called threading that provides almost everything you need to implement multi-threading in your program. Here is a simple program that uses it:
import threading

# function to be run in each thread
def func(i):
    print("thread:" + str(i))

if __name__ == "__main__":
    # creating the threads
    th1 = threading.Thread(target=func, args=(1,))
    th2 = threading.Thread(target=func, args=(2,))
    th3 = threading.Thread(target=func, args=(3,))

    # starting the threads
    th1.start()
    th2.start()
    th3.start()

    # wait until the threads complete their execution
    th1.join()
    th2.join()
    th3.join()

    print("all the threads executed!")
As far as your program is concerned, you can move the code inside the while loop into a function and run that function in multiple threads. Because the work is I/O-bound (most of the time is spent waiting for HTTP responses), threads give a real speed-up here. The program will look like this:
import threading
import requests
import time
import sha3
from bs4 import BeautifulSoup
from ecdsa import SigningKey, SECP256k1

start = time.time()

def func(i):
    t_time = time.time() - start
    keccak = sha3.keccak_256()
    priv = SigningKey.generate(curve=SECP256k1)
    pub = priv.get_verifying_key().to_string()
    keccak.update(pub)
    address = keccak.hexdigest()[24:]
    url = "https://etherscan.io/address/" + address
    html = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}).text
    time.sleep(10)
    soup = BeautifulSoup(html, "html.parser")
    box = soup.find("div", class_="col-md-6 mb-3 mb-md-0")
    data = box.find_all('div', class_='col-md-8')
    balance = 0  # default so the print below never hits an undefined name
    try:
        balance = data[0].text.split(' ')[0].strip()
        token = data[2].text.split(' ')[0].strip()
    except IndexError:
        token = 0
    print(address + " " + str(balance) + " " + str(token))
    print(t_time)

# list of threads
thread_list = []

# creating the threads
i = 0
while i <= 100:
    t = threading.Thread(target=func, args=(i,))
    thread_list.append(t)
    i = i + 1

# starting the threads
i = 0
while i <= 100:
    thread_list[i].start()
    i = i + 1

# waiting for completion
i = 0
while i <= 100:
    thread_list[i].join()
    i = i + 1
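If you would rather not manage the thread objects by hand, the standard library's concurrent.futures.ThreadPoolExecutor does the same job with a bounded pool of worker threads, which also keeps you from hitting Etherscan with 100 simultaneous requests. This is a minimal sketch reusing the func defined above; the max_workers value of 10 is just an illustrative choice, not something from the original code:

import concurrent.futures

# run func for i = 0..100 using at most 10 worker threads at a time;
# max_workers is an assumed value, tune it to what the site tolerates
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(func, i) for i in range(101)]
    # wait for every submitted task and surface any exception it raised
    for f in concurrent.futures.as_completed(futures):
        f.result()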

However, the second version can still fail: when you fire hundreds of requests concurrently, the site may respond slowly, rate-limit you, or return a page without the expected markup. In that case soup.find() returns None, so box is None and calling box.find_all() raises an AttributeError ('NoneType' object has no attribute 'find_all').
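One way to make func robust against that is to check whether the expected element was actually found before parsing it, and retry a few times if it was not. Here is a rough sketch of such a guard; fetch_box is a hypothetical helper name, and the retry count and delay are illustrative values only:

import time
import requests
from bs4 import BeautifulSoup

def fetch_box(address, retries=3, delay=5):
    # fetch the Etherscan page and return the balance container div,
    # retrying a few times if the expected markup is missing;
    # retries/delay are assumed values, not fixed requirements
    url = "https://etherscan.io/address/" + address
    for attempt in range(retries):
        html = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}).text
        soup = BeautifulSoup(html, "html.parser")
        box = soup.find("div", class_="col-md-6 mb-3 mb-md-0")
        if box is not None:
            return box
        time.sleep(delay)   # back off before trying again
    return None             # caller must handle a missing box

# inside func you would then use it like this:
# box = fetch_box(address)
# if box is None:
#     print(address + " page did not contain the expected data")
#     return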