import requests
from bs4 import BeautifulSoup
import time
import threading
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

url = "https://www.deviantart.com/amber2024/gallery"

def get_values(url):
    """Scrape the (favourites, comments, views) counts from a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE: "_3AClx" is a build-generated class name and may change whenever
    # DeviantArt redeploys; the string surgery below is correspondingly brittle.
    spans = soup.find_all('span', class_="_3AClx")
    favs = 0
    comments = 0
    views = 0
    #print(spans)
    # Split each stat span into a numeric value and a unit
    # (Favourites, Comments, Views).
    c = 0
    for span in spans:
        print('\n' + str(list(span)) + str(c) + '\n')  # debug output
        value = str(list(span)[0]).strip()
        unit = str(list(span)[2]).lstrip('abcdeghijklmnop qrstuvwxyz_1234567890N"=>')
        # Convert values like "1.2K" to plain numbers.
        if 'K' in value:
            value = float(value[:-1]) * 1000
        else:
            value = int(value)
        print(unit)
        # Route the value to the matching counter.
        if unit == 'Favourites<' or unit == 'Favourite':
            favs = value
        elif unit == 'Comments<' or unit == 'Comment<':
            comments = value
        elif unit == 'Views<' or unit == 'View':
            views = value
        c += 1
    return (favs, comments, views)

def get_tags(url):
    """Scrape the list of tag strings from a deviation page."""
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    spans = soup.find_all('span', class_="_1nwad")
    tags = []
    for span in spans:
        # Take the text between the span's opening and closing tags.
        tags.append(str(span).split('>')[1].split('<')[0])
    print(tags, spans)
    return tags

def get_links(url, page=1):
    """Load one gallery page in Chrome, scroll to the bottom so the
    lazy-loaded thumbnails render, and return every href on the page."""
    service = Service('/Users/osmond/Downloads/chromedriver-mac-arm64/chromedriver')  # Path to chromedriver executable
    driver = webdriver.Chrome(service=service)
    driver.get(url + '?page=' + str(page))
    # Scroll until the page height stops growing.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)  # Give lazy-loaded content time to arrive; adjust as needed.
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    links = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if href:  # skip anchors without an href
            links.append(href)
    driver.quit()  # close the browser so repeated calls don't leak Chrome windows
    return links

drawings = []
names = []

def recursion(url):
    """Despite the name, this does not recurse: it walks the gallery's folder
    links (and their pagination) and collects every /art/ deviation URL into
    the global `drawings` list. Variable glossary: `cecant` holds the links on
    the current page, `secant` flags whether an "All" folder exists, `cosecant`
    collects page numbers, and `cocecant` is the highest page number found."""
    global drawings, names
    recur = []
    cecant = get_links(url)
    secant = False
    cocecant = 1
    cosecant = []
    for i in cecant:
        if '/all' in i and not '/all?' in i:
            secant = True
            recur.append(i)
        if '?page=' in i:
            cosecant.append(int(i.split('?page=')[1]))
    print(cosecant, 'cosecant')
    recur = list(set(recur))
    try:
        cocecant = max(cosecant)
    except ValueError:  # cosecant is empty: no pagination links found
        print('Only One Page')
    print(cocecant, 'cocecant')
    if secant != True:
        # No "All" folder, so queue every individual gallery folder instead.
        for i in cecant:
            if "/gallery/" in i:
                recur.append(i)
    print(recur, 'reccc')
    for j in recur:
        cecant = get_links(j)
        secant = False
        cocecant = 1
        cosecant = []
        for i in cecant:
            if '/all' in i and not '/all?' in i:
                secant = True
                recur.append(i)
            if '?page=' in i:
                cosecant.append(int(i.split('?page=')[1]))
        # NOTE: this rebinds `recur` to a new list, so folders appended after
        # this point are not seen by the outer `for j in recur` loop.
        recur = list(set(recur))
        print(recur)
        print(cosecant, 'cosc')
        try:
            cocecant = max(cosecant)
        except ValueError:
            print('Only One Page')
        for z in range(1, cocecant + 1):
            print(z)
            x = get_links(j, page=z)
            flag = False
            alled = False  # If there is a folder for All Deviations
            for k in x:
                if '/art' in k:
                    flag = True
                    break
            if flag == True:
                print(x, 'xxxxxxxxx')
                for c in x:
                    if "/art/" in c and not "#comments" in c and not c in drawings:
                        drawings.append(c)
                        names.append(c.split('/art/')[1])
                    else:
                        # The first non-matching link marks the end of the
                        # gallery grid, so stop scanning this page.
                        break
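
# Hedged aside: get_values() and get_tags() key on DeviantArt's build-generated
# class names ("_3AClx", "_1nwad"), which change across site redeploys, and the
# unit matching above relies on leftover '<' characters from raw-HTML string
# surgery. Below is a minimal sketch of a sturdier parser, assuming each stat
# span's visible text looks like "1.2K Favourites" or "87 Comments" (an
# assumption about the markup, not verified against the live site). It is not
# wired into the pipeline below.
import re

def parse_stat_text(text):
    # Hypothetical helper: turn "1.2K Favourites" into (1200, 'Favourite').
    match = re.match(r'([\d.]+)\s*(K?)\s*(\w+)', text.strip())
    if match is None:
        return None
    number, kilo, unit = match.groups()
    value = float(number) * (1000 if kilo == 'K' else 1)
    return int(value), unit.rstrip('s')  # normalise plural to singular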
drawings = list(set(drawings))
#print(get_links(url))
recursion(url)
#print(drawings)

finalle = []
names = []

def recur_works():
    """Single-threaded fallback: fetch stats for every collected deviation."""
    global finalle
    for i in drawings:
        finalle.append(get_values(i))

drawings = list(set(drawings))
tag_sets = []

def process_item(item):
    """Process one deviation URL: record its stats, name, and tags."""
    finalle.append(get_values(item))
    names.append(item.split('/art/')[1])
    tag_sets.append(get_tags(item))

# Divide the drawings into chunks for each thread.
num_threads = 1
chunk_size = len(drawings) // num_threads if len(drawings) % num_threads == 0 else len(drawings) // num_threads + 1
chunks = [drawings[i:i + chunk_size] for i in range(0, len(drawings), chunk_size)]

# Create and start the worker threads. Note that this spawns one thread per
# item rather than one per chunk, so num_threads/chunk_size only control
# grouping; because each thread appends to finalle, names, and tag_sets
# independently, entries may interleave out of order under real concurrency.
threads = []
for chunk in chunks:
    for drawing in chunk:
        t = threading.Thread(target=process_item, args=(drawing,))
        threads.append(t)
        t.start()

# Wait for all threads to complete.
for t in threads:
    t.join()

def get_summation():
    """Print the total favourites, comments, and views across all deviations."""
    print(finalle)
    favs = 0
    comm = 0
    view = 0
    for i in finalle:
        if i != False:  # skip any failed lookups
            favs += i[0]
            comm += i[1]
            view += i[2]
    print('favs:', favs, 'comm:', comm, 'view:', view, 'names:', names)

def get_tag_summation():
    """Aggregate per-tag totals: returns a list of
    [tag, (favs, comments, views), occurrence_count] entries."""
    post_processed_tags = []
    indexx = []  # parallel list of tags for index lookups
    for c in range(len(tag_sets)):
        i = tag_sets[c]
        for j in i:
            if j in indexx:
                idx = indexx.index(j)
                # The totals start out as a tuple (from get_values), so convert
                # to a list before accumulating in place.
                post_processed_tags[idx][1] = list(post_processed_tags[idx][1])
                post_processed_tags[idx][2] += 1
                post_processed_tags[idx][1][0] += finalle[c][0]
                post_processed_tags[idx][1][1] += finalle[c][1]
                post_processed_tags[idx][1][2] += finalle[c][2]
            else:
                post_processed_tags.append([j, finalle[c], 1])
                indexx.append(j)
    return post_processed_tags

#recur_works()
get_summation()
e = get_tag_summation()
print(e)
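
# Usage sketch (an add-on example, not part of the original pipeline): rank the
# aggregated tags by total views. It assumes each entry of get_tag_summation()
# keeps the [tag, (favs, comments, views), count] shape built above.
top_tags = sorted(e, key=lambda entry: entry[1][2], reverse=True)
for tag, (tag_favs, tag_comms, tag_views), count in top_tags[:10]:
    print(tag, '->', tag_views, 'views across', count, 'deviation(s)')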