Google processes over 8.5 billion searches per day, making its search results the most valuable dataset for SEO monitoring, market research, and competitive intelligence. But Google is also one of the hardest websites to scrape — it uses sophisticated anti-bot systems that block automated requests aggressively.
This guide shows you how to scrape Google search results reliably in 2026 using Python and rotating proxies. We cover organic results, featured snippets, People Also Ask boxes, and local pack data with working code examples.
Google uses multiple layers of anti-scraping protection: per-IP rate limiting, CAPTCHA and reCAPTCHA challenges, browser and TLS fingerprinting, and behavioral analysis that flags non-human request patterns.
The solution: residential proxy rotation. With rotating residential IPs, each request appears to come from a different real user in a different location.
The simplest approach uses Python’s requests library with BeautifulSoup for HTML parsing. This works well for moderate-scale scraping with proper proxy rotation.
import requests
from bs4 import BeautifulSoup
import random
import time
import urllib.parse
# SpyderProxy residential proxy setup
PROXY_HOST = "geo.spyderproxy.com"
PROXY_PORT = 11000
PROXY_USER = "your_username"
PROXY_PASS = "your_password"


def get_proxy(country="us"):
    """Build a requests-style proxy mapping with a random sticky session.

    A fresh session id on every call makes the gateway assign a new exit
    IP, while the -country- flag pins the exit geography.
    """
    sess = random.randint(100000, 999999)
    credentials = f"{PROXY_USER}-country-{country}-session-{sess}:{PROXY_PASS}"
    endpoint = f"{PROXY_HOST}:{PROXY_PORT}"
    url = f"http://{credentials}@{endpoint}"
    # Same URL serves both schemes; the gateway handles CONNECT for https.
    return dict.fromkeys(("http", "https"), url)
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
]


def get_headers():
    """Assemble browser-like request headers with a randomized User-Agent."""
    # Insertion order matters to some fingerprinting heuristics, so the
    # pairs are kept in the same order a real browser would send them.
    pairs = [
        ("User-Agent", random.choice(USER_AGENTS)),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Accept-Language", "en-US,en;q=0.9"),
        ("Accept-Encoding", "gzip, deflate, br"),
        ("DNT", "1"),
    ]
    return dict(pairs)
def scrape_google(query, num_results=10, country="us"):
    """Fetch and parse one Google SERP, retrying up to three times.

    Every attempt draws a fresh proxy session (new exit IP). Returns the
    parsed result list on success, or None once all retries are spent.
    """
    encoded_query = urllib.parse.quote_plus(query)
    url = f"https://www.google.com/search?q={encoded_query}&num={num_results}&hl=en&gl={country}"

    for attempt in range(3):
        try:
            response = requests.get(
                url,
                headers=get_headers(),
                proxies=get_proxy(country),
                timeout=15,
            )
        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
            time.sleep(3)
            continue

        if response.status_code == 200:
            return parse_results(response.text, query)
        if response.status_code == 429:
            # Back off longer on explicit rate limiting; the next loop
            # iteration rotates to a brand-new proxy session anyway.
            print(f"Rate limited, rotating proxy (attempt {attempt + 1})")
            time.sleep(random.uniform(5, 10))
        else:
            print(f"Status {response.status_code}, retrying...")
            time.sleep(3)
    return None
def parse_results(html, query):
    """Turn raw SERP HTML into a list of organic-result dicts.

    Each dict carries position, title, url, snippet and the originating
    query. Positions count every `div.g` container, including ones that
    are skipped for lacking a title/link.
    """
    soup = BeautifulSoup(html, "html.parser")
    parsed = []
    for rank, block in enumerate(soup.select("div.g"), start=1):
        heading = block.select_one("h3")
        anchor = block.select_one("a[href]")
        if not (heading and anchor):
            continue
        target = anchor.get("href", "")
        # Redirect-style links wrap the real URL in /url?q=...&...
        if target.startswith("/url?q="):
            target = urllib.parse.unquote(target.split("/url?q=")[1].split("&")[0])
        desc = block.select_one("div.VwiC3b, span.aCOpRe")
        parsed.append({
            "position": rank,
            "title": heading.get_text(strip=True),
            "url": target,
            "snippet": desc.get_text(strip=True) if desc else "",
            "query": query,
        })
    return parsed
# Example: Scrape results for a keyword
results = scrape_google("best residential proxies 2026", num_results=10)
for r in results or []:
    print(f"#{r['position']} {r['title']}")
    print(f"  {r['url']}")
    print(f"  {r['snippet'][:100]}...")
    print()
def extract_paa(html):
    """Pull People Also Ask question strings out of SERP HTML."""
    soup = BeautifulSoup(html, "html.parser")
    questions = []
    # PAA container markup varies; try both known selector shapes.
    for container in soup.select("div.related-question-pair, div[data-sgrd]"):
        label = container.select_one("span, div.JlqpRe")
        if label:
            questions.append(label.get_text(strip=True))
    return questions
def scrape_with_paa(query, country="us"):
    """Fetch one SERP and return both organic results and PAA questions.

    Single request, no retries; returns None on any non-200 status.
    """
    q = urllib.parse.quote_plus(query)
    serp_url = f"https://www.google.com/search?q={q}&num=10&hl=en&gl={country}"
    response = requests.get(serp_url, headers=get_headers(), proxies=get_proxy(country), timeout=15)
    if response.status_code != 200:
        return None
    html = response.text
    return {
        "organic": parse_results(html, query),
        "people_also_ask": extract_paa(html),
    }
Some Google features (local pack, knowledge panels, AI overviews) require JavaScript rendering. Playwright handles this while supporting proxy rotation.
from playwright.sync_api import sync_playwright
import random
def scrape_google_playwright(query, proxy_user, proxy_pass, country="us"):
    """Scrape Google via headless Chromium routed through a sticky proxy session.

    Captures JS-rendered SERP features (organic results plus the featured
    snippet) that plain HTTP fetches cannot see.
    """
    sess = random.randint(100000, 999999)
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            proxy={
                "server": "http://geo.spyderproxy.com:11000",
                "username": f"{proxy_user}-country-{country}-session-{sess}",
                "password": proxy_pass,
            },
        )
        context = browser.new_context(
            user_agent=random.choice(USER_AGENTS),
            viewport={"width": 1920, "height": 1080},
            locale="en-US",
        )
        page = context.new_page()
        page.goto(
            f"https://www.google.com/search?q={query}&hl=en&gl={country}",
            timeout=30000,
        )
        page.wait_for_load_state("networkidle")

        # Organic results, read from the rendered DOM in-page.
        organic = page.evaluate("""() => {
            const items = document.querySelectorAll('div.g');
            return Array.from(items).map((el, i) => ({
                position: i + 1,
                title: el.querySelector('h3')?.textContent || '',
                url: el.querySelector('a')?.href || '',
                snippet: el.querySelector('div.VwiC3b, span.aCOpRe')?.textContent || '',
            }));
        }""")

        # Featured snippet ("position zero"), when present.
        featured = page.evaluate("""() => {
            const fs = document.querySelector('div.xpdopen, div.ifM9O');
            if (!fs) return null;
            return {
                text: fs.querySelector('span.hgKElc, div.LGOjhe')?.textContent || '',
                source_url: fs.querySelector('a')?.href || '',
                source_title: fs.querySelector('h3')?.textContent || '',
            };
        }""")

        browser.close()
    return {"organic": organic, "featured_snippet": featured}
For large-scale SERP monitoring, you need concurrent scraping with proper proxy management.
import concurrent.futures
import csv
def bulk_scrape_keywords(keywords, country="us", max_workers=5):
    """Scrape many keywords on a thread pool and pool all result rows."""

    def _worker(kw):
        # Random stagger so the workers don't all fire at once.
        time.sleep(random.uniform(1, 4))
        return scrape_google(kw, num_results=10, country=country) or []

    collected = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {pool.submit(_worker, kw): kw for kw in keywords}
        for done in concurrent.futures.as_completed(pending):
            kw = pending[done]
            try:
                rows = done.result()
            except Exception as e:
                print(f"Error scraping '{kw}': {e}")
            else:
                collected.extend(rows)
                print(f"Scraped '{kw}': {len(rows)} results")
    return collected
def export_to_csv(results, filename="serp_data.csv"):
    """Write SERP result dicts to a CSV file.

    Column order comes from the first row's keys; all rows are assumed to
    share that schema. When `results` is empty, nothing is written and no
    message is printed.
    """
    if not results:
        return
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=results[0].keys())
        writer.writeheader()
        writer.writerows(results)
    # Bug fix: the message previously printed the literal text "(unknown)"
    # instead of the actual output path.
    print(f"Exported {len(results)} results to {filename}")
# Example: Track 50 keywords
# NOTE(review): this runs at import time and performs live network
# requests via scrape_google; wrap in `if __name__ == "__main__":` if
# this file is ever imported as a module.
keywords = [
    "best residential proxies",
    "web scraping proxy",
    "rotating proxy service",
    # ... add your keywords
]
# Fan out across 5 worker threads, then dump every row to serp_data.csv.
results = bulk_scrape_keywords(keywords, country="us", max_workers=5)
export_to_csv(results)
| Proxy Type | Google Success Rate | Speed | Best For | SpyderProxy Price |
|---|---|---|---|---|
| Premium Residential | 90-95% | Fast | General SERP scraping, rank tracking | From $2.75/GB |
| Budget Residential | 80-90% | Fast | High-volume keyword monitoring | From $1.75/GB |
| LTE Mobile | 95%+ | Good | Local pack scraping, mobile SERPs | From $2/proxy |
| Datacenter | 40-60% | Fastest | Only for low-volume with long delays | From $3.55/mo |
Recommendation: Premium Residential proxies with country targeting give the best results for Google scraping. Use -country-us or -country-gb session flags to get results as seen from specific countries. For local SEO auditing across cities, LTE Mobile proxies provide the most authentic geo-targeted results.
One of the most powerful applications is checking rankings from different countries. With SpyderProxy’s 195+ country coverage, you can see exactly what users in any location see.
def compare_rankings_by_country(query, countries):
    """Run the same query through proxy exits in several countries.

    Returns a mapping of country code -> parsed results (or None when
    scraping failed for that country).
    """
    by_country = {}
    for cc in countries:
        # Pause between countries to stay under rate limits.
        time.sleep(random.uniform(2, 5))
        rows = scrape_google(query, num_results=10, country=cc)
        by_country[cc] = rows
        count = len(rows) if rows else 0
        print(f"Scraped {cc}: {count} results")
    return by_country
# Example: Check rankings in US, UK, Germany, France
countries = ["us", "gb", "de", "fr"]
rankings = compare_rankings_by_country("best proxy service", countries)
# Print the top 5 organic results per country for a quick side-by-side.
for country, results in rankings.items():
    print(f"\n=== {country.upper()} ===")
    if results:  # results is None when every retry failed for a country
        for r in results[:5]:
            print(f"  #{r['position']} {r['title']}")
Use geo-targeted session flags (e.g. -country-us) for accurate geo-specific results. Scraping publicly visible search results is generally legal, but Google’s Terms of Service prohibit automated access. Many businesses do it for SEO monitoring. Always consult legal counsel for your specific situation.
Use rotating residential proxies (new IP per request), randomize User-Agents, add random delays between requests, and keep concurrency low. SpyderProxy’s residential proxies with auto-rotation make this straightforward.
You might get 10–20 requests through before hitting a CAPTCHA. For any production use case, residential proxies are essential. See our proxy comparison guide for details.
Organic rankings with titles, URLs, and snippets; featured snippets; People Also Ask questions; local pack results; knowledge panels; ad placements; and related searches at the bottom of the page.
With Premium Residential proxies and 5 concurrent workers, you can reliably scrape 5,000–10,000 keywords per day. Scale up by increasing proxy bandwidth and workers.
Google SERP scraping is essential for SEO professionals, marketers, and data analysts who need real-time search intelligence. The key to reliability is residential proxy rotation — without it, Google blocks automated requests within minutes.
Start with the requests + BeautifulSoup approach for most use cases, and upgrade to Playwright when you need JavaScript-rendered features. Pair either method with SpyderProxy residential proxies for consistent, block-free results across 195+ countries.
Get started with SpyderProxy — our rotating residential proxies with country targeting are purpose-built for SERP scraping at scale.