Wikipedia movie scraping using API code
This commit is contained in:
30
scripts/scrape.py
Normal file
30
scripts/scrape.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Fetch and preview the plain-text extract of a Wikipedia article.

Sends one ``action=query`` / ``prop=extracts`` request to the English
Wikipedia API for the title configured in ``params``, prints basic
response diagnostics (status, content type, first 200 characters of the
body), then prints the resolved page title and the first 500 characters
of its extract.

Exits with an error message if the API reports an error or the page does
not exist; raises ``requests.HTTPError`` on a non-2xx HTTP status.
"""
import requests

url = "https://en.wikipedia.org/w/api.php"

# Identify the client and a contact address to the API operators.
headers = {
    "User-Agent": "CSE881-MovieProject/1.0 (ishaa@msu.edu)"
}

params = {
    "action": "query",
    "format": "json",
    "prop": "extracts",
    "titles": "Interstellar",
    "explaintext": True,
    "redirects": 1
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

response = requests.get(
    url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
)

# Print the diagnostics first so they are still visible when the request
# turns out to have failed.
print("Status:", response.status_code)
print("Content-Type:", response.headers.get("content-type"))
print("First 200 chars:\n", response.text[:200])

# Stop on HTTP errors here instead of failing later while trying to
# decode an error page as JSON.
response.raise_for_status()

data = response.json()

# An API-level failure comes back as an "error" object with no "query" key.
if "query" not in data:
    raise SystemExit(f"Wikipedia API returned no query result: {data.get('error', data)}")

pages = data["query"]["pages"]
# Only one title was requested, so only the first page entry is inspected.
page = next(iter(pages.values()), None)

# A nonexistent title yields a page entry flagged "missing" with no extract.
if page is None or "missing" in page:
    raise SystemExit(f"No Wikipedia page found for {params['titles']!r}")

print("\nTitle:", page["title"])
print("\nPreview:\n", page.get("extract", "")[:500])
|
||||
Reference in New Issue
Block a user