diff --git a/data/tt0074885.html b/data/tt0074885.html new file mode 100644 index 000000000..ff2bbb463 --- /dev/null +++ b/data/tt0074885.html @@ -0,0 +1,175 @@ + + + + +Mean Johnny Barrows + + + + + + + + + + + + + +
+
+
+
+
+

Mean Johnny Barrows

+
+ +
+
+
+
+
+ +
Mean Johnny Barrows
Film poster by John Solie
Directed byFred Williamson
Written byJolivett Cato
Charles Walker
StarringFred Williamson
Roddy McDowall
Stuart Whitman
Luther Adler
Jenny Sherman
Elliott Gould
Music byColeridge-Taylor Perkinson
Distributed byRamana Productions Inc.
Release date
+
  • January 1976 (1976-01) (U.S.)
+
Running time
75 minutes
CountryUnited States
LanguageEnglish
+

Mean Johnny Barrows is a 1976 American crime drama film directed by and starring Fred Williamson. Stuart Whitman, Luther Adler, Jenny Sherman, and Roddy McDowall also star.[1] +

+ +

Plot

+

Johnny Barrows (played by Fred "The Hammer" Williamson), a winner of the Silver Star, is dishonorably discharged from the army for punching out his captain. Shipped back home Stateside, Johnny promptly gets mugged and hauled in by some racist cops who believe him to be drunk. Unable to secure gainful employment, Johnny finds himself on the soup line (with a cameo from "Special Guest Star" Elliott Gould) and down on his luck. +

Walking into an Italian restaurant hoping for a handout, he's offered a job as a killer by Mafioso Mario Racconi (Stuart Whitman) and his girlfriend Nancy (Jenny Sherman), but Johnny turns him down. It seems that he's not slipped so far as to start doing odd jobs for the Mob. Eventually, Johnny lands a job at a gas station cleaning toilets and scrubbing floors for the mean, penny-pinching Richard (R.G. Armstrong), who receives a beating for ripping off Barrows. +

Meanwhile, a Mafia war starts brewing between the Racconi family and the Da Vincis (the family, not the painter). It seems the Da Vinci family wants to bring in all kinds of dope and start peddling it to black and Hispanic kids. The Racconis, being an upstanding Mob family, want no part of that on their streets. And so it goes: the Racconi family is wiped out in a treacherous double-cross, with only Mario left standing. +

Nancy is kidnapped by the Da Vinci family and gets a message to Johnny claiming that she was made to do "terrible things". Brought to the brink by poverty, The Man constantly screwing him, and his love for Nancy, Johnny agrees to become a hired killer for Mario to avenge the Racconis. And so the body count starts going up as Johnny, in all his white-suited glory, gets mean and starts killing his way through the Da Vinci family. +

+

Cast

+ +

Additional notes

+

The structure of the film had been used a year earlier in the film The Farmer (which was shot in 1975 but released in 1977). +

+

References

+
+
    +
  1. ^ "Mean Johnny Barrows". afi.com. Retrieved 2024-02-02. +
  2. +
+
+ + +


+

+
+
+
+
+
+
+
+ + \ No newline at end of file diff --git a/data/tt0074888.html b/data/tt0074888.html new file mode 100644 index 000000000..c520e2dde --- /dev/null +++ b/data/tt0074888.html @@ -0,0 +1,159 @@ + + + + +The Best Way to Walk + + + + + + + + + + + + + +
+
+
+
+
+

The Best Way to Walk

+
+ +
+
+
+
+
The Best Way to Walk
Theatrical release poster
Directed byClaude Miller
Written byLuc Béraud
Claude Miller
Produced byMag Bodard
Jean-François Davy
StarringPatrick Dewaere
Patrick Bouchitey
Christine Pascal
Claude Piéplu
CinematographyBruno Nuytten
Edited byJean-Bernard Bonis
Music byAlain Jomy
Distributed byAMLF
Release dates
+
  • 3 March 1976 (1976-03-03) (France)
  • +
  • 15 January 1978 (1978-01-15) (U.S.)
+
Running time
82 minutes
CountryFrance
LanguageFrench
Box office$13,793[1] (2008 French reissue)
+

The Best Way to Walk (French: La meilleure façon de marcher) is a 1976 French film directed by Claude Miller, his directorial debut. It stars Patrick Dewaere, Patrick Bouchitey, Christine Pascal, Claude Piéplu and Michel Blanc.[2] +

+ +

Plot

+

Marc and Philippe are two teenage counselors at a summer vacation camp in the French countryside in 1960. Marc is very virile, while Philippe is more reserved. One night, Marc surprises Philippe dressed and made up like a woman. Marc responds by continually humiliating Philippe. Despite their late-adolescent rivalries and sexual confusion, each achieves an awakening. +

+

Awards

+

The film won the César Award for Best Cinematography, and was nominated for Best Film, Best Actor, Best Director, Best Screenplay, Dialogue or Adaptation and Best Sound. +

+

Cast

+ +

References

+
+
    +
  1. ^ "The Best Way to Walk". +
  2. +
  3. ^ "The Best Way to Walk". unifrance.org. Retrieved 2014-03-10. +
  4. +
+
+ + +


+

+
+
+
+
+
+
+
+ + \ No newline at end of file diff --git a/sample_data.xlsx b/sample_data.xlsx index 4d3b3f5a0..a954ef150 100644 Binary files a/sample_data.xlsx and b/sample_data.xlsx differ diff --git a/scripts/dataset_create.py b/scripts/dataset_create.py new file mode 100644 index 000000000..4511228d2 --- /dev/null +++ b/scripts/dataset_create.py @@ -0,0 +1,24 @@ +import pandas as pd +import os +from scrape import extract_movie_info + +script_dir = os.path.dirname(os.path.abspath(__file__)) +file_path = os.path.join(script_dir, "..", "sample_data.xlsx") +movie_data = pd.read_excel(file_path) +print(movie_data.columns) + +script_dir = os.path.dirname(os.path.abspath(__file__)) +movie_html = os.path.join(script_dir, "..", "data", "tt0074888.html") + +title, directed_by, cast, genre, plot = extract_movie_info(movie_html) +new_row = { + "Movie": title, + "Director": directed_by, + "Cast": ", ".join(cast), + "Genre": genre, + "Plot": plot +} + +movie_data.loc[len(movie_data)] = new_row +output_path = os.path.join(script_dir, "..", "updated_data.xlsx") +movie_data.to_excel(output_path, index=False) \ No newline at end of file diff --git a/scripts/scrape.py b/scripts/scrape.py index ac0a44926..4356d3c6f 100644 --- a/scripts/scrape.py +++ b/scripts/scrape.py @@ -1,32 +1,89 @@ -import requests +from bs4 import BeautifulSoup +import os -url = "https://en.wikipedia.org/w/api.php" +script_dir = os.path.dirname(os.path.abspath(__file__)) +file_path = os.path.join(script_dir, "..", "data", "tt0074888.html") -headers = { - "User-Agent": "CSE881-MovieProject/1.0 (ishaa@msu.edu)" -} +def extract_movie_info(file_path): -params = { - "action": "query", - "format": "json", - "titles": "Godfather", - "prop": "extracts", - "explaintext": True, - "redirects": 1 -} + with open(file_path, "r", encoding="utf-8") as f: + html = f.read() -response = requests.get(url, headers=headers, params=params) + soup = BeautifulSoup(html, "lxml") -print("Status:", response.status_code) -print("Content-Type:", 
response.headers.get("content-type")) -print("First 200 chars:\n", response.text[:1000]) + # ----------------------------- + # Title + # ----------------------------- + title_tag = soup.find("h1") + title = title_tag.get_text(strip=True) if title_tag else None -data = response.json() + # ----------------------------- + # Genre (first line) + # ----------------------------- + genre = None + content = soup.find("div", id="mw-content-text") + if content: + first_paragraph = content.find("p") + if first_paragraph: + genre = first_paragraph.get_text(" ", strip=True) + # ----------------------------- + # Infobox: Directed by + Starring + # ----------------------------- + infobox = soup.find("table", class_="infobox") -pages = data["query"]["pages"] -page = next(iter(pages.values())) + directed_by = None + cast = [] -print("\nTitle:", page["title"]) -print("\nPreview:\n", page["extract"]) + if infobox: + rows = infobox.find_all("tr") + + for row in rows: + header = row.find("th") + data = row.find("td") + + if not header or not data: + continue + + header_text = header.get_text(" ", strip=True) + + if header_text == "Directed by": + directed_by = data.get_text(" ", strip=True) + + elif header_text == "Starring": + # Get cast members split by
or links/text + cast_items = list(data.stripped_strings) + cast = cast_items[:5] + + # ----------------------------- + # Plot section + # ----------------------------- + plot = "" + + plot_header = soup.find(id="Plot") + + if plot_header: + # Move to the parent heading container if needed + current = plot_header.parent + + for sibling in current.find_next_siblings(): + # Stop when next h2 section begins + if sibling.name == "div" and "mw-heading2" in sibling.get("class", []): + break + if sibling.name == "p": + plot += sibling.get_text(" ", strip=True) + " " + + plot = plot.strip() + + return title, directed_by, cast, genre, plot + +# ----------------------------- +# Print results +# ----------------------------- +title, directed_by, cast, genre, plot = extract_movie_info(file_path) +print("Title:", title) +print("Directed by:", directed_by) +print("Cast:", cast) +print("Genre:", genre) +print("\nPlot:\n", plot) \ No newline at end of file diff --git a/updated_data.xlsx b/updated_data.xlsx new file mode 100644 index 000000000..fe854a26d Binary files /dev/null and b/updated_data.xlsx differ