diff --git a/data/processed/wikipedia_html/tt0074885/Mean_johnny_barrows_poster_01.jpg b/data/processed/wikipedia_html/tt0074885/Mean_johnny_barrows_poster_01.jpg new file mode 100644 index 000000000..def287049 Binary files /dev/null and b/data/processed/wikipedia_html/tt0074885/Mean_johnny_barrows_poster_01.jpg differ diff --git a/data/processed/wikipedia_html/tt0074885/tt0074885.html b/data/processed/wikipedia_html/tt0074885/tt0074885.html new file mode 100644 index 000000000..ff2bbb463 --- /dev/null +++ b/data/processed/wikipedia_html/tt0074885/tt0074885.html @@ -0,0 +1,175 @@ + + + + +Mean Johnny Barrows + + + + + + + + + + + + + +
+
+
+
+
+

Mean Johnny Barrows

+
+ +
+
+
+
+
+ +
Mean Johnny Barrows
Film poster by John Solie
Directed by Fred Williamson
Written by Jolivett Cato
Charles Walker
Starring Fred Williamson
Roddy McDowall
Stuart Whitman
Luther Adler
Jenny Sherman
Elliott Gould
Music by Coleridge-Taylor Perkinson
Distributed by Ramana Productions Inc.
Release date
+
  • January 1976 (1976-01) (U.S.)
+
Running time
75 minutes
Country United States
Language English
+

Mean Johnny Barrows is a 1976 American crime drama film directed by and starring Fred Williamson; Stuart Whitman, Luther Adler, Jenny Sherman and Roddy McDowall also star.[1] +

+ +

Plot

+

Johnny Barrows (played by Fred "The Hammer" Williamson), a winner of the Silver Star, is dishonorably discharged from the Army for punching out his captain. Shipped back home stateside, Johnny is promptly mugged and hauled in by racist cops who take him for a drunk. Unable to secure gainful employment, Johnny finds himself on the soup line (with a cameo from "Special Guest Star" Elliott Gould) and down on his luck. +

Walking into an Italian restaurant hoping for a handout, he is instead offered a job as a killer by Mafioso Mario Racconi (Stuart Whitman) and his girlfriend Nancy (Jenny Sherman), but Johnny turns them down; he has not slipped so far as to start doing odd jobs for the Mob. Eventually, Johnny lands a job at a gas station cleaning toilets and scrubbing floors for the mean, penny-pinching Richard (R.G. Armstrong), who later receives a beating for ripping off Barrows. +

Meanwhile, a Mafia war starts brewing between the Racconi family and the Da Vincis (the family, not the painter). The Da Vinci family wants to bring in all kinds of dope and start peddling it to black and Hispanic kids; the Racconis, being an upstanding Mob family, want no part of that on their streets. And so the Racconi family is wiped out in a treacherous double-cross, with only Mario left standing. +

Nancy is kidnapped by the Da Vinci family and gets a message to Johnny claiming that she was made to do "terrible things". Driven to the brink by poverty, constant mistreatment by The Man, and his love for Nancy, Johnny agrees to become a hired killer for Mario to avenge the Racconis. The body count mounts as Johnny, in all his white-suited glory, gets mean and starts killing his way through the Da Vinci family. +

+

Cast

+ +

Additional notes

+

The structure of the film had been used a year earlier in The Farmer, which was shot in 1975 but not released until 1977. +

+

References

+
+
    +
  1. ^ "Mean Johnny Barrows". afi.com. Retrieved 2024-02-02. +
+
+ + +


+

+
+
+
+
+
+
+
+ + \ No newline at end of file diff --git a/data/processed/wikipedia_html/tt0074888/La-meilleure-facon-de-marcher.jpg b/data/processed/wikipedia_html/tt0074888/La-meilleure-facon-de-marcher.jpg new file mode 100644 index 000000000..e66679c94 Binary files /dev/null and b/data/processed/wikipedia_html/tt0074888/La-meilleure-facon-de-marcher.jpg differ diff --git a/data/processed/wikipedia_html/tt0074888/tt0074888.html b/data/processed/wikipedia_html/tt0074888/tt0074888.html new file mode 100644 index 000000000..c520e2dde --- /dev/null +++ b/data/processed/wikipedia_html/tt0074888/tt0074888.html @@ -0,0 +1,159 @@ + + + + +The Best Way to Walk + + + + + + + + + + + + + +
+
+
+
+
+

The Best Way to Walk

+
+ +
+
+
+
+
The Best Way to Walk
Theatrical release poster
Directed by Claude Miller
Written by Luc Béraud
Claude Miller
Produced by Mag Bodard
Jean-François Davy
Starring Patrick Dewaere
Patrick Bouchitey
Christine Pascal
Claude Piéplu
Cinematography Bruno Nuytten
Edited by Jean-Bernard Bonis
Music by Alain Jomy
Distributed by AMLF
Release dates
+
  • 3 March 1976 (1976-03-03) (France)
  • +
  • 15 January 1978 (1978-01-15) (U.S.)
+
Running time
82 minutes
Country France
Language French
Box office $13,793[1] (2008 French reissue)
+

The Best Way to Walk (French: La meilleure façon de marcher) is a 1976 French film and the directorial debut of Claude Miller. It stars Patrick Dewaere, Patrick Bouchitey, Christine Pascal, Claude Piéplu and Michel Blanc.[2] +

+ +

Plot

+

Marc and Philippe are two teenage counselors at a summer vacation camp in the French countryside in 1960. Marc is aggressively masculine, while Philippe is more reserved. One night, Marc surprises Philippe dressed and made up like a woman, and responds by continually humiliating him. Despite their late-adolescent rivalries and sexual confusion, each achieves an awakening. +

+

Awards

+

The film won the César Award for Best Cinematography, and was nominated for Best Film, Best Actor, Best Director, Best Screenplay, Dialogue or Adaptation and Best Sound. +

+

Cast

+ +

References

+
+
    +
  1. ^ "The Best Way to Walk". +
  2. +
  3. ^ "The Best Way to Walk". unifrance.org. Retrieved 2014-03-10. +
+
+ + +


+

+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/data/raw/wikipedia/.gitkeep b/data/raw/wikipedia/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/requirements.txt b/requirements.txt
index c2685294d..94ce5a178 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,9 @@
 # Run the following to install:
 # pip install -r requirements.txt
-pandas
-dtale
\ No newline at end of file
+pandas~=3.0.0
+dtale~=3.19.1
+requests~=2.32.5
+beautifulsoup4~=4.14.3
+libzim~=3.8.0
+python-slugify~=8.0.4
\ No newline at end of file
diff --git a/scripts/extract_wiki_html.py b/scripts/extract_wiki_html.py
new file mode 100644
index 000000000..c6cfced93
--- /dev/null
+++ b/scripts/extract_wiki_html.py
@@ -0,0 +1,115 @@
+import os
+import re
+import csv
+import pandas as pd
+from bs4 import BeautifulSoup
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+INPUT_DIR = os.path.join(BASE_DIR, "../data/processed/wikipedia_html")
+OUTPUT_TSV = os.path.join(BASE_DIR, "../data/processed/spreadsheet/wikipedia_metadata4.tsv")
+
+WHITELIST = {
+    "slug",
+    "title",
+    "poster_filename",
+    "Directed by",
+    "Produced by",
+    "Written by",
+    "Starring",
+    "Release date",
+    "Running time",
+    "Country",
+    "Language",
+    "Budget",
+    "Box office",
+    "Plot"
+}
+
+def clean(el):
+    if not el:
+        return ""
+    for br in el.find_all("br"):
+        br.replace_with(" | ")
+    return re.sub(r"\s+", " ", el.get_text(" ", strip=True)).strip()
+
+def parse_html(path, slug):
+    with open(path, encoding="utf-8") as f:
+        soup = BeautifulSoup(f, "html.parser")
+    row = {"slug": slug}
+    h1 = soup.select_one("h1.firstHeading")
+    if h1:
+        row["title"] = h1.get_text(strip=True)
+    else:
+        row["title"] = ""
+    # infobox
+    infobox = soup.select_one("table.infobox")
+    if infobox:
+        img = infobox.select_one("img")
+        if img and img.get("src"):
+            row["poster_filename"] = os.path.basename(img["src"])
+        else:
+            row["poster_filename"] = ""
+        for tr in infobox.select("tr"):
+            th = tr.select_one(".infobox-label")
+            td = tr.select_one(".infobox-data")
+            if th and td:
+                row[clean(th)] = clean(td)
+    # sections
+    content = soup.select_one(".mw-parser-output")
+    if not content:
+        return {k: v for k, v in row.items() if k in WHITELIST}
+    skip = {"references", "external links", "see also"}
+    current = None
+    lead = []
+    for el in content.children:
+        if getattr(el, "name", None) == "div" and "mw-heading" in el.get("class", []):
+            h = el.find(["h2", "h3", "h4", "h5", "h6"])  # assuming no heading levels beyond h6 need to be looked at
+            if h:
+                title = clean(h)
+                if title.lower() in skip:
+                    current = None
+                else:
+                    current = title
+                if current:
+                    row[current] = ""
+            continue
+        if not current:
+            if getattr(el, "name", None) == "p":
+                text = clean(el)
+                if text:
+                    lead.append(text)
+            continue
+        if getattr(el, "name", None) in ("p", "ul", "ol", "table"):  # getattr: NavigableStrings have no tag name
+            text = clean(el)
+            if text:
+                row[current] = (row[current] + " | " + text) if row[current] else text
+    if lead:
+        if row.get("Plot"):
+            row["Plot"] = " | ".join(lead) + " | " + row["Plot"]
+        else:
+            row["Plot"] = " | ".join(lead)
+    return {k: v for k, v in row.items() if k in WHITELIST}
+
+def main():
+    rows = []
+    for folder in os.listdir(INPUT_DIR):
+        path = os.path.join(INPUT_DIR, folder)
+        if not os.path.isdir(path):  # skip stray files such as .gitkeep
+            continue
+        html = next((f for f in os.listdir(path) if f.endswith(".html")), None)
+        if not html:
+            continue
+        try:
+            rows.append(parse_html(os.path.join(path, html), folder))
+        except Exception as e:
+            print("error:", html, e)
+    df = pd.DataFrame(rows).fillna("")
+    if df.empty:
+        print("The folder was empty / none parsed")
+        return
+    cols = ["slug", "poster_filename"] + [c for c in df.columns if c not in ("slug", "poster_filename")]
+    df = df[cols]
+    os.makedirs(os.path.dirname(OUTPUT_TSV), exist_ok=True)
+    df.to_csv(OUTPUT_TSV, sep="\t", index=False, quoting=csv.QUOTE_NONE, escapechar="\\")
+    print(f"Wrote {len(df)} rows -> {OUTPUT_TSV}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/extract_wiki_zim.py b/scripts/extract_wiki_zim.py
new file mode 100644
index 000000000..38955b63a
--- /dev/null
+++ b/scripts/extract_wiki_zim.py
@@ -0,0 +1,103 @@
+import shutil
+import re
+from bs4 import BeautifulSoup
+import os
+from libzim.reader import Archive
+from libzim.search import Query, Searcher
+import csv
+from slugify import slugify
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+INPUT_TSV = os.path.abspath(os.path.join(BASE_DIR, "../data/raw/imdb_datasets/title.basics.tsv"))
+OUTPUT_DIR = os.path.abspath(os.path.join(BASE_DIR, "../data/processed/wikipedia_html"))
+ZIM_PATH = os.path.abspath(os.path.join(BASE_DIR, "../data/raw/wikipedia/wikipedia_en_all_maxi_2025-08.zim"))
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+zim = Archive(ZIM_PATH)
+searcher = Searcher(zim)
+print("The ZIM file is now open")
+
+
+def sanitize_slug(slug):
+    return slugify(slug, separator="_", max_length=200) or "_unknown"
+
+# Fetch the HTML AND the images and put them in a folder
+def fetch_wikipedia_html_with_images(query, save_dir):
+    q = Query().set_query(query)
+    search = searcher.search(q)
+    if search.getEstimatedMatches() == 0:
+        return None
+    results = list(search.getResults(0, 5))
+    best_path = results[0]
+    try:
+        entry = zim.get_entry_by_path(best_path)
+        item = entry.get_item()
+        html_content = bytes(item.content).decode("UTF-8")
+    except Exception:
+        return None
+    soup = BeautifulSoup(html_content, "html.parser")
+    for img in soup.find_all("img"):
+        src = img.get("src")
+        if not src:
+            continue
+        img_path = src.lstrip("/")
+        try:
+            img_entry = zim.get_entry_by_path(img_path)
+            img_bytes = bytes(img_entry.get_item().content)
+        except Exception:
+            continue
+        img_name = os.path.basename(img_path)
+        img_file_path = os.path.join(save_dir, img_name)
+        with open(img_file_path, "wb") as f:
+            f.write(img_bytes)
+        img["src"] = img_name
+    return str(soup), best_path
+
+# Go through each row of the TSV file and try to get the movie on wiki
+with open(INPUT_TSV, encoding="utf-8") as f:
+    reader = csv.DictReader(f, delimiter="\t")
+    for row in reader:
+        tconst = row["tconst"]
+        title = row["primaryTitle"]
+        year = row["startYear"]
+        titleType = row["titleType"]
+        if year is None or titleType != "movie":
+            print("Skipping from TSV:", title)
+            continue
+        already_done = False
+        for d in os.listdir(OUTPUT_DIR):
+            if os.path.exists(os.path.join(OUTPUT_DIR, d, f"{tconst}.html")):
+                already_done = True
+                break
+        if already_done:
+            print(f"Skipping already processed: {tconst}")
+            continue
+        # folder for each movie
+        movie_dir = os.path.join(OUTPUT_DIR, f"_tmp_{tconst}")
+        os.makedirs(movie_dir, exist_ok=True)
+        query = f"{title} ({year} film)" if year != "\\N" else title  # if year not empty
+        print(f"Fetching Wikipedia HTML + images for {tconst}: {query}")
+        result = fetch_wikipedia_html_with_images(query, movie_dir)
+        if result is None:
+            print("Wikipedia fetch failed")
+            shutil.rmtree(movie_dir, ignore_errors=True)
+            continue
+        else:
+            html_with_images, slug = result
+        slug_dir = os.path.join(OUTPUT_DIR, sanitize_slug(slug))
+        if html_with_images:
+            if "Directed by" not in html_with_images:
+                shutil.rmtree(movie_dir, ignore_errors=True)
+                continue
+            if os.path.exists(slug_dir):
+                shutil.rmtree(movie_dir, ignore_errors=True)
+            else:
+                os.rename(movie_dir, slug_dir)
+            outfile = os.path.join(slug_dir, f"{tconst}.html")
+            if os.path.exists(outfile):
+                continue
+            with open(outfile, "w", encoding="utf-8") as out:
+                out.write(html_with_images)
+        else:
+            shutil.rmtree(movie_dir, ignore_errors=True)
+            print(f"no Wikipedia page found for {query}")
\ No newline at end of file
diff --git a/scripts/rank_cols.py b/scripts/rank_cols.py
new file mode 100644
index 000000000..03aa1ed94
--- /dev/null
+++ b/scripts/rank_cols.py
@@ -0,0 +1,63 @@
+import os
+import csv
+import sys
+from collections import defaultdict
+from tqdm import tqdm
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TSV_PATH = os.path.join(BASE_DIR, "../data/processed/spreadsheet/wikipedia_metadata3.tsv")
+OUTPUT_PATH = os.path.join(BASE_DIR, "../data/processed/spreadsheet/rank_cols_output.txt")
+
+csv.field_size_limit(min(sys.maxsize, 2**31 - 1))  # try to increase the max field size so large cells don't fail
+# https://stackoverflow.com/questions/53538888/counting-csv-column-occurrences-on-the-fly-in-python
+
+def main():
+    lines = []
+
+    def log(msg=""):
+        print(msg)
+        lines.append(str(msg))
+
+    log(f"Reading: {TSV_PATH}")
+
+    file_size = os.path.getsize(TSV_PATH)
+    col_filled = defaultdict(int)
+    row_count = 0
+
+    with open(TSV_PATH, encoding="utf-8", buffering=4 * 1024 * 1024) as f:
+        reader = csv.reader(f, delimiter="\t")
+        headers = next(reader)
+        num_cols = len(headers)
+
+        with tqdm(total=file_size, unit="B", unit_scale=True, unit_divisor=1024, desc="Processing") as pbar:
+            for row in reader:
+                row_count += 1
+                for i, val in enumerate(row):
+                    if val and val.strip():
+                        col_filled[headers[i]] += 1
+                pbar.update(sum(map(len, row)) + num_cols)  # approximate bytes read, for the progress bar
+
+    log(f"\nTotal rows: {row_count:,}")
+    log(f"Total columns: {num_cols}\n")
+
+    if row_count == 0:  # avoid dividing by zero on an empty file
+        log("No data rows found")
+        return
+
+    ranked = sorted(
+        headers,
+        key=lambda c: col_filled.get(c, 0) / row_count,
+        reverse=True,
+    )
+
+    log(f"{'#':<5} {'Column':<40} {'Filled':>10} {'Total':>10} {'Fill %':>8}")
+    log("-" * 75)
+    for i, col in enumerate(ranked, 1):
+        filled = col_filled.get(col, 0)
+        pct = filled / row_count * 100
+        log(f"{i:<5} {col:<40} {filled:>10,} {row_count:>10,} {pct:>7.1f}%")
+
+    with open(OUTPUT_PATH, "w", encoding="utf-8") as out:
+        out.write("\n".join(lines))
+
+    print(f"\nOutput written to: {OUTPUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/scrape_wiki.py b/scripts/scrape_wiki.py
new file mode 100644
index 000000000..8c7c1e7bd
--- /dev/null
+++ b/scripts/scrape_wiki.py
@@ -0,0 +1,69 @@
+import csv
+import os
+import requests
+from time import sleep
+
+HEADERS = {"User-Agent": "cse881"}
+SEARCH_URL = "https://en.wikipedia.org/w/api.php"
+BASE_URL = "https://en.wikipedia.org/api/rest_v1"
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+INPUT_TSV = os.path.abspath(os.path.join(BASE_DIR, "../data/raw/imdb_datasets/title.basics.test.tsv"))
+OUTPUT_DIR = os.path.abspath(os.path.join(BASE_DIR, "../data/raw/wikipedia/wikipedia_html"))
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+def fetch_wikipedia_html(query):
+    params = {
+        "action": "query",
+        "list": "search",
+        "srsearch": query,
+        "format": "json"
+    }
+
+    resp = requests.get(SEARCH_URL, params=params, headers=HEADERS, timeout=30).json()
+    results = resp.get("query", {}).get("search", [])
+
+    if not results:
+        return None
+
+    best_title = results[0]["title"]
+    wiki_title = best_title.replace(" ", "_")
+    html_url = f"{BASE_URL}/page/html/{wiki_title}"
+    r = requests.get(html_url, headers=HEADERS, timeout=30)
+
+    if r.status_code != 200:
+        return None
+    return r.text
+
+
+with open(INPUT_TSV, encoding="utf-8") as f:
+    print("Opened file:", INPUT_TSV)
+    print("First 500 chars:")
+    print(f.read(500))
+    f.seek(0)
+
+    reader = csv.DictReader(f, delimiter="\t")
+    for row in reader:
+        tconst = row["tconst"]
+        title = row["primaryTitle"]
+        year = row["startYear"]
+        outfile = os.path.join(OUTPUT_DIR, f"{tconst}.html")
+        print(outfile)
+
+        query = f"{title} {year}" if year != "\\N" else title  # build the query before the skip check so it is always defined
+
+        if os.path.exists(outfile):
+            print(f"Skipping {tconst}: {query}")
+            continue  # if exists, skip
+
+        print(f"Fetching Wikipedia for {tconst}: {query}")
+        html = fetch_wikipedia_html(query)
+        if html:
+            with open(outfile, "w", encoding="utf-8") as out:
+                out.write(html)
+        else:
+            print(f"No Wikipedia page found for {query}")
+        sleep(0.5)
+print("Completed")
+
+# https://en.wikipedia.org/w/index.php?api=wmf-restbase&title=Special%3ARestSandbox#/Page%20content/get_page_summary__title_