- Delete the folder of any movie we skip because it was not found

This commit is contained in:
prabhaavp
2026-03-10 13:17:21 -04:00
parent 0ac1234afa
commit 36af063777

View File

@@ -60,14 +60,15 @@ with open(INPUT_TSV, encoding="utf-8") as f:
# folder for each movie # folder for each movie
movie_dir = os.path.join(OUTPUT_DIR, tconst) movie_dir = os.path.join(OUTPUT_DIR, tconst)
os.makedirs(movie_dir, exist_ok=True) os.makedirs(movie_dir, exist_ok=True)
outfile = os.path.join(movie_dir, f"{tconst}.html")
if os.path.exists(outfile):
continue
query = f"{title} ({year} film)" if year != "\\N" else title #if year not empty query = f"{title} ({year} film)" if year != "\\N" else title #if year not empty
print(f"fetching Wikipedia HTML + images for {tconst}: {query}") print(f"fetching Wikipedia HTML + images for {tconst}: {query}")
html_with_images = fetch_wikipedia_html_with_images(query, movie_dir) html_with_images = fetch_wikipedia_html_with_images(query, movie_dir)
if html_with_images: if html_with_images:
if "Directed by" not in html_with_images: if "Directed by" not in html_with_images:
os.rmdir(movie_dir)
continue
outfile = os.path.join(movie_dir, f"{tconst}.html")
if os.path.exists(outfile):
continue continue
with open(outfile, "w", encoding="utf-8") as out: with open(outfile, "w", encoding="utf-8") as out:
out.write(html_with_images) out.write(html_with_images)