From 36af063777eace996fd62ae11c9e07fda638f1c7 Mon Sep 17 00:00:00 2001 From: prabhaavp Date: Tue, 10 Mar 2026 13:17:21 -0400 Subject: [PATCH] - Delete the folders if we skipped a movie due to not being found --- scripts/extract_wiki_zim.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/extract_wiki_zim.py b/scripts/extract_wiki_zim.py index df15ec220..ec7854904 100644 --- a/scripts/extract_wiki_zim.py +++ b/scripts/extract_wiki_zim.py @@ -60,14 +60,15 @@ with open(INPUT_TSV, encoding="utf-8") as f: # folder for each movie movie_dir = os.path.join(OUTPUT_DIR, tconst) os.makedirs(movie_dir, exist_ok=True) - outfile = os.path.join(movie_dir, f"{tconst}.html") - if os.path.exists(outfile): - continue query = f"{title} ({year} film)" if year != "\\N" else title #if year not empty print(f"fetching Wikipedia HTML + images for {tconst}: {query}") html_with_images = fetch_wikipedia_html_with_images(query, movie_dir) if html_with_images: if "Directed by" not in html_with_images: + os.rmdir(movie_dir) + continue + outfile = os.path.join(movie_dir, f"{tconst}.html") + if os.path.exists(outfile): continue with open(outfile, "w", encoding="utf-8") as out: out.write(html_with_images)