Revisions to ZIM parsing and Netflix parsing, and updates to HTML scraping to include the synopsis

This commit is contained in:
prabhaavp
2026-03-19 01:56:14 -04:00
parent 0a70920ba9
commit 492160c3a3
13 changed files with 252 additions and 63 deletions

11
.idea/datamining_881_new.iml generated Normal file
View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Module descriptor of type PYTHON_MODULE. The file appears to be IDE-generated, so manual edits may be overwritten (TODO confirm). -->
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <!-- Content root is the module directory; the folders listed inside it are excluded. -->
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
      <excludeFolder url="file://$MODULE_DIR$/data/processed/wikipedia_html" />
    </content>
    <!-- Order entries: the named Python SDK, followed by the module's own source folder. -->
    <orderEntry type="jdk" jdkName="Python 3.13 (datamining_881_new)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>