preprocessing changes
This commit is contained in:
@@ -52,6 +52,20 @@ def get_genre(row):
|
|||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def pre_director(text):
    """Normalize a director string.

    Args:
        text: Raw director value. Any falsy value (``None``, ``""``) is
            treated as missing.

    Returns:
        ``""`` when ``text`` is falsy; otherwise ``text`` lowercased with
        leading and trailing whitespace removed.
    """
    return text.lower().strip() if text else ""
|
||||||
|
|
||||||
|
def clean_cast(text, top_k=5):
    """Split a comma-separated cast string into a list of lowercased names.

    Args:
        text: Comma-separated actor names. Any falsy value (``None``, ``""``)
            yields an empty list.
        top_k: Maximum number of names to keep, counted from the start of
            the string. Pass ``None`` to keep every name.

    Returns:
        The names in their original order, lowercased and stripped of
        surrounding whitespace, with empty entries dropped and the result
        limited to the first ``top_k`` items.
    """
    if not text:
        return []

    stripped = (part.strip() for part in text.lower().split(","))
    names = [name for name in stripped if name]

    # The previous version accepted ``top_k`` but never applied it, so the
    # full cast was always returned. Honor the limit here.
    if top_k is None:
        return names
    return names[:top_k]
|
||||||
|
|
||||||
# print(df.columns)
|
# print(df.columns)
|
||||||
|
|
||||||
# df['preprocessed'] = df['Plot'].apply(clean_text)
|
# df['preprocessed'] = df['Plot'].apply(clean_text)
|
||||||
|
|||||||
Reference in New Issue
Block a user