parent
36cffbfe04
commit
b73d40fcc4
@ -0,0 +1,105 @@
|
||||
import csv
|
||||
import os
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the dynamic-programming table are held at any time.
    """
    # The outer loop always walks the longer sequence, so each stored row
    # has the length of the shorter one.
    longer, shorter = (s2, s1) if len(s1) < len(s2) else (s1, s2)

    if len(shorter) == 0:
        return len(longer)

    # Row 0: distance from the empty prefix to every prefix of `shorter`.
    above = range(len(shorter) + 1)
    for row_number, long_item in enumerate(longer, start=1):
        row = [row_number]
        for col, short_item in enumerate(shorter):
            row.append(
                min(
                    above[col + 1] + 1,                      # insertion
                    row[col] + 1,                            # deletion
                    above[col] + (long_item != short_item),  # substitution
                )
            )
        above = row

    return above[-1]
|
||||
|
||||
def find_closest_actor_name(input_name, actor_names, actor_ids):
    """Return the ``(name, id)`` pair whose name is closest to ``input_name``.

    Closeness is the value returned by ``levenshtein_distance``; the
    comparison is case-sensitive. When several names share the smallest
    distance, the first one in ``actor_names`` wins.

    Args:
        input_name: The name typed by the user.
        actor_names: Candidate names.
        actor_ids: Identifiers, where ``actor_ids[i]`` belongs to
            ``actor_names[i]``. Names and ids are paired positionally;
            pairing stops at the end of the shorter sequence.

    Returns:
        A tuple ``(closest_name, closest_id)``, or ``(None, None)`` when
        there are no candidates.
    """
    closest_name = None
    closest_id = None
    min_distance = float('inf')
    for actor_name, actor_id in zip(actor_names, actor_ids):
        distance = levenshtein_distance(input_name, actor_name)
        if distance < min_distance:
            min_distance = distance
            closest_name = actor_name
            closest_id = actor_id
            if distance == 0:
                # Nothing can beat an exact match, so stop scanning.
                break
    return (closest_name, closest_id)
|
||||
|
||||
def getUniqueActorNames(filePath):
    """Load actor names and their identifiers from a tab-separated file.

    The file must contain the columns ``primaryName`` and ``nconst``.

    Args:
        filePath: Path of the TSV file to read.

    Returns:
        A tuple ``(actor_names, actor_ids)`` of two lists of strings with
        matching positions.
    """
    # Read every field as plain text. With pandas' default NA inference,
    # literal names such as "NA", "null" or "nan" would come back as float
    # NaN, which cannot be passed to a string-distance function.
    df = pd.read_csv(filePath, sep='\t', dtype=str, keep_default_na=False)
    actor_names = df['primaryName'].tolist()
    actor_ids = df['nconst'].tolist()
    return (actor_names, actor_ids)
|
||||
|
||||
def saveUniqueActorsSorted(inputFilePath, outputFilePath):
    """Write the unique actors of a TSV file, sorted by name, to a new TSV.

    The input must contain the columns ``primaryName`` and ``nconst``; any
    other column is ignored. The output has a ``primaryName``/``nconst``
    header followed by one row per distinct (name, id) pair, ordered
    alphabetically by ``primaryName``.

    Args:
        inputFilePath: Path of the TSV file to read.
        outputFilePath: Path of the TSV file to create or overwrite.
    """
    # Read every field as plain text so that literal names such as "NA" or
    # "null" are kept instead of being turned into NaN (and written back
    # as "nan").
    df = pd.read_csv(inputFilePath, sep='\t', dtype=str, keep_default_na=False)

    # Keep one row per actor, then sort alphabetically by name.
    df_unique = df.drop_duplicates(subset=['primaryName', 'nconst'])
    df_sorted = df_unique.sort_values(by='primaryName')

    with open(outputFilePath, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerow(['primaryName', 'nconst'])
        # Iterating the two columns in lockstep avoids building a Series
        # object for every row, as iterrows() would.
        writer.writerows(zip(df_sorted['primaryName'], df_sorted['nconst']))
|
||||
|
||||
|
||||
def _prompt_for_actor(actor_names, actor_ids):
    """Run the search/confirm dialogue for one actor.

    Asks for a name, shows the closest match and asks for confirmation.
    When the match is not confirmed, the user chooses between searching
    again and leaving.

    Returns:
        The confirmed ``(name, id)`` tuple, or ``None`` when the user
        leaves without confirming a match.
    """
    while True:
        actorName = input("Quel acteur voulez-vous chercher ? ")
        foundName, foundId = find_closest_actor_name(actorName, actor_names, actor_ids)
        # find_closest_actor_name returns a (name, id) tuple; show the name only.
        print("L'acteur/actrice trouvé est: " + str(foundName))
        correctActorChoice = input("Est-ce le bon acteur/actrice ? (o = oui, n = non)")
        # foundName is None only when there are no candidates at all.
        if correctActorChoice == "o" and foundName is not None:
            return (foundName, foundId)

        # Not confirmed: keep asking until one of the two valid answers is given.
        leave = ""
        while leave not in ("s", "r"):
            leave = input("Sortir ou relancer ? (sortir = s, relancer = r)")
        if leave == "s":
            return None


if __name__ == "__main__":
    uniqueActorsPath = "processedData/uniqueActorNames.tsv"
    # Build the sorted lookup file on first run only.
    if not os.path.exists(uniqueActorsPath):
        saveUniqueActorsSorted("processedData/actorsRatingsGroupedWithName.tsv", uniqueActorsPath)
    tupleActors = getUniqueActorNames(uniqueActorsPath)
    # Holds the confirmed (name, id) tuples.
    selectedActorNames = []

    print("Bienvenue dans MoviePrecog!")
    mustContinue = True
    while mustContinue:
        print("1: Ajouter un acteur à la liste (4 acteurs / actrices requis)")
        print("2: Afficher la liste")
        print("3: Vider la liste")
        print("4: Lancer la prévision")
        print("5: Quitter")
        userChoice = input("Votre choix: ")

        if userChoice == "1":
            actorFound = _prompt_for_actor(tupleActors[0], tupleActors[1])
            if actorFound is not None:
                selectedActorNames.append(actorFound)
        elif userChoice == "2":
            if not selectedActorNames:
                print("La liste est vide.")
            for selectedName, selectedId in selectedActorNames:
                print(f"{selectedName} ({selectedId})")
        elif userChoice == "3":
            selectedActorNames.clear()
            print("Liste vidée.")
        elif userChoice == "4":
            if len(selectedActorNames) != 4:
                print("4 acteurs / actrices requis.")
            else:
                # TODO: no prediction routine is defined in this file yet.
                print("Prévision non disponible dans cette version.")
        elif userChoice == "5":
            mustContinue = False
        else:
            print("Choix invalide.")

    # NOTE(review): hard-coded lookup kept from the original script; it
    # looks like a leftover smoke check - confirm whether it should stay.
    print(find_closest_actor_name("Robert downey jr", tupleActors[0], tupleActors[1]))
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue