parent
36cffbfe04
commit
b73d40fcc4
@ -0,0 +1,105 @@
|
|||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from typing import List
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def levenshtein_distance(s1, s2):
    """Return the Levenshtein edit distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept in memory.
    """
    # Put the longer sequence on the outer loop so each row is as short
    # as possible.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Turning a sequence into an empty one costs one deletion per element.
    if len(shorter) == 0:
        return len(longer)

    above = list(range(len(shorter) + 1))
    for row_number, outer_item in enumerate(longer, start=1):
        row = [row_number]
        for col, inner_item in enumerate(shorter):
            insert_cost = above[col + 1] + 1
            delete_cost = row[col] + 1
            # A substitution is free when both elements already match.
            replace_cost = above[col] + (outer_item != inner_item)
            row.append(min(insert_cost, delete_cost, replace_cost))
        above = row

    return above[-1]
|
||||||
|
|
||||||
|
def find_closest_actor_name(input_name: str, actor_names: List[str], actor_ids: List[str]) -> tuple:
    """Return the actor whose name is closest to ``input_name``.

    Closeness is measured with ``levenshtein_distance`` (case-sensitive).
    When several names are equally close, the first one in ``actor_names``
    wins.

    Args:
        input_name: Name typed by the user.
        actor_names: Candidate names.
        actor_ids: Identifiers paired position-by-position with
            ``actor_names``; pairing stops at the shorter of the two lists.

    Returns:
        A ``(closest_name, closest_id)`` tuple, or ``(None, None)`` when
        there is no usable candidate.
    """
    closest_name = None
    closest_id = None
    min_distance = float('inf')
    input_length = len(input_name)

    for actor_name, actor_id in zip(actor_names, actor_ids):
        # Missing names loaded through pandas arrive as float NaN; they
        # cannot be compared and would make len() raise a TypeError.
        if not isinstance(actor_name, str):
            continue

        # The edit distance is never smaller than the length difference, so
        # a candidate whose length differs by at least the current best
        # cannot improve on it.
        if abs(len(actor_name) - input_length) >= min_distance:
            continue

        distance = levenshtein_distance(input_name, actor_name)
        if distance < min_distance:
            min_distance = distance
            closest_name = actor_name
            closest_id = actor_id
            if distance == 0:
                # Exact match: nothing can be strictly closer.
                break

    return (closest_name, closest_id)
|
||||||
|
|
||||||
|
def getUniqueActorNames(filePath):
    """Load actor names and identifiers from a tab-separated file.

    The file must have a header row containing the columns ``primaryName``
    and ``nconst``.

    Args:
        filePath: Path of the TSV file to read.

    Returns:
        A ``(actor_names, actor_ids)`` tuple of two lists of strings, in
        file order and paired position-by-position.

    Raises:
        KeyError: If one of the two expected columns is missing.
    """
    # Read every cell as text and disable the default NA parsing; otherwise
    # pandas turns real names such as "NA" or "None" into float NaN, which
    # breaks string comparison downstream.
    df = pd.read_csv(filePath, sep='\t', dtype=str, keep_default_na=False)
    actor_names = df['primaryName'].tolist()
    actor_ids = df['nconst'].tolist()
    return (actor_names, actor_ids)
|
||||||
|
|
||||||
|
def saveUniqueActorsSorted(inputFilePath, outputFilePath):
    """Write the unique actors of a TSV file, sorted by name, to a new TSV.

    The input must have a header row containing the columns ``primaryName``
    and ``nconst``; any other column is ignored. The output has the header
    ``primaryName``, ``nconst`` followed by one row per distinct
    (name, id) pair, ordered by name and then by id.

    Args:
        inputFilePath: Path of the TSV file to read.
        outputFilePath: Path of the TSV file to create or overwrite.

    Raises:
        KeyError: If one of the two expected columns is missing.
    """
    # Read every cell as text and disable the default NA parsing so that
    # names such as "NA" are kept verbatim instead of becoming NaN.
    df = pd.read_csv(inputFilePath, sep='\t', dtype=str, keep_default_na=False)

    # Keep one row per (name, id) pair; the id acts as a tie-breaker so the
    # order of same-named actors is deterministic.
    actors = (
        df[['primaryName', 'nconst']]
        .drop_duplicates()
        .sort_values(by=['primaryName', 'nconst'])
    )

    with open(outputFilePath, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter='\t')
        writer.writerow(['primaryName', 'nconst'])
        # Write whole columns at once instead of iterating row by row.
        writer.writerows(zip(actors['primaryName'].tolist(), actors['nconst'].tolist()))
|
||||||
|
|
||||||
|
|
||||||
|
def _prompt_actor_selection(actor_names, actor_ids):
    """Ask the user for an actor until a match is confirmed or abandoned.

    Returns the confirmed ``(name, id)`` tuple, or ``None`` when the user
    chooses to leave without confirming a match.
    """
    while True:
        actorName = input("Quel acteur voulez-vous chercher ? ")
        actorFound = find_closest_actor_name(actorName, actor_names, actor_ids)
        # find_closest_actor_name returns a (name, id) tuple, which cannot be
        # concatenated to a string directly.
        print(f"L'acteur/actrice trouvé est: {actorFound[0]} ({actorFound[1]})")

        correctActorChoice = ""
        while correctActorChoice not in ("o", "n"):
            correctActorChoice = input("Est-ce le bon acteur/actrice ? (o = oui, n = non)").strip().lower()
        if correctActorChoice == "o":
            return actorFound

        leave = ""
        while leave not in ("s", "r"):
            leave = input("Sortir ou relancer ? (sortir = s, relancer = r)").strip().lower()
        if leave == "s":
            return None
        # leave == "r": loop again and run a new search.


if __name__ == "__main__":
    uniqueActorsPath = "processedData/uniqueActorNames.tsv"
    requiredActorCount = 4  # the menu announces that 4 actors are required

    # Build the sorted actor list once; later runs reuse the cached file.
    if not os.path.exists(uniqueActorsPath):
        saveUniqueActorsSorted("processedData/actorsRatingsGroupedWithName.tsv", uniqueActorsPath)
    tupleActors = getUniqueActorNames(uniqueActorsPath)
    selectedActors = []  # confirmed (name, id) tuples

    print("Bienvenue dans MoviePrecog!")
    mustContinue = True
    while mustContinue:
        print("1: Ajouter un acteur à la liste (4 acteurs / actrices requis)")
        print("2: Afficher la liste")
        print("3: Vider la liste")
        print("4: Lancer la prévision")
        print("5: Quitter")
        userChoice = input("Votre choix: ").strip()

        if userChoice == "1":
            if len(selectedActors) >= requiredActorCount:
                print(f"La liste contient déjà {requiredActorCount} acteurs/actrices.")
            else:
                chosenActor = _prompt_actor_selection(tupleActors[0], tupleActors[1])
                if chosenActor is not None:
                    selectedActors.append(chosenActor)
        elif userChoice == "2":
            if not selectedActors:
                print("La liste est vide.")
            for selectedName, selectedId in selectedActors:
                print(f"- {selectedName} ({selectedId})")
        elif userChoice == "3":
            selectedActors.clear()
            print("La liste a été vidée.")
        elif userChoice == "4":
            if len(selectedActors) != requiredActorCount:
                print(
                    f"Il faut exactement {requiredActorCount} acteurs/actrices "
                    f"pour lancer la prévision (actuellement: {len(selectedActors)})."
                )
            else:
                # TODO: call the prediction routine once it exists; this file
                # does not define one yet.
                print("La prévision n'est pas encore disponible.")
        elif userChoice == "5":
            mustContinue = False
        else:
            print("Choix invalide.")
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue