How can I make sure that my novelty measurement is good enough for my recommendation system? I cannot run an online survey or user tests with hundreds of people. My goal is to input a product and, with my hybrid approach (content-based and item-based collaborative filtering), output novel products that my filtering still considers relevant. I do not know whether my modelling approach is sound at all, nor how to evaluate and validate it accurately.
def evaluate_model_with_novelty(test_data, hybrid_recommendation_model, item_user_matrix, top_n=5):
    """Return the mean per-user novelty of the model's recommendations.

    For every user (``author_id``) in ``test_data`` the first product that
    user interacted with is used as the seed for the recommender.  The
    user's novelty is the number of distinct recommended products that do
    not appear among that user's ``product_id`` values in ``test_data``,
    divided by the number of recommendations returned.

    Args:
        test_data: DataFrame with ``author_id`` and ``product_id`` columns.
        hybrid_recommendation_model: Callable invoked as
            ``model(product_id, item_user_matrix, top_n=top_n)`` that returns
            an iterable of ``(product_id, score)`` pairs.
        item_user_matrix: Object whose ``.index`` holds the known product
            ids; it is also forwarded to the model.
        top_n: Number of recommendations requested per seed product.

    Returns:
        The average novelty over all evaluated users, or ``0.0`` when no
        user could be evaluated (empty test set, or every seed product is
        missing from ``item_user_matrix.index``).
    """
    total_novelty = 0.0
    total_users = 0

    # One grouped pass instead of re-filtering the whole frame per user.
    # sort=False keeps users in first-appearance order, like .unique().
    for _, user_data in test_data.groupby('author_id', sort=False):
        # NOTE: "past interactions" come from test_data only; interactions
        # that exist solely in the training split are not considered here.
        past_interactions = set(user_data['product_id'])

        # The user's first test product is the seed for the recommender.
        product_id = user_data['product_id'].iloc[0]
        if product_id not in item_user_matrix.index:
            print(f"Warning: Product ID {product_id} not found in item_user_matrix index.")
            continue

        recommendations = hybrid_recommendation_model(product_id, item_user_matrix, top_n=top_n)
        recommended_products = [recommended_id for recommended_id, _ in recommendations]

        # Novelty: share of recommended items not in past interactions.
        # A user with no recommendations contributes a novelty of 0.
        if recommended_products:
            num_novel_recommendations = len(set(recommended_products) - past_interactions)
            novelty = num_novel_recommendations / len(recommended_products)
        else:
            novelty = 0

        total_novelty += novelty
        total_users += 1

    # Guard against dividing by zero when every user was skipped.
    if total_users == 0:
        return 0.0
    return total_novelty / total_users


# Function to train hybrid recommender system
def train_hybrid_recommendation_model(train_data, item_user_matrix, item_similarity, tfidf_matrix, product_name_dict):
    """Build a recommender callable bound to the precomputed similarity data.

    No fitting happens here: ``train_data`` is accepted but not used, and
    the ``item_user_matrix`` given to this function is not captured -- the
    returned callable uses the matrix passed to it at call time.

    Args:
        train_data: Training split (currently unused).
        item_user_matrix: Unused here; see note above.
        item_similarity: Forwarded to ``hybrid_recommendation``.
        tfidf_matrix: Forwarded to ``hybrid_recommendation``.
        product_name_dict: Forwarded to ``hybrid_recommendation``.

    Returns:
        A callable ``model(product_id, item_user_matrix, top_n)`` that
        delegates to ``hybrid_recommendation``.
    """

    def recommend(product_id, item_user_matrix, top_n):
        return hybrid_recommendation(
            product_id,
            item_user_matrix,
            item_similarity,
            tfidf_matrix,
            product_name_dict,
            top_n=top_n,
        )

    return recommend


# Example usage
if __name__ == "__main__":
    train_data, test_data = split_data(data)
    product_name_dict = get_product_name_dict(data)

    # Train the hybrid recommender system
    hybrid_recommendation_model = train_hybrid_recommendation_model(
        train_data, item_user_matrix, item_similarity, tfidf_matrix, product_name_dict
    )

    # Evaluate the model for novelty
    novelty_score = evaluate_model_with_novelty(
        test_data, hybrid_recommendation_model, item_user_matrix, top_n=5
    )
    print("Novelty Score:", novelty_score)