From cb9ba53e1c88c8dd2280dc7c859ac146c654d5ca Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 9 Jun 2024 20:07:55 -0700 Subject: [PATCH] Removed dependency on csv gem for load_movielens --- CHANGELOG.md | 4 ++++ Gemfile | 1 - gemfiles/activerecord72.gemfile | 1 - lib/disco/data.rb | 12 +++++------- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be2f5b..c29cbcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.4.2 (unreleased) + +- Removed dependency on `csv` gem for `load_movielens` + ## 0.4.1 (2024-05-23) - Reduced memory for `item_recs` and `similar_users` diff --git a/Gemfile b/Gemfile index b88c365..394375e 100644 --- a/Gemfile +++ b/Gemfile @@ -11,4 +11,3 @@ gem "matrix" # for daru gem "rover-df" gem "ngt", ">= 0.3.0" gem "faiss" -gem "csv" diff --git a/gemfiles/activerecord72.gemfile b/gemfiles/activerecord72.gemfile index 62d00de..ba6b453 100644 --- a/gemfiles/activerecord72.gemfile +++ b/gemfiles/activerecord72.gemfile @@ -11,4 +11,3 @@ gem "matrix" # for daru gem "rover-df" gem "ngt", ">= 0.3.0" gem "faiss" -gem "csv" diff --git a/lib/disco/data.rb b/lib/disco/data.rb index 640053a..4c299e6 100644 --- a/lib/disco/data.rb +++ b/lib/disco/data.rb @@ -1,23 +1,21 @@ module Disco module Data def load_movielens - require "csv" - item_path = download_file("ml-100k/u.item", "https://files.grouplens.org/datasets/movielens/ml-100k/u.item", file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701") data_path = download_file("ml-100k/u.data", "https://files.grouplens.org/datasets/movielens/ml-100k/u.data", file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490") - # convert u.item to utf-8 - movies_str = File.read(item_path).encode("UTF-8", "ISO-8859-1") - movies = {} - CSV.parse(movies_str, col_sep: "|") do |row| + File.foreach(item_path) do |line| + # convert u.item to utf-8 + row = line.encode("UTF-8", "ISO-8859-1").split("|") movies[row[0]] = row[1] end data = [] - CSV.foreach(data_path, col_sep: "\t") do |row| + File.foreach(data_path) do |line| + row = line.split("\t") data << { user_id: row[0].to_i, item_id: movies[row[1]],