# Source code for newsreclib.data.components.adressa_user_info

# Adapted from https://github.com/yjw1029/Efficient-FedRec/blob/839f967c1ed1c0cb0b1b4d670828437ffb712f29/preprocess/adressa_raw.py

from typing import List, Optional

import numpy as np


class UserInfo:
    """Accumulates one user's clicks, bucketed into history, train and test.

    Each click is routed by comparing its ``date`` against the two split
    points (see :meth:`update`):

    * ``date < train_date_split``                      -> history
    * ``train_date_split <= date < test_date_split``   -> train
    * ``date >= test_date_split``                      -> test

    Attributes:
        train_date_split: The date before which click behaviors are included
            in the history of a user; clicks on or after it (and before
            ``test_date_split``) go to the train set.
        test_date_split: The date on or after which click behaviors are
            included in the test set.
        hist_news, train_news, test_news: News indices per bucket. Python
            lists until :meth:`sort_click` is called, ``int32`` arrays after.
        hist_time, train_time, test_time: Click times aligned element-wise
            with the corresponding ``*_news`` attribute.

    NOTE(review): the splits are annotated ``int`` here while ``update``
    annotates ``date`` as ``str``. They are only ever compared with ``>=`` and
    ``<``, so at runtime they must be mutually comparable (i.e. the same
    kind of value) -- TODO confirm the intended type against the caller.
    """

    def __init__(
        self,
        train_date_split: int,
        test_date_split: int,
    ) -> None:
        self.hist_news = []
        self.hist_time = []

        self.train_news = []
        self.train_time = []

        self.test_news = []
        self.test_time = []

        self.train_date_split = train_date_split
        self.test_date_split = test_date_split

    def update(self, nindex: int, click_time: int, date: str) -> None:
        """Records a single click in the history, train or test bucket.

        Must be called before :meth:`sort_click`: afterwards the buckets are
        NumPy arrays, which do not support ``append``.

        Args:
            nindex: The index of a news article.
            click_time: The time when the user clicked on the news article.
            date: The processed click time used to assign the sample into
                the `history` of the user, the `train` or the `test` set.
                Must be comparable with the two date splits.
        """
        if date >= self.test_date_split:
            news, times = self.test_news, self.test_time
        elif date >= self.train_date_split:
            news, times = self.train_news, self.train_time
        else:
            news, times = self.hist_news, self.hist_time

        news.append(nindex)
        times.append(click_time)

    def sort_click(self) -> None:
        """Sorts user clicks by time in ascending order.

        Converts every bucket to a pair of ``int32`` NumPy arrays and
        reorders both by click time. Clicks sharing the same time keep the
        order in which they were recorded. Safe to call on empty buckets.
        """
        self.train_news, self.train_time = self._sort_by_time(
            self.train_news, self.train_time
        )
        self.test_news, self.test_time = self._sort_by_time(
            self.test_news, self.test_time
        )
        self.hist_news, self.hist_time = self._sort_by_time(
            self.hist_news, self.hist_time
        )

    @staticmethod
    def _sort_by_time(news, times):
        """Returns ``(news, times)`` as int32 arrays ordered by ascending time."""
        # assumes indices and click times fit in int32 -- TODO confirm
        news = np.array(news, dtype="int32")
        times = np.array(times, dtype="int32")
        # A stable sort keeps the recorded order of clicks with equal
        # timestamps; argsort's default kind does not guarantee that.
        order = np.argsort(times, kind="stable")
        return news[order], times[order]