From 0d0b043e40c09e747a4741aa25ce32eec3e31dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcus=20Vin=C3=ADcius=20de=20Carvalho?= Date: Mon, 4 Oct 2021 18:34:15 +0800 Subject: [PATCH] Add files via upload --- ACDC.py | 1548 ++++++++++++++++++++++++++++++++++++++++ ACDCDataManipulator.py | 1236 ++++++++++++++++++++++++++++++++ ACDC_Ablation_A.py | 1323 ++++++++++++++++++++++++++++++++++ ACDC_Ablation_B.py | 1323 ++++++++++++++++++++++++++++++++++ ACDC_Ablation_C.py | 1323 ++++++++++++++++++++++++++++++++++ ACDC_Ablation_D.py | 1322 ++++++++++++++++++++++++++++++++++ AutoEncoder.py | 191 +++++ ElasticNodes.py | 94 +++ LICENSE | 54 +- MySingletons.py | 120 ++++ MyUtil.py | 64 ++ NeuralNetwork.py | 579 +++++++++++++++ 12 files changed, 9156 insertions(+), 21 deletions(-) create mode 100644 ACDC.py create mode 100644 ACDCDataManipulator.py create mode 100644 ACDC_Ablation_A.py create mode 100644 ACDC_Ablation_B.py create mode 100644 ACDC_Ablation_C.py create mode 100644 ACDC_Ablation_D.py create mode 100644 AutoEncoder.py create mode 100644 ElasticNodes.py create mode 100644 MySingletons.py create mode 100644 MyUtil.py create mode 100644 NeuralNetwork.py diff --git a/ACDC.py b/ACDC.py new file mode 100644 index 0000000..d220aef --- /dev/null +++ b/ACDC.py @@ -0,0 +1,1548 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our 
discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from ACDCDataManipulator import DataManipulator
from NeuralNetwork import NeuralNetwork
from AutoEncoder import DenoisingAutoEncoder
from MySingletons import MyDevice
from colorama import Fore, Back, Style
from itertools import cycle
import numpy as np
import matplotlib.pylab as plt
import math
import torch
import time


def __copy_weights(source: NeuralNetwork, targets: list, layer_numbers=None, copy_moment: bool = True):
    """Copy layer parameters from ``source`` into every network in ``targets``.

    :param source: network whose parameters are read.
    :param targets: a single network or a list/tuple of networks to overwrite.
    :param layer_numbers: 1-based layer numbers to copy (defaults to ``[1]``).
        Any number greater than ``source.number_hidden_layers`` selects the
        output layer.
    :param copy_moment: when True the momentum tensors are copied as well.

    NOTE: ``Tensor.detach()`` does not clone, so the targets end up sharing
    storage with ``source``.
    """
    if layer_numbers is None:
        layer_numbers = [1]
    # isinstance (rather than an exact type check) so tuples and list
    # subclasses are treated as collections instead of being wrapped.
    if not isinstance(targets, (list, tuple)):
        targets = [targets]

    for layer_number in layer_numbers:
        index = layer_number - 1  # the public numbering is 1-based
        is_output_layer = index >= source.number_hidden_layers
        for target in targets:
            if is_output_layer:
                target.output_weight = source.output_weight.detach()
                target.output_bias = source.output_bias.detach()
                if copy_moment:
                    target.output_momentum = source.output_momentum.detach()
                    target.output_bias_momentum = source.output_bias_momentum.detach()
            else:
                target.weight[index] = source.weight[index].detach()
                target.bias[index] = source.bias[index].detach()
                if copy_moment:
                    target.momentum[index] = source.momentum[index].detach()
                    target.bias_momentum[index] = source.bias_momentum[index].detach()


def __grow_nodes(*networks):
    """Grow one node in every network if the first network is flagged growable.

    The first positional network is the reference: its ``growable`` flag and
    ``number_hidden_layers`` decide whether and where all networks grow.

    :return: True when nodes were added, False otherwise.
    """
    origin = networks[0]
    if not origin.growable[origin.number_hidden_layers]:
        return False

    nodes_to_add = 1
    for _ in range(nodes_to_add):
        for network in networks:
            # Re-read the layer index on every call, exactly as before, in
            # case growing the reference network changes it.
            network.grow_node(origin.number_hidden_layers)
    return True


def __prune_nodes(*networks):
    """Prune, in every network, the nodes the first network marks as prunable.

    A first entry below zero in ``origin.prunable[layer]`` means "nothing to
    prune".

    :return: True when nodes were pruned, False otherwise.
    """
    origin = networks[0]
    if origin.prunable[origin.number_hidden_layers][0] < 0:
        return False

    nodes_to_prune = origin.prunable[origin.number_hidden_layers].tolist()
    for network in networks:
        # Walk the list backwards (as the original did) so that removing one
        # node does not shift the positions of the ones still to be removed.
        for node_to_prune in reversed(nodes_to_prune):
            network.prune_node(origin.number_hidden_layers, node_to_prune)
    return True


def __width_evolution(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None):
    """Run a forward pass followed by one step-wise width adaptation.

    When ``y`` is omitted the input itself is used as the target.
    """
    target = x if y is None else y
    network.feedforward(x, target)
    network.width_adaptation_stepwise(target)


def __discriminative(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None, is_neg_grad: bool = False):
    """Train ``network`` on ``(x, y)``; ``y`` defaults to a detached ``x``."""
    target = x.detach() if y is None else y
    network.train(x=x, y=target, is_neg_grad=is_neg_grad)


def __generative(network: DenoisingAutoEncoder, x: torch.Tensor, y: torch.Tensor = None,
                 is_tied_weight=True, noise_ratio=0.1, glw_epochs: int = 1):
    """Greedy layer-wise pre-train the auto-encoder, then train it on ``(x, y)``.

    :param y: reconstruction target; defaults to a detached ``x``.
    :param is_tied_weight: forwarded to ``network.train``.
    :param noise_ratio: forwarded to both the pre-training and training calls.
    :param glw_epochs: number of greedy layer-wise pre-training epochs.
    """
    target = x.detach() if y is None else y
    network.greedy_layer_wise_pretrain(x=x, number_epochs=glw_epochs, noise_ratio=noise_ratio)
    network.train(x=x, y=target, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight)


def __test(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None,
           is_source: bool = False, is_discriminative: bool = False, metrics=None):
    """Evaluate ``network`` without gradients and record the results.

    :param y: expected output; defaults to a detached ``x``.
    :param is_source: selects the ``*_source_*`` (True) or ``*_target_*``
        (False) metric keys.
    :param is_discriminative: True records classification rate, loss and
        misclassified count; False records only the reconstruction loss.
    :param metrics: dict of lists to append to. When None the network is still
        evaluated but nothing is recorded (previously this raised TypeError).
    """
    with torch.no_grad():
        target = x.detach() if y is None else y
        network.test(x=x, y=target)

    if metrics is None:
        return

    domain = 'source' if is_source else 'target'
    if is_discriminative:
        metrics['classification_rate_' + domain].append(network.classification_rate)
        metrics['classification_' + domain + '_loss'].append(float(network.loss_value))
        metrics['classification_' + domain + '_misclassified'].append(float(network.misclassified))
    else:
        metrics['reconstruction_' + domain + '_loss'].append(float(network.loss_value))


def __force_same_size(a_tensor, b_tensor, shuffle=True, strategy='max'):
    """Return both tensors resized to a common length along dimension 0.

    :param shuffle: when True the rows of each tensor are permuted before and
        after resizing (four ``torch.randperm`` calls, in the order a, b, a, b).
    :param strategy: ``'max'`` repeats the shorter tensor cyclically until it
        is as long as the longer one; ``'min'`` truncates both to the shorter
        length; any other value leaves the lengths untouched.
    :raises ValueError: with ``'max'`` when exactly one tensor is empty, since
        an empty tensor cannot be oversampled.
    :return: ``(a_tensor, b_tensor)``.
    """
    size_a, size_b = a_tensor.shape[0], b_tensor.shape[0]
    shortest, longest = min(size_a, size_b), max(size_a, size_b)

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    if strategy == 'max':
        if longest > 0 and shortest == 0:
            raise ValueError("strategy 'max' cannot oversample an empty tensor")
        if longest > 0:
            # Row i of the result is row (i mod original_length): the shorter
            # tensor wraps around, the longer one is reproduced unchanged.
            # Indexing both keeps the outputs independent copies.
            positions = torch.arange(longest)
            a_tensor = a_tensor[positions % size_a]
            b_tensor = b_tensor[positions % size_b]
    elif strategy == 'min':
        a_tensor = a_tensor[:shortest]
        b_tensor = b_tensor[:shortest]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    return a_tensor, b_tensor


def __print_annotation(lst):
    """Annotate a plotted series with a few of its values (two decimals).

    Points are labelled at a stride of ``int(len(lst) * 0.25) - 1`` (at least
    1), and the last point is always labelled. Each element may be a plain or
    NumPy scalar, or an indexable whose first entry is the value.

    An empty series is ignored.
    """
    count = len(lst)
    if count == 0:
        return

    def as_value(item):
        # np.number covers every NumPy integer/float scalar, not only int32.
        return item if isinstance(item, (int, float, np.number)) else item[0]

    # A stride below 1 used to yield an empty range for short series.
    stride = max(1, int(count * 0.25) - 1)
    positions = list(range(0, count, stride))
    if positions[-1] != count - 1:
        positions.append(count - 1)

    for idx in positions:
        value = as_value(lst[idx])
        # Anchor the label on the data point itself (x == idx).
        plt.annotate(format(value, '.2f'), (idx, value))


def __plot_time(train_time: np.ndarray,
                test_time: np.ndarray,
                annotation=True):
    """Plot per-minibatch train and test processing times and show the figure."""
    plt.title('Processing time')
    plt.ylabel('Seconds')
    plt.xlabel('Minibatches')

    curves = (
        (train_time, 'Train time: %f (Mean) %f (Accumulated)'),
        (test_time, 'Test time: %f (Mean) %f (Accumulated)'),
    )
    for values, template in curves:
        plt.plot(values, linewidth=1,
                 label=(template % (np.nanmean(values), np.sum(values))))
    plt.legend()

    if annotation:
        for values, _ in curves:
            __print_annotation(values)

    plt.tight_layout()
    plt.show()


def __plot_node_evolution(nodes_discriminator: np.ndarray,
                          nodes_domain_classifier: np.ndarray,
                          nodes_feature_extraction: np.ndarray,
                          annotation=True):
    """Plot the hidden-node count of each network per minibatch and show it.

    Each series must be non-empty: its last element is read for the legend.
    """
    plt.title('Node evolution')
    plt.ylabel('Nodes')
    plt.xlabel('Minibatches')

    curves = (
        (nodes_discriminator, 'Discriminator HL nodes: %f (Mean) %d (Final)'),
        (nodes_domain_classifier, 'Domain Classifier HL nodes: %f (Mean) %d (Final)'),
        (nodes_feature_extraction, 'Feature Extraction HL nodes: %f (Mean) %d (Final)'),
    )
    for values, template in curves:
        plt.plot(values, linewidth=1,
                 label=(template % (np.nanmean(values), values[-1])))
    plt.legend()

    if annotation:
        for values, _ in curves:
            __print_annotation(values)

    plt.tight_layout()
    plt.show()


def __plot_losses(classification_source_loss: np.ndarray,
                  classification_target_loss: np.ndarray,
                  reconstruction_source_loss: np.ndarray,
                  reconstruction_target_loss: np.ndarray,
                  domain_classifier_loss: np.ndarray,
                  annotation=True):
    """Plot the five loss series per minibatch and show the figure."""
    plt.title('Losses evolution')
    plt.ylabel('Loss value')
    plt.xlabel('Minibatches')

    curves = (
        (classification_source_loss, 'Classification Source Loss mean: %f'),
        (classification_target_loss, 'Classification Target Loss mean: %f'),
        (reconstruction_source_loss, 'Reconstruction Source Loss mean: %f'),
        (reconstruction_target_loss, 'Reconstruction Target Loss mean: %f'),
        (domain_classifier_loss, 'Domain Classifier Loss mean: %f'),
    )
    for values, template in curves:
        plt.plot(values, linewidth=1, label=(template % (np.nanmean(values))))
    plt.legend()

    if annotation:
        for values, _ in curves:
            __print_annotation(values)

    plt.tight_layout()
    plt.show()
+ + +def __plot_classification_rates(source_rate: np.ndarray, + target_rate: np.ndarray, + domain_rate: np.ndarray, + total_source_rate: float, + total_target_rate: float, + total_domain_classification_rate: float, + annotation=True, + class_number=None): + plt.title('Source and Target Classification Rates') + plt.ylabel('Classification Rate') + plt.xlabel('Minibatches') + + plt.plot(source_rate, linewidth=1, label=('Source CR: %f (batch) | %f (dataset)' % + (np.nanmean(source_rate), total_source_rate))) + plt.plot(target_rate, linewidth=1, label=('Target CR: %f (batch) | %f (dataset)' % + (np.nanmean(target_rate), total_target_rate))) + plt.plot(domain_rate, linewidth=1, label=('Domain CR: %f (batch) | %f (dataset)' % + (np.nanmean(domain_rate), total_domain_classification_rate))) + + if annotation: + __print_annotation(source_rate) + __print_annotation(target_rate) + __print_annotation(domain_rate) + + if class_number is not None: + plt.plot(np.ones(len(source_rate)) * 1 / class_number, + linewidth=1, label='Random Classification Threshold: %f' % (1 / class_number)) + + plt.plot(np.ones(len(source_rate)) * 1 / 2, + linewidth=1, label='Random Domain Classification Threshold: %f' % (1 / 2)) + + plt.legend() + + plt.tight_layout() + plt.show() + + +def __plot_ns(bias, var, ns, annotation=True): + plt.plot(bias, linewidth=1, label=('Bias mean: %f' % (np.nanmean(bias)))) + plt.plot(var, linewidth=1, label=('Variance mean: %f' % (np.nanmean(var)))) + plt.plot(ns, linewidth=1, label=('NS (Bias + Variance) mean: %f' % (np.nanmean(ns)))) + plt.legend() + + if annotation: + __print_annotation(bias) + __print_annotation(var) + __print_annotation(ns) + + plt.tight_layout() + plt.show() + + +def __plot_discriminative_network_significance(bias, var, annotation=True): + plt.title('Discriminative Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def 
__plot_domain_classifier_network_significance(bias, var, annotation=True): + plt.title('Domain Classifier Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_feature_extractor_network_significance(bias, var, annotation=True): + plt.title('Feature Extractor Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __load_source_target(source: str, target: str, n_source_concept_drift: int = 1, n_target_concept_drift: int = 1): + dm_s = DataManipulator() + dm_t = DataManipulator() + + source = source.replace('_', '-').replace(' ', '-').lower() + target = target.replace('_', '-').replace(' ', '-').lower() + + if source == 'mnist-28': + dm_s.load_mnist(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-26': + dm_s.load_mnist(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-24': + dm_s.load_mnist(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-22': + dm_s.load_mnist(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-20': + dm_s.load_mnist(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-18': + dm_s.load_mnist(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-16': + dm_s.load_mnist(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-28': + dm_s.load_usps(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-26': + dm_s.load_usps(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-24': + dm_s.load_usps(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-22': + dm_s.load_usps(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-20': + dm_s.load_usps(resize=20, n_concept_drifts=n_source_concept_drift) + 
elif source == 'usps-18': + dm_s.load_usps(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-16': + dm_s.load_usps(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'cifar10': + dm_s.load_cifar10(n_concept_drifts=n_source_concept_drift) + elif source == 'stl10': + dm_s.load_stl10(n_concept_drifts=n_source_concept_drift) + elif source == 'london-bike': + dm_s.load_london_bike_sharing(n_concept_drifts=n_source_concept_drift) + elif source == 'washington-bike': + dm_s.load_washington_bike_sharing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-fashion': + dm_s.load_amazon_review_fashion(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-all-beauty': + dm_s.load_amazon_review_all_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-appliances': + dm_s.load_amazon_review_appliances(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-arts-crafts-sewing': + dm_s.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-automotive': + dm_s.load_amazon_review_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-books': + dm_s.load_amazon_review_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cds-vinyl': + dm_s.load_amazon_review_cds_vinyl(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cellphones_accessories': + dm_s.load_amazon_review_cellphones_accessories(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-clothing-shoes-jewelry': + dm_s.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-digital-music': + dm_s.load_amazon_review_digital_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-electronics': + dm_s.load_amazon_review_electronics(n_concept_drifts=n_source_concept_drift) + elif 
source == 'amazon-review-gift-card': + dm_s.load_amazon_review_gift_card(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-grocery-gourmet-food': + dm_s.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-home-kitchen': + dm_s.load_amazon_review_home_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-industrial-scientific': + dm_s.load_amazon_review_industrial_scientific(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-kindle-store': + dm_s.load_amazon_review_kindle_store(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-luxury-beauty': + dm_s.load_amazon_review_luxury_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-magazine-subscription': + dm_s.load_amazon_review_magazine_subscription(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-movies-tv': + dm_s.load_amazon_review_movies_tv(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-musical-instruments': + dm_s.load_amazon_review_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-office-products': + dm_s.load_amazon_review_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-patio-lawn-garden': + dm_s.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-pet-supplies': + dm_s.load_amazon_review_pet_supplies(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-prime-pantry': + dm_s.load_amazon_review_prime_pantry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-software': + dm_s.load_amazon_review_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-sports-outdoors': + dm_s.load_amazon_review_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 
'amazon-review-tools-home-improvements': + dm_s.load_amazon_review_tools_home_improvements(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-toys-games': + dm_s.load_amazon_review_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-video-games': + dm_s.load_amazon_review_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-books': + dm_s.load_amazon_review_nips_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-dvd': + dm_s.load_amazon_review_nips_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-electronics': + dm_s.load_amazon_review_nips_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-kitchen': + dm_s.load_amazon_review_nips_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-apparel': + dm_s.load_amazon_review_acl_apparel(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-automotive': + dm_s.load_amazon_review_acl_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-baby': + dm_s.load_amazon_review_acl_baby(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-beauty': + dm_s.load_amazon_review_acl_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-books': + dm_s.load_amazon_review_acl_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-camera_photo': + dm_s.load_amazon_review_acl_camera_photo(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-cell_phones_service': + dm_s.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-computer_video_games': + dm_s.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-dvd': + 
dm_s.load_amazon_review_acl_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-electronics': + dm_s.load_amazon_review_acl_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-gourmet_food': + dm_s.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-grocery': + dm_s.load_amazon_review_acl_grocery(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-health_personal_care': + dm_s.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-jewelry_watches': + dm_s.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-kitchen_housewares': + dm_s.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-magazines': + dm_s.load_amazon_review_acl_magazines(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-music': + dm_s.load_amazon_review_acl_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-musical_instruments': + dm_s.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-office_products': + dm_s.load_amazon_review_acl_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-outdoor_living': + dm_s.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-software': + dm_s.load_amazon_review_acl_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-sports_outdoors': + dm_s.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-tools_hardware': + dm_s.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_source_concept_drift) + elif 
source == 'amazon-review-acl-toys_games': + dm_s.load_amazon_review_acl_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-video': + dm_s.load_amazon_review_acl_video(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-all': + dm_s.load_news_popularity_obama_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-all': + dm_s.load_news_popularity_economy_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-all': + dm_s.load_news_popularity_microsoft_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-all': + dm_s.load_news_popularity_palestine_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-facebook': + dm_s.load_news_popularity_obama_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-facebook': + dm_s.load_news_popularity_economy_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-facebook': + dm_s.load_news_popularity_microsoft_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-facebook': + dm_s.load_news_popularity_palestine_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-googleplus': + dm_s.load_news_popularity_obama_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-googleplus': + dm_s.load_news_popularity_economy_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-googleplus': + dm_s.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-googleplus': + dm_s.load_news_popularity_palestine_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-linkedin': + dm_s.load_news_popularity_obama_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-linkedin': + 
dm_s.load_news_popularity_economy_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-linkedin': + dm_s.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-linkedin': + dm_s.load_news_popularity_palestine_linkedin(n_concept_drifts=n_source_concept_drift) + + if target == 'mnist-28': + dm_t.load_mnist(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-26': + dm_t.load_mnist(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-24': + dm_t.load_mnist(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-22': + dm_t.load_mnist(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-20': + dm_t.load_mnist(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-18': + dm_t.load_mnist(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-16': + dm_t.load_mnist(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-28': + dm_t.load_usps(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-26': + dm_t.load_usps(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-24': + dm_t.load_usps(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-22': + dm_t.load_usps(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-20': + dm_t.load_usps(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-18': + dm_t.load_usps(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-16': + dm_t.load_usps(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'cifar10': + dm_t.load_cifar10(n_concept_drifts=n_target_concept_drift) + elif target == 'stl10': + dm_t.load_stl10(n_concept_drifts=n_target_concept_drift) + elif target == 'london-bike': + dm_t.load_london_bike_sharing(n_concept_drifts=n_source_concept_drift) 
+ elif target == 'washington-bike': + dm_t.load_washington_bike_sharing(n_concept_drifts=n_source_concept_drift) + elif target == 'amazon-review-fashion': + dm_t.load_amazon_review_fashion(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-all-beauty': + dm_t.load_amazon_review_all_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-appliances': + dm_t.load_amazon_review_appliances(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-arts-crafts-sewing': + dm_t.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-automotive': + dm_t.load_amazon_review_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-books': + dm_t.load_amazon_review_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cds-vinyl': + dm_t.load_amazon_review_cds_vinyl(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cellphones_accessories': + dm_t.load_amazon_review_cellphones_accessories(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-clothing-shoes-jewelry': + dm_t.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-digital-music': + dm_t.load_amazon_review_digital_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-electronics': + dm_t.load_amazon_review_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-gift-card': + dm_t.load_amazon_review_gift_card(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-grocery-gourmet-food': + dm_t.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-home-kitchen': + dm_t.load_amazon_review_home_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-industrial-scientific': + 
dm_t.load_amazon_review_industrial_scientific(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-kindle-store': + dm_t.load_amazon_review_kindle_store(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-luxury-beauty': + dm_t.load_amazon_review_luxury_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-magazine-subscription': + dm_t.load_amazon_review_magazine_subscription(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-movies-tv': + dm_t.load_amazon_review_movies_tv(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-musical-instruments': + dm_t.load_amazon_review_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-office-products': + dm_t.load_amazon_review_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-patio-lawn-garden': + dm_t.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-pet-supplies': + dm_t.load_amazon_review_pet_supplies(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-prime-pantry': + dm_t.load_amazon_review_prime_pantry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-software': + dm_t.load_amazon_review_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-sports-outdoors': + dm_t.load_amazon_review_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-tools-home-improvements': + dm_t.load_amazon_review_tools_home_improvements(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-toys-games': + dm_t.load_amazon_review_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-video-games': + dm_t.load_amazon_review_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-books': + 
dm_t.load_amazon_review_nips_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-dvd': + dm_t.load_amazon_review_nips_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-electronics': + dm_t.load_amazon_review_nips_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-kitchen': + dm_t.load_amazon_review_nips_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-apparel': + dm_t.load_amazon_review_acl_apparel(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-automotive': + dm_t.load_amazon_review_acl_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-baby': + dm_t.load_amazon_review_acl_baby(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-beauty': + dm_t.load_amazon_review_acl_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-books': + dm_t.load_amazon_review_acl_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-camera_photo': + dm_t.load_amazon_review_acl_camera_photo(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-cell_phones_service': + dm_t.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-computer_video_games': + dm_t.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-dvd': + dm_t.load_amazon_review_acl_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-electronics': + dm_t.load_amazon_review_acl_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-gourmet_food': + dm_t.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-grocery': + 
dm_t.load_amazon_review_acl_grocery(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-health_personal_care': + dm_t.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-jewelry_watches': + dm_t.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-kitchen_housewares': + dm_t.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-magazines': + dm_t.load_amazon_review_acl_magazines(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-music': + dm_t.load_amazon_review_acl_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-musical_instruments': + dm_t.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-office_products': + dm_t.load_amazon_review_acl_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-outdoor_living': + dm_t.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-software': + dm_t.load_amazon_review_acl_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-sports_outdoors': + dm_t.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-tools_hardware': + dm_t.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-toys_games': + dm_t.load_amazon_review_acl_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-video': + dm_t.load_amazon_review_acl_video(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-all': + dm_t.load_news_popularity_obama_all(n_concept_drifts=n_target_concept_drift) + elif target == 
def acdc(source, target,
         n_source_concept_drift: int = 5,
         n_target_concept_drift: int = 7,
         internal_epochs: int = 1, is_gpu=False):
    """Run one ACDC experiment over an interleaved source/target stream.

    The source and target datasets are loaded through ``__load_source_target``
    and consumed as a single stream: at every step a sample is taken from the
    source with probability proportional to the number of source samples that
    are still unread, otherwise from the target.  Every ``window_size`` (1000)
    samples form a minibatch which is first tested (from the second minibatch
    on) and then trained on, one sample at a time, for ``internal_epochs``
    passes.

    Three networks share their first hidden layer, which is kept in sync with
    ``__copy_weights``: a denoising autoencoder (``dae``), the label
    classifier (``nn``) and a two-output domain classifier (``da``).

    NOTE(review): this function was recovered from a copy whose indentation
    was lost; the statement order is original, the nesting was reconstructed
    from context.  Spots where the nesting is not certain are marked below.

    NOTE(review): a window is only processed when the *next* loop iteration
    finds it full, and the loop stops as soon as both streams are exhausted,
    so the samples collected after the last processed window are never tested
    or trained on.  If the two datasets together never fill a second window
    the summary code below indexes empty metric lists.

    Args:
        source: dataset key for the source stream (e.g. ``'mnist-28'``).
        target: dataset key for the target stream.
        n_source_concept_drift: forwarded to the source dataset loader.
        n_target_concept_drift: forwarded to the target dataset loader.
        internal_epochs: training passes per minibatch; values below 1 are
            treated as 1.
        is_gpu: forwarded to ``MyDevice().set``.

    Returns:
        dict: the summary string under ``'string'`` plus aggregated
        classification rates, node counts, timings and losses.
    """

    def print_metrics(minibatch, metrics, DMs, DMt, NN, DAEt, DA):
        """Print the colourised progress report for the current minibatch."""
        print('Minibatch: %d | Execution time (dataset load/pre-processing + model run): %f' % (
            minibatch, time.time() - metrics['start_execution_time']))

        seen_source = metrics['number_evaluated_samples_source']
        seen_target = metrics['number_evaluated_samples_target']
        if minibatch > 1:
            print((
                'Total of samples:' + Fore.BLUE + ' %d + %d = %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d + %d = %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                seen_source[-2],
                seen_source[-1] - seen_source[-2],
                seen_source[-1],
                DMs.number_samples(),
                float(seen_source[-1] / DMs.number_samples()) * 100,
                seen_target[-2],
                seen_target[-1] - seen_target[-2],
                seen_target[-1],
                DMt.number_samples(),
                float(seen_target[-1] / DMt.number_samples()) * 100,
                seen_source[-1] + seen_target[-1],
                DMs.number_samples() + DMt.number_samples(),
                float((seen_source[-1] + seen_target[-1]) / (
                    DMs.number_samples() + DMt.number_samples())) * 100))
        else:
            print((
                'Total of samples:' + Fore.BLUE + ' %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                seen_source[-1],
                DMs.number_samples(),
                float(seen_source[-1] / DMs.number_samples()) * 100,
                seen_target[-1],
                DMt.number_samples(),
                float(seen_target[-1] / DMt.number_samples()) * 100,
                seen_source[-1] + seen_target[-1],
                DMs.number_samples() + DMt.number_samples(),
                float((seen_source[-1] + seen_target[-1]) / (
                    DMs.number_samples() + DMt.number_samples())) * 100))

        # The statistics need at least one tested minibatch, and testing only
        # starts with the second minibatch.
        if minibatch > 1:
            legend = (Fore.GREEN + 'Max' + Style.RESET_ALL,
                      Fore.YELLOW + 'Mean' + Style.RESET_ALL,
                      Fore.RED + 'Min' + Style.RESET_ALL,
                      Fore.BLUE + 'Now' + Style.RESET_ALL)
            legend_accu = Fore.MAGENTA + 'Accu' + Style.RESET_ALL

            def summary(key):
                # (max, mean, min, latest) of one metric series.
                values = metrics[key]
                return np.max(values), np.nanmean(values), np.min(values), values[-1]

            # Timings additionally report the accumulated total.
            for label, key in (('Training time', 'train_time'),
                               ('Testing time', 'test_time')):
                print(('%s %s %s %s %s ' + label + ':'
                       + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f'
                       + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL)
                      % (legend + (legend_accu,) + summary(key) + (np.sum(metrics[key]),)))

            # Classification rates are shown as percentages; the mean column
            # of the source/target rows is highlighted with a background colour.
            for label, key, mean_style in (
                    ('CR Source', 'classification_rate_source',
                     Back.BLUE + Fore.YELLOW + Style.BRIGHT),
                    ('CR Target', 'classification_rate_target',
                     Back.RED + Fore.YELLOW + Style.BRIGHT),
                    ('CR Domain Discriminator', 'classification_rate_domain',
                     Fore.YELLOW)):
                print(('%s %s %s %s ' + label + ':'
                       + Fore.GREEN + ' %f%% ' + mean_style + '%f%%' + Style.RESET_ALL
                       + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL)
                      % (legend + tuple(value * 100 for value in summary(key))))

            for label, key in (('Classification Source Loss', 'classification_source_loss'),
                               ('Classification Target Loss', 'classification_target_loss'),
                               ('Domain Discriminator Loss', 'domain_regression_loss'),
                               ('Reconstruction Source Loss', 'reconstruction_source_loss'),
                               ('Reconstruction Target Loss', 'reconstruction_target_loss')):
                print(('%s %s %s %s ' + label + ':'
                       + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f'
                       + Fore.BLUE + ' %f' + Style.RESET_ALL)
                      % (legend + summary(key)))

            for label, key in (('Discriminator Nodes', 'node_evolution_discriminator'),
                               ('Denoising Autoencoder Nodes', 'node_evolution_feature_extraction'),
                               ('Domain Classifier Nodes', 'node_evolution_domain_classifier')):
                print(('%s %s %s %s ' + label + ':'
                       + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d'
                       + Fore.BLUE + ' %d' + Style.RESET_ALL)
                      % (legend + summary(key)))

            # NOTE(review): it is not certain whether the three structure
            # lines belong inside this `if`; confirm against upstream.
            print(('Network structure:' + Fore.BLUE + ' %s' + Style.RESET_ALL) % (
                " ".join(map(str, NN.layers))))
            print(('Domain Discriminator structure:' + Fore.GREEN + ' %s' + Style.RESET_ALL) % (
                " ".join(map(str, DA.layers))))
            print(('Denoising Auto Encoder:' + Fore.RED + ' %s' + Style.RESET_ALL) % (
                " ".join(map(str, DAEt.layers))))
        print(Style.RESET_ALL)

    # One list per tracked series; each processed minibatch appends one entry.
    metrics = {'classification_rate_source': [],
               'classification_rate_target': [],
               'classification_rate_domain': [],
               'number_evaluated_samples_source': [],
               'number_evaluated_samples_target': [],
               'train_time': [],
               'test_time': [],
               'node_evolution_discriminator': [],
               'node_evolution_domain_classifier': [],
               'node_evolution_feature_extraction': [],
               'classification_target_loss': [],
               'classification_source_loss': [],
               'reconstruction_source_loss': [],
               'reconstruction_target_loss': [],
               'domain_regression_loss': [],
               'classification_source_misclassified': [],
               'classification_target_misclassified': [],
               'domain_classification_misclassified': [],
               'start_execution_time': time.time()}
    MyDevice().set(is_gpu=is_gpu)
    internal_epochs = internal_epochs if internal_epochs >= 1 else 1

    # One-hot domain labels used as targets for the domain classifier.
    SOURCE_DOMAIN_LABEL = torch.tensor([[1, 0]], dtype=torch.float, device=MyDevice().get())
    TARGET_DOMAIN_LABEL = torch.tensor([[0, 1]], dtype=torch.float, device=MyDevice().get())

    dm_s, dm_t = __load_source_target(source, target, n_source_concept_drift, n_target_concept_drift)

    # The autoencoder starts with a hidden layer half the input width; both
    # classifiers reuse that width for their first hidden layer and start with
    # a single node in their second one.
    dae = DenoisingAutoEncoder([dm_s.number_features(),
                                int(dm_s.number_features() * 0.5),
                                dm_s.number_features()])
    nn = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        1,
                        dm_s.number_classes()])
    da = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        1,
                        2])

    count_source = 0
    count_target = 0
    count_window = 0
    window_size = 1000
    batch_counter = 0

    x_source = []
    y_source = []
    x_target = []
    y_target = []

    while count_source < dm_s.number_samples() \
            or count_target < dm_t.number_samples():
        if count_window < window_size \
                and (count_source < dm_s.number_samples()
                     or count_target < dm_t.number_samples()):
            # Still filling the current window: draw the next sample.
            source_prob = (dm_s.number_samples() - count_source) / (
                dm_s.number_samples() - count_source + dm_t.number_samples() - count_target + 0.)

            if (np.random.rand() <= source_prob and count_source < dm_s.number_samples()) or (
                    count_target >= dm_t.number_samples() and count_source < dm_s.number_samples()):
                x, y = dm_s.get_x_y(count_source)
                x_source.append(x)
                y_source.append(y)
                count_source += 1
                count_window += 1
            elif count_target < dm_t.number_samples():
                x, y = dm_t.get_x_y(count_target)
                x_target.append(x)
                y_target.append(y)
                count_target += 1
                count_window += 1
        else:
            # The window is full: process it as one minibatch.
            batch_counter += 1
            metrics['number_evaluated_samples_source'].append(count_source)
            metrics['number_evaluated_samples_target'].append(count_target)

            # Workaround to avoid empty stream: if one domain contributed no
            # sample to this window, replay a random already-seen sample.
            # NOTE(review): np.random.randint(0, 0) raises ValueError, so this
            # assumes each domain has been seen at least once by now.
            if batch_counter > 1:
                if (count_source - metrics['number_evaluated_samples_source'][-2] == 0):
                    x, y = dm_s.get_x_y(np.random.randint(0, count_source))
                    x_source.append(x)
                    y_source.append(y)
                if (count_target - metrics['number_evaluated_samples_target'][-2] == 0):
                    x, y = dm_t.get_x_y(np.random.randint(0, count_target))
                    x_target.append(x)
                    y_target.append(y)
            # Workaround to avoid empty stream

            x_source = torch.tensor(x_source, dtype=torch.float, device=MyDevice().get())
            y_source = torch.tensor(y_source, dtype=torch.float, device=MyDevice().get())
            x_target = torch.tensor(x_target, dtype=torch.float, device=MyDevice().get())
            y_target = torch.tensor(y_target, dtype=torch.float, device=MyDevice().get())

            # TEST (skipped for the first minibatch, before any training)
            if batch_counter > 1:
                metrics['test_time'].append(time.time())
                __test(network=nn, x=x_source, y=y_source,
                       is_source=True, is_discriminative=True, metrics=metrics)
                __test(network=nn, x=x_target, y=y_target,
                       is_source=False, is_discriminative=True, metrics=metrics)
                __test(network=dae, x=x_source,
                       is_source=True, is_discriminative=False, metrics=metrics)
                __test(network=dae, x=x_target,
                       is_source=False, is_discriminative=False, metrics=metrics)

                da.test(x=torch.cat([x_source, x_target]),
                        y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                     TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))
                metrics['domain_regression_loss'].append(float(da.loss_value))
                metrics['classification_rate_domain'].append(da.classification_rate)
                metrics['domain_classification_misclassified'].append(da.misclassified)
                # The entry held the start timestamp; turn it into a duration.
                metrics['test_time'][-1] = time.time() - metrics['test_time'][-1]

            # TRAIN
            metrics['train_time'].append(time.time())

            # Features and labels of the source are glued together so that
            # __force_same_size treats them as one tensor, then split again.
            common_source, x_target = __force_same_size(torch.cat((x_source.T, y_source.T)).T, x_target, shuffle=False)
            x_source, y_source = common_source.T.split(x_source.shape[1])
            x_source, y_source = x_source.T, y_source.T

            epoch = 1
            while epoch <= internal_epochs:
                # Each sample is reshaped to a (1, n) row before use.
                for xs, xt, ys in [(xs.view(1, xs.shape[0]), xt.view(1, xt.shape[0]), ys.view(1, ys.shape[0]))
                                   for xs, xt, ys in zip(x_source, x_target, y_source)]:
                    # Evolving (structure changes only during the first epoch)
                    if epoch == 1:
                        # Evolving Feature Extraction
                        for i in range(0, 2):
                            if i == 0:
                                __width_evolution(network=dae, x=xs, y=xt)
                            elif i == 1:
                                __width_evolution(network=dae, x=xt, y=xs)
                            if __grow_nodes(dae, da, nn):
                                __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)
                            elif __prune_nodes(dae, da, nn):
                                __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                        # Evolving Source
                        __width_evolution(network=nn, x=xs, y=ys)
                        __width_evolution(network=da, x=xs, y=torch.cat(xs.shape[0]*[SOURCE_DOMAIN_LABEL]))
                        # NOTE(review): the `elif` is assumed to pair with the
                        # inner `if`; confirm against upstream.
                        if not __grow_nodes(da, nn):
                            if __prune_nodes(da):
                                __prune_nodes(nn)
                            elif not __grow_nodes(nn):
                                __prune_nodes(nn)

                        # Evolving Target
                        __width_evolution(network=da, x=xt, y=torch.cat(xt.shape[0]*[TARGET_DOMAIN_LABEL]))
                        if not __grow_nodes(da, nn):
                            __prune_nodes(da)

                    # Denoising AutoEncoder (each domain is used as the input
                    # for the other one as target, in both directions)
                    __generative(network=dae, x=xs, y=xt)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    __generative(network=dae, x=xt, y=xs)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Domain Discriminator.  The shared first layer (held by
                    # dae) is stepped along +grad of the domain loss (note the
                    # .neg()), i.e. against the domain classifier, while the
                    # remaining da layers go through da.update_weight.
                    da.feedforward(x=xs, y=torch.cat(xs.shape[0]*[SOURCE_DOMAIN_LABEL]), train=True).backpropagate()
                    dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    for weight_no in range(da.number_hidden_layers, 0, -1):
                        da.update_weight(weight_no=weight_no)

                    da.feedforward(x=xt, y=torch.cat(xt.shape[0]*[TARGET_DOMAIN_LABEL]), train=True).backpropagate()
                    dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    for weight_no in range(da.number_hidden_layers, 0, -1):
                        da.update_weight(weight_no=weight_no)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Discriminator (label classifier, source samples only)
                    __discriminative(network=nn, x=xs, y=ys)
                    __copy_weights(source=nn, targets=[da, dae], layer_numbers=[1], copy_moment=True)

                epoch += 1
            da.test(x=torch.cat([x_source, x_target]),
                    y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                 TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))

            # Metrics
            metrics['train_time'][-1] = time.time() - metrics['train_time'][-1]
            metrics['node_evolution_discriminator'].append(nn.layers[-2])
            metrics['node_evolution_domain_classifier'].append(da.layers[-2])
            metrics['node_evolution_feature_extraction'].append(dae.layers[-2])
            print_metrics(batch_counter, metrics, dm_s, dm_t, nn, dae, da)

            # Reset variables for the next batch
            x_source = []
            y_source = []
            x_target = []
            y_target = []
            count_window = 0

    result_string = '%f (T) | %f (S) \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %f' % (
                        np.mean(metrics['classification_rate_target']),
                        np.mean(metrics['classification_rate_source']),

                        np.mean(metrics['node_evolution_feature_extraction']),
                        metrics['node_evolution_feature_extraction'][-1],

                        np.mean(metrics['node_evolution_discriminator']),
                        metrics['node_evolution_discriminator'][-1],

                        np.mean(metrics['node_evolution_domain_classifier']),
                        metrics['node_evolution_domain_classifier'][-1],

                        np.mean(metrics['train_time']),
                        np.sum(metrics['train_time']))

    print('CR Rate (Target) | CR Rate (Source) | \t '
          'Feature Extractor Node Evolution (mean | final) \t '
          'Discriminator Node Evolution (mean | final) \t '
          'Domain Classifier Node Evolution (mean | final) \t '
          'Train Time (mean | total)')
    print(result_string)

    # "*_batch" values average the per-minibatch rates; "*_total" values are
    # 1 - (misclassified samples / dataset size).
    result = {}
    result['string'] = result_string
    result['classification_rate_source_batch'] = np.nanmean(metrics['classification_rate_source'])
    result['classification_rate_target_batch'] = np.nanmean(metrics['classification_rate_target'])
    result['classification_rate_domain_batch'] = np.nanmean(metrics['classification_rate_domain'])
    result['classification_rate_source_total'] = 1 - np.sum(
        metrics['classification_source_misclassified']) / dm_s.number_samples()
    result['classification_rate_target_total'] = 1 - np.sum(
        metrics['classification_target_misclassified']) / dm_t.number_samples()
    result['classification_rate_domain_total'] = 1 - np.sum(metrics['domain_classification_misclassified']) / (
        dm_s.number_samples() + dm_t.number_samples())
    # Key names are historical: "source"/"target"/"domain" nodes map to the
    # discriminator, the feature extractor and the domain classifier.
    result['source_node_mean'] = np.nanmean(metrics['node_evolution_discriminator'])
    result['target_node_mean'] = np.nanmean(metrics['node_evolution_feature_extraction'])
    result['domain_node_mean'] = np.nanmean(metrics['node_evolution_domain_classifier'])
    result['source_node_final'] = metrics['node_evolution_discriminator'][-1]
    result['target_node_final'] = metrics['node_evolution_feature_extraction'][-1]
    result['domain_node_final'] = metrics['node_evolution_domain_classifier'][-1]
    result['train_time_mean'] = np.nanmean(metrics['train_time'])
    result['train_time_final'] = np.nansum(metrics['train_time'])
    result['test_time_mean'] = np.nanmean(metrics['test_time'])
    result['test_time_final'] = np.nansum(metrics['test_time'])
    result['classification_source_loss_mean'] = np.nanmean(metrics['classification_source_loss'])
    result['classification_target_loss_mean'] = np.nanmean(metrics['classification_target_loss'])
    result['reconstruction_source_loss_mean'] = np.nanmean(metrics['reconstruction_source_loss'])
    result['reconstruction_target_loss_mean'] = np.nanmean(metrics['reconstruction_target_loss'])
    result['domain_adaptation_loss_mean'] = np.nanmean(metrics['domain_regression_loss'])

    print()
    print(result)

    __plot_time(metrics['train_time'],
                metrics['test_time'],
                annotation=False)
    __plot_classification_rates(metrics['classification_rate_source'],
                                metrics['classification_rate_target'],
                                metrics['classification_rate_domain'],
                                result['classification_rate_source_total'],
                                result['classification_rate_target_total'],
                                result['classification_rate_domain_total'],
                                class_number=dm_s.number_classes(),
                                annotation=False)
    __plot_node_evolution(metrics['node_evolution_discriminator'],
                          metrics['node_evolution_domain_classifier'],
                          metrics['node_evolution_feature_extraction'],
                          annotation=False)
    __plot_losses(metrics['classification_source_loss'],
                  metrics['classification_target_loss'],
                  metrics['reconstruction_source_loss'],
                  metrics['reconstruction_target_loss'],
                  metrics['domain_regression_loss'],
                  annotation=False)
    __plot_discriminative_network_significance(nn.BIAS, nn.VAR, annotation=False)
    __plot_domain_classifier_network_significance(da.BIAS, da.VAR, annotation=False)
    __plot_feature_extractor_network_significance(dae.BIAS, dae.VAR, annotation=False)

    return result


def generate_csv_from_dataset(dataset_name: str,
                              n_concept_drift: int = 1,
                              is_source: bool = True,
                              is_one_hot_encoding: bool = True,
                              label_starts_at: int = 0):
    """Export one benchmark as ``source.csv`` or ``target.csv``.

    Each row holds the sample's features followed by its label.

    Args:
        dataset_name: dataset key understood by ``__load_source_target``.
        n_concept_drift: forwarded to the dataset loader.
        is_source: write ``source.csv`` when true, ``target.csv`` otherwise.
        is_one_hot_encoding: when true the label is written as
            ``number_classes() + label_starts_at`` one-hot columns, otherwise
            as a single column holding the class index.
        label_starts_at: offset added to the class index (e.g. 1 for tools
            that number classes from 1).
    """
    import contextlib
    import csv
    import os
    from tqdm import tqdm

    filename = 'source.csv' if is_source else 'target.csv'

    dm, _ = __load_source_target(source=dataset_name,
                                 target='',
                                 n_source_concept_drift=n_concept_drift)

    # Start from a fresh file.  Only "it was not there" is ignored; any other
    # failure (e.g. missing permissions) is reported instead of swallowed.
    with contextlib.suppress(FileNotFoundError):
        os.remove(filename)

    n_samples = dm.number_samples()
    one_hot_width = dm.number_classes() + label_starts_at

    print('Exporting dataset "%s" as file "%s"' % (dataset_name, filename))
    # Both context managers guarantee that the file and the progress bar are
    # closed even if a row fails to export.
    with open(filename, 'w', newline='') as csv_file, tqdm(total=n_samples) as pbar:
        writer = csv.writer(csv_file, delimiter=',')
        for i in range(n_samples):
            x, y = dm.get_x_y(i)
            label = y.argmax() + label_starts_at
            if is_one_hot_encoding:
                encoded = np.zeros(one_hot_width)
                encoded[label] = 1
            else:
                encoded = np.asarray([label])

            writer.writerow(np.concatenate((x, encoded)).tolist())
            pbar.update(1)
    print('Done!')


def generate_arff_from_dataset(source_dataset_name: str,
                               target_dataset_name: str,
                               n_source_concept_drifts: int = 1,
                               n_target_concept_drifts: int = 1,
                               output_filename : str = None):
    """Export a source and a target benchmark as one interleaved ARFF file.

    The two datasets are merged with the same sampling rule ``acdc`` uses
    (source picked with probability proportional to its unread samples).
    Every row is ``[domain flag, features..., class index]`` where the flag is
    1 for source samples and 0 for target samples.

    Args:
        source_dataset_name: dataset key of the source stream.
        target_dataset_name: dataset key of the target stream.
        n_source_concept_drifts: forwarded to the source dataset loader.
        n_target_concept_drifts: forwarded to the target dataset loader.
        output_filename: destination path; defaults to
            ``'source_target_melanie.arff'`` when ``None``.
    """
    import contextlib
    import os
    import arff

    if output_filename is not None:
        filename = output_filename
    else:
        filename = 'source_target_melanie.arff'

    dm_s, dm_t = __load_source_target(source=source_dataset_name,
                                      target=target_dataset_name,
                                      n_source_concept_drift=n_source_concept_drifts,
                                      n_target_concept_drift=n_target_concept_drifts)

    # Only a missing file is ignored; other errors are no longer swallowed.
    with contextlib.suppress(FileNotFoundError):
        os.remove(filename)

    print(f'Exporting datasets {source_dataset_name} and {target_dataset_name} as file {filename}')

    n_source = dm_s.number_samples()
    n_target = dm_t.number_samples()
    data = []
    count_source = 0
    count_target = 0

    while count_source < n_source or count_target < n_target:
        source_prob = (n_source - count_source) / (
            n_source - count_source + n_target - count_target + 0.)
        # Drawn unconditionally so that the random stream is consumed exactly
        # once per exported sample, as before.
        draw = np.random.rand()
        source_left = count_source < n_source
        target_left = count_target < n_target

        if source_left and (draw <= source_prob or not target_left):
            x, y = dm_s.get_x_y(count_source)
            count_source += 1
            domain_flag = 1
        else:
            # The loop condition guarantees a target sample is left here.
            x, y = dm_t.get_x_y(count_target)
            count_target += 1
            domain_flag = 0

        # int(): hand the writer a built-in integer rather than a NumPy scalar.
        data.append([domain_flag, *x.tolist(), int(y.argmax())])

    arff.dump(filename, data, relation=f'{source_dataset_name}_{target_dataset_name}')
    print('done')


def pre_download_benchmarks():
    """Load every enabled benchmark once and print its dimensions.

    Presumably this triggers the download/caching done by the
    ``DataManipulator`` loaders -- confirm there.  A fresh ``DataManipulator``
    is used for each dataset.
    """
    def print_info(dm):
        print('Number of samples: %d' % dm.number_samples())
        print('Number of features: %d' % dm.number_features())
        print('Number of classes: %d' % dm.number_classes())
        return DataManipulator()

    # The news-popularity loaders and the remaining Amazon review categories
    # were disabled (commented out) in the original and stay disabled.
    enabled_loaders = (
        'load_mnist',
        'load_usps',
        'load_cifar10',
        'load_stl10',
        'load_london_bike_sharing',
        'load_washington_bike_sharing',
        'load_amazon_review_all_beauty',
        'load_amazon_review_industrial_scientific',
        'load_amazon_review_luxury_beauty',
        'load_amazon_review_magazine_subscription',
        'load_amazon_review_books',
    )

    dm = DataManipulator()
    for loader_name in enabled_loaders:
        getattr(dm, loader_name)()
        dm = print_info(dm)
'\n\tn_source_concept_drift: int = 5', + '\n\tn_target_concept_drift: int = 7', + '\n\tinternal_epochs: int = 1', + '\n\tis_gpu: bool = False')) +print(' ') +print('source: String representing the source benchmark') +print('target: String representing the target benchmark') +print('n_source_concept_drift: Number of concept drifts at the source stream') +print('n_target_concept_drift: Number of concept drifts at the target stream') +print('internal_epochs: Number of internal epochs per minibatch') +print( + 'is_gpu: False to run on CPU. True to run on GPU. The paper were generated on CPU. The code is not optimized for GPU. Only runs if you have a huge ammount of GRAM') +print(' ') +print('Returns a dictionary with all results for the run') +print('************************************************************') +print(' ') +print('************************************************************') +print('pre_download_benchmarks()') +print('************************************************************') +print(' ') +print('************************************************************') +print('generate_csv_from_dataset(%s,%s,%s,%s,%s\n\t)' % ( + '\n\tdataset_name: str', + '\n\tn_concept_drift: int = 1', + '\n\tis_source: bool = True', + '\n\tis_one_hot_enconding: bool = True', + '\n\tlabel_starts_at: int = 0')) +print(' ') +print('dataset_name: String representing which benchmark should be converted to CSV') +print('n_concept_drift: Number of concept drifts applied into the CSV dataset') +print('is_source: True to generate a file "source.csv", False to generate a file "target.csv"') +print('is_one_hot_enconding: If True, label will be the n last columns in an one-hot-encoding format, if False, label will be the last column as a number') +print('label_starts_at: The smallest label. 
Usually it is 0, but some source_code, specially made in Matlab, can start from 1') +print('************************************************************') +print(' ') +print('List of possible strings for datasets:') +print(' ') +print('mnist-28: MNIST resized to 28x28, which is original size ~ 784 features') +print('mnist-16: MNIST resized to 16x16 ~ 256 features') +print('usps-28: USPS resized to 28x28 ~ 784 features') +print('usps-16: USPS resized to 16x16, which is original size ~ 256 features') +print('cifar10: CIFAR10 extracted from Resnet ~ 512 features') +print('stl10: STL10 extracted from Resnet ~512 features') +print('amazon-review-all-beauty: Amazon Review | All Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-books: Amazon Review | Books | Word2Vec applied ~ 300 features') +print('amazon-review-industrial-scientific: Amazon Review | Industrial and Scientific | Word2Vec applied ~ 300 features') +print('amazon-review-luxury-beauty: Amazon Review | Luxury Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-magazine-subscription: Amazon Review | Magazine Subscription | Word2Vec applied ~ 300 features') +print('london-bike: London bike sharing dataset ~ 8 features') +print('washington-bike: Washington D.C. 
bike sharing dataset ~ 8 features') + +# pre_download_benchmarks() +# acdc('mnist-28', 'usps-28', 5, 7, 1, False) +# acdc('london-bike', 'washington-bike', 5, 7, 1, False) +# acdc('washington-bike', 'london-bike', 5, 7, 1, False) +# generate_arff_from_dataset('mnist-28', 'usps-28', 5, 7, 'mnist-28_usps-28_1.arff') +# generate_arff_from_dataset('mnist-28', 'usps-28', 5, 7, 'mnist-28_usps-28_2.arff') +# generate_arff_from_dataset('mnist-28', 'usps-28', 5, 7, 'mnist-28_usps-28_3.arff') +# generate_arff_from_dataset('mnist-28', 'usps-28', 5, 7, 'mnist-28_usps-28_4.arff') +# generate_arff_from_dataset('mnist-28', 'usps-28', 5, 7, 'mnist-28_usps-28_5.arff') +# +# generate_arff_from_dataset('usps-16', 'mnist-16', 5, 7, 'usps-16_mnist-16_1.arff') +# generate_arff_from_dataset('usps-16', 'mnist-16', 5, 7, 'usps-16_mnist-16_2.arff') +# generate_arff_from_dataset('usps-16', 'mnist-16', 5, 7, 'usps-16_mnist-16_3.arff') +# generate_arff_from_dataset('usps-16', 'mnist-16', 5, 7, 'usps-16_mnist-16_4.arff') +# generate_arff_from_dataset('usps-16', 'mnist-16', 5, 7, 'usps-16_mnist-16_5.arff') +# +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-books', 5, 7, 'beauty_books_1.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-books', 5, 7, 'beauty_books_2.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-books', 5, 7, 'beauty_books_3.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-books', 5, 7, 'beauty_books_4.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-books', 5, 7, 'beauty_books_5.arff') +# +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-industrial-scientific', 5, 7, 'beauty_industrial_1.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-industrial-scientific', 5, 7, 'beauty_industrial_2.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 
'amazon-review-industrial-scientific', 5, 7, 'beauty_industrial_3.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-industrial-scientific', 5, 7, 'beauty_industrial_4.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-industrial-scientific', 5, 7, 'beauty_industrial_5.arff') +# +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-luxury-beauty', 5, 7, 'beauty_luxury_1.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-luxury-beauty', 5, 7, 'beauty_luxury_2.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-luxury-beauty', 5, 7, 'beauty_luxury_3.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-luxury-beauty', 5, 7, 'beauty_luxury_4.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-luxury-beauty', 5, 7, 'beauty_luxury_5.arff') +# +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-magazine-subscription', 5, 7, 'beauty_magazine_1.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-magazine-subscription', 5, 7, 'beauty_magazine_2.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-magazine-subscription', 5, 7, 'beauty_magazine_3.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-magazine-subscription', 5, 7, 'beauty_magazine_4.arff') +# generate_arff_from_dataset('amazon-review-all-beauty', 'amazon-review-magazine-subscription', 5, 7, 'beauty_magazine_5.arff') +# +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-all-beauty', 5, 7, 'books_beauty_1.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-all-beauty', 5, 7, 'books_beauty_2.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-all-beauty', 5, 7, 'books_beauty_3.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-all-beauty', 5, 
7, 'books_beauty_4.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-all-beauty', 5, 7, 'books_beauty_5.arff') +# +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-industrial-scientific', 5, 7, 'books_industrial_1.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-industrial-scientific', 5, 7, 'books_industrial_2.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-industrial-scientific', 5, 7, 'books_industrial_3.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-industrial-scientific', 5, 7, 'books_industrial_4.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-industrial-scientific', 5, 7, 'books_industrial_5.arff') +# +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-luxury-beauty', 5, 7, 'books_luxury_1.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-luxury-beauty', 5, 7, 'books_luxury_2.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-luxury-beauty', 5, 7, 'books_luxury_3.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-luxury-beauty', 5, 7, 'books_luxury_4.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-luxury-beauty', 5, 7, 'books_luxury_5.arff') +# +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-magazine-subscription', 5, 7, 'books_magazine_1.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-magazine-subscription', 5, 7, 'books_magazine_2.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-magazine-subscription', 5, 7, 'books_magazine_3.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-magazine-subscription', 5, 7, 'books_magazine_4.arff') +# generate_arff_from_dataset('amazon-review-books', 'amazon-review-magazine-subscription', 5, 7, 'books_magazine_5.arff') +# +# 
generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-all-beauty', 5, 7, 'industrial_beauty_1.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-all-beauty', 5, 7, 'industrial_beauty_2.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-all-beauty', 5, 7, 'industrial_beauty_3.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-all-beauty', 5, 7, 'industrial_beauty_4.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-all-beauty', 5, 7, 'industrial_beauty_5.arff') +# +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-books', 5, 7, 'industrial_books_1.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-books', 5, 7, 'industrial_books_2.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-books', 5, 7, 'industrial_books_3.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-books', 5, 7, 'industrial_books_4.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-books', 5, 7, 'industrial_books_5.arff') +# +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-luxury-beauty', 5, 7, 'industrial_luxury_1.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-luxury-beauty', 5, 7, 'industrial_luxury_2.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-luxury-beauty', 5, 7, 'industrial_luxury_3.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-luxury-beauty', 5, 7, 'industrial_luxury_4.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-luxury-beauty', 5, 7, 'industrial_luxury_5.arff') +# +# 
generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-magazine-subscription', 5, 7, 'industrial_magazine_1.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-magazine-subscription', 5, 7, 'industrial_magazine_2.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-magazine-subscription', 5, 7, 'industrial_magazine_3.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-magazine-subscription', 5, 7, 'industrial_magazine_4.arff') +# generate_arff_from_dataset('amazon-review-industrial-scientific', 'amazon-review-magazine-subscription', 5, 7, 'industrial_magazine_5.arff') +# +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-all-beauty', 5, 7, 'luxury_beauty_1.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-all-beauty', 5, 7, 'luxury_beauty_2.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-all-beauty', 5, 7, 'luxury_beauty_3.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-all-beauty', 5, 7, 'luxury_beauty_4.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-all-beauty', 5, 7, 'luxury_beauty_5.arff') +# +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-books', 5, 7, 'luxury_books_1.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-books', 5, 7, 'luxury_books_2.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-books', 5, 7, 'luxury_books_3.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-books', 5, 7, 'luxury_books_4.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-books', 5, 7, 'luxury_books_5.arff') +# +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-industrial-scientific', 5, 7, 
'luxury_industrial_1.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-industrial-scientific', 5, 7, 'luxury_industrial_2.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-industrial-scientific', 5, 7, 'luxury_industrial_3.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-industrial-scientific', 5, 7, 'luxury_industrial_4.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-industrial-scientific', 5, 7, 'luxury_industrial_5.arff') +# +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-magazine-subscription', 5, 7, 'luxury_magazine_1.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-magazine-subscription', 5, 7, 'luxury_magazine_2.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-magazine-subscription', 5, 7, 'luxury_magazine_3.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-magazine-subscription', 5, 7, 'luxury_magazine_4.arff') +# generate_arff_from_dataset('amazon-review-luxury-beauty', 'amazon-review-magazine-subscription', 5, 7, 'luxury_magazine_5.arff') +# +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-all-beauty', 5, 7, 'magazine_beauty_1.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-all-beauty', 5, 7, 'magazine_beauty_2.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-all-beauty', 5, 7, 'magazine_beauty_3.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-all-beauty', 5, 7, 'magazine_beauty_4.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-all-beauty', 5, 7, 'magazine_beauty_5.arff') +# +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-books', 5, 7, 
'magazine_books_1.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-books', 5, 7, 'magazine_books_2.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-books', 5, 7, 'magazine_books_3.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-books', 5, 7, 'magazine_books_4.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-books', 5, 7, 'magazine_books_5.arff') +# +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-industrial-scientific', 5, 7, 'magazine_industrial_1.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-industrial-scientific', 5, 7, 'magazine_industrial_2.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-industrial-scientific', 5, 7, 'magazine_industrial_3.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-industrial-scientific', 5, 7, 'magazine_industrial_4.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-industrial-scientific', 5, 7, 'magazine_industrial_5.arff') +# +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-luxury-beauty', 5, 7, 'magazine_luxury_1.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-luxury-beauty', 5, 7, 'magazine_luxury_2.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-luxury-beauty', 5, 7, 'magazine_luxury_3.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-luxury-beauty', 5, 7, 'magazine_luxury_4.arff') +# generate_arff_from_dataset('amazon-review-magazine-subscription', 'amazon-review-luxury-beauty', 5, 7, 'magazine_luxury_5.arff') +# +# generate_arff_from_dataset('cifar10', 'stl10', 5, 7, 'cifar_stl_1.arff') +# 
generate_arff_from_dataset('cifar10', 'stl10', 5, 7, 'cifar_stl_2.arff') +# generate_arff_from_dataset('cifar10', 'stl10', 5, 7, 'cifar_stl_3.arff') +# generate_arff_from_dataset('cifar10', 'stl10', 5, 7, 'cifar_stl_4.arff') +# generate_arff_from_dataset('cifar10', 'stl10', 5, 7, 'cifar_stl_5.arff') +# +# generate_arff_from_dataset('stl10', 'cifar10', 5, 7, 'stl_cifar_1.arff') +# generate_arff_from_dataset('stl10', 'cifar10', 5, 7, 'stl_cifar_2.arff') +# generate_arff_from_dataset('stl10', 'cifar10', 5, 7, 'stl_cifar_3.arff') +# generate_arff_from_dataset('stl10', 'cifar10', 5, 7, 'stl_cifar_4.arff') +# generate_arff_from_dataset('stl10', 'cifar10', 5, 7, 'stl_cifar_5.arff') +# +# generate_arff_from_dataset('london-bike', 'washington-bike', 5, 7, 'london_washington_1.arff') +# generate_arff_from_dataset('london-bike', 'washington-bike', 5, 7, 'london_washington_2.arff') +# generate_arff_from_dataset('london-bike', 'washington-bike', 5, 7, 'london_washington_3.arff') +# generate_arff_from_dataset('london-bike', 'washington-bike', 5, 7, 'london_washington_4.arff') +# generate_arff_from_dataset('london-bike', 'washington-bike', 5, 7, 'london_washington_5.arff') +# +# generate_arff_from_dataset('washington-bike', 'london-bike', 5, 7, 'washington_london_1.arff') +# generate_arff_from_dataset('washington-bike', 'london-bike', 5, 7, 'washington_london_2.arff') +# generate_arff_from_dataset('washington-bike', 'london-bike', 5, 7, 'washington_london_3.arff') +# generate_arff_from_dataset('washington-bike', 'london-bike', 5, 7, 'washington_london_4.arff') +# generate_arff_from_dataset('washington-bike', 'london-bike', 5, 7, 'washington_london_5.arff') \ No newline at end of file diff --git a/ACDCDataManipulator.py b/ACDCDataManipulator.py new file mode 100644 index 0000000..4acceea --- /dev/null +++ b/ACDCDataManipulator.py @@ -0,0 +1,1236 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL 
UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. 
Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. 
That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. 
class MyCustomBikeSharingDataLoader(torch.utils.data.Dataset):
    """Bike-sharing demand data (London or Washington) as a torch ``Dataset``.

    The CSV files are never downloaded automatically; they must already be
    present under ``path``.  After loading, both cities are reduced to the
    same eight feature columns plus a binary ``demand`` label, and every
    feature column is min-max scaled by :meth:`normalize`.

    ``demand`` is 1 when a record's ``cnt`` is at or below the median ``cnt``
    of the loaded data, and 0 otherwise.
    """

    path = 'data/BikeSharing/'
    df = None

    # Final column order shared by both cities (label last).
    _COLUMNS = ['t1', 't2', 'hum', 'wind_speed', 'weather_code',
                'is_holiday', 'is_weekend', 'season', 'demand']

    # city -> (expected CSV files, page the user must download them from).
    # The two URLs used to be swapped, which sent users to the wrong dataset.
    _SOURCES = {
        'london': (('london_merged.csv',),
                   'https://www.kaggle.com/hmavrodiev/london-bike-sharing-dataset'),
        'washington': (('hour.csv', 'day.csv'),
                       'https://www.kaggle.com/marklvl/bike-sharing-dataset'),
    }

    @property
    def datasets(self):
        """Return the underlying processed ``DataFrame``."""
        return self.df

    def __init__(self, london_or_washignton: str = 'london'):
        """Load and preprocess the CSV files of the requested city.

        Args:
            london_or_washignton: ``'london'`` or ``'washington'``
                (case-insensitive).

        Raises:
            ValueError: if the city is neither London nor Washington.
            SystemExit: if an expected CSV file is missing from ``path``;
                a download hint is printed first.
        """
        city = london_or_washignton.lower()
        if city not in self._SOURCES:
            # Previously this fell through and failed later with an opaque
            # AttributeError on ``self.base_files``.
            raise ValueError(
                f"Unknown bike sharing dataset {london_or_washignton!r}; "
                f"expected 'london' or 'washington'")
        files, self.base_url = self._SOURCES[city]
        self.base_files = list(files)

        if not os.path.exists(self.path):
            os.makedirs(self.path)

        frames = []
        for file in self.base_files:
            file_path = f'{self.path}{file}'
            if not os.path.isfile(file_path):
                print(f'Please, manually download file {file} from url {self.base_url} and put it at path {self.path}')
                # Exit with a failure status; the bare ``exit()`` helper
                # reported success (status 0) and is only defined when the
                # ``site`` module is loaded.
                raise SystemExit(1)
            frames.append(pd.read_csv(file_path))
        # sort=True keeps the column union in a deterministic order when the
        # files do not share exactly the same columns.
        self.df = frames[0] if len(frames) == 1 else pd.concat(frames, sort=True)

        # Binary label: 1 = count at or below the median, 0 = above it.
        self.df['demand'] = (self.df['cnt'] <= self.df['cnt'].median()).astype(int)

        if city == 'london':
            self.df.drop(columns=['timestamp', 'cnt'], inplace=True)
        else:
            self.df.drop(columns=['casual', 'dteday', 'holiday', 'hr', 'instant', 'mnth', 'registered', 'yr', 'cnt'],
                         inplace=True)
            # Map the Washington column names onto the London ones.
            self.df.rename(
                columns={'temp': 't1', 'atemp': 't2', 'windspeed': 'wind_speed', 'weathersit': 'weather_code',
                         'workingday': 'is_holiday', 'weekday': 'is_weekend'}, inplace=True)
            # 'weekday' values 0 and 6 are flagged as weekend.
            self.df['is_weekend'] = ((self.df['is_weekend'] == 0) | (self.df['is_weekend'] == 6)).astype(int)
            # 'workingday' is inverted: a non-working day counts as holiday.
            self.df['is_holiday'] = (self.df['is_holiday'] == 0).astype(int)
            # Shift 'season' down by one.
            self.df['season'] = self.df['season'] - 1

        # Copy so that normalize() writes into an independent frame.
        self.df = self.df[self._COLUMNS].copy()

        self.normalize()

    def __len__(self):
        """Return the number of loaded rows."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return row ``idx`` as ``{'x': feature ndarray, 'y': demand}``.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        # ``iloc`` already raises IndexError for out-of-range positions, so
        # the old ``else: return None`` branch could never be reached.
        item = self.df.iloc[idx]
        try:
            return {'x': item.drop('demand').to_numpy(), 'y': item['demand']}
        except Exception:
            # Best-effort fallback kept from the original: serve the previous
            # row instead.  No longer a bare ``except`` so KeyboardInterrupt
            # and SystemExit propagate.
            return self.__getitem__(idx - 1)

    def normalize(self, a: int = 0, b: int = 1):
        """Min-max scale every feature column (all but ``demand``) to [a, b].

        A column whose values are all equal is mapped to ``a`` instead of
        being divided by a zero range (which used to fill it with NaN).
        """
        assert a < b
        for feature_name in self.df.columns.drop('demand'):
            column = self.df[feature_name]
            min_value = column.min()
            span = column.max() - min_value
            if span == 0:
                # Constant column: ``column - min_value`` is all zeros (NaN
                # entries stay NaN), so the result is simply ``a``.
                self.df[feature_name] = (column - min_value).astype(float) + a
            else:
                self.df[feature_name] = (b - a) * (column - min_value) / span + a
class MyCustomAmazonReviewDataLoader(torch.utils.data.Dataset):
    """Amazon 5-core review file turned into mean Word2Vec vectors plus rating.

    The gzipped JSON-lines file is downloaded into ``path``. Each review's
    ``reviewText`` is tokenized and averaged over the words known to the
    Word2Vec model, and the result is cached next to this module as
    ``<path>.h5``. Feature columns are then min-max scaled.

    Args:
        filename: name of the review file below ``base_url``,
            e.g. ``'Software_5.json.gz'``.
    """

    base_url = 'http://deepyeti.ucsd.edu/jianmo/amazon/categoryFilesSmall/'
    path = 'data/AmazonReview/'
    df = None

    # Reviews per known file; only used to size the progress bar.
    _sample_counts = {
        'AMAZON_FASHION_5.json.gz': 3176,
        'All_Beauty_5.json.gz': 5269,
        'Appliances_5.json.gz': 2277,
        'Arts_Crafts_and_Sewing_5.json.gz': 494485,
        'Automotive_5.json.gz': 1711519,
        'Books_5.json.gz': 27164983,
        'CDs_and_Vinyl_5.json.gz': 1443755,
        'Cell_Phones_and_Accessories_5.json.gz': 1128437,
        'Clothing_Shoes_and_Jewelry_5.json.gz': 11285464,
        'Digital_Music_5.json.gz': 169781,
        'Electronics_5.json.gz': 6739590,
        'Gift_Cards_5.json.gz': 2972,
        'Grocery_and_Gourmet_Food_5.json.gz': 1143860,
        'Home_and_Kitchen_5.json.gz': 6898955,
        'Industrial_and_Scientific_5.json.gz': 77071,
        'Kindle_Store_5.json.gz': 2222983,
        'Luxury_Beauty_5.json.gz': 34278,
        'Magazine_Subscriptions_5.json.gz': 2375,
        'Movies_and_TV_5.json.gz': 3410019,
        'Musical_Instruments_5.json.gz': 231392,
        'Office_Products_5.json.gz': 800357,
        'Patio_Lawn_and_Garden_5.json.gz': 798415,
        'Pet_Supplies_5.json.gz': 2098325,
        'Prime_Pantry_5.json.gz': 137788,
        'Software_5.json.gz': 12805,
        'Sports_and_Outdoors_5.json.gz': 2839940,
        'Tools_and_Home_Improvement_5.json.gz': 2070831,
        'Toys_and_Games_5.json.gz': 1828971,
        'Video_Games_5.json.gz': 497577,
    }

    @property
    def datasets(self):
        """Return the underlying ``DataFrame`` (features plus ``overall``)."""
        return self.df

    def __init__(self, filename):
        torchvision.datasets.utils.download_url(self.base_url + filename, self.path)
        self.df = self.get_df(self.path + filename)
        self.normalize()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``{'x': feature array, 'y': overall rating}`` for row ``idx``.

        ``DataFrame.iloc`` raises ``IndexError`` for an out-of-range ``idx``.
        """
        item = self.df.iloc[idx]

        if idx < len(self):
            try:
                return {'x': item.drop('overall').to_numpy(), 'y': item['overall']}
            except KeyError:
                # Row without an 'overall' entry: fall back to the previous row.
                return self.__getitem__(idx - 1)
        return None

    def normalize(self, a: int = 0, b: int = 1):
        """Min-max scale every feature column (all but ``overall``) into ``[a, b]``.

        A constant column is set to ``a`` rather than becoming NaN through 0/0.
        """
        if not a < b:
            raise ValueError(f'normalize requires a < b, got a={a}, b={b}')
        for feature_name in self.df.columns.drop('overall'):
            column = self.df[feature_name]
            min_value = column.min()
            spread = column.max() - min_value
            if spread == 0:
                self.df[feature_name] = float(a)
            else:
                self.df[feature_name] = (b - a) * (column - min_value) / spread + a

    @staticmethod
    def parse(path):
        """Yield one decoded JSON object per line of the gzipped file at ``path``."""
        # The context manager closes the gzip handle even if the consumer stops early.
        with gzip.open(path, 'r') as g:
            for l in g:
                yield json.loads(l)

    @staticmethod
    def _sum_known_words(tokens, word_vectors):
        """Return ``(vector sum, count)`` over the tokens present in ``word_vectors``."""
        total = np.zeros(word_vectors.vector_size)
        known = 0
        for token in tokens:
            try:
                total += word_vectors[token]
            except KeyError:
                continue  # out-of-vocabulary word
            known += 1
        return total, known

    def get_df(self, path, high_bound=500000):
        """Load the cached feature table for ``path`` or build and cache it.

        Args:
            path: path of the ``.json.gz`` review file.
            high_bound: maximum number of reviews to keep.

        Returns:
            DataFrame with one integer-named column per embedding dimension
            plus an ``overall`` column.

        Raises:
            ValueError: if no review yields a usable vector.
        """
        cache_path = os.path.join(os.path.dirname(__file__), path + '.h5')
        try:
            print('Trying to load processed file %s.h5 from disc...' % path)
            return pd.read_hdf(path_or_buf=cache_path, key='df')
        except Exception:
            # Any read failure means "no usable cache"; rebuild it below.
            print('Processed file does not exists')
            print('Reading dataset into memory and applying Word2Vec...')
            print('\nWe will save a maximum of half million samples because of memory constraints')
            print('and because that is more than sufficient samples to test transfer learning models\n')

        known_total = self._sample_counts.get(os.path.basename(path))
        # An unlisted file simply gets a progress bar without a total.
        bar_total = None if known_total is None else min(known_total, high_bound)

        word_vectors = MyWord2Vec().get()  # fetched once instead of once per word
        tokenizer = TweetTokenizer()
        vectors = []
        ratings = []
        with tqdm(unit=' samples', total=bar_total) as pbar:
            for d in self.parse(path):
                if len(vectors) >= high_bound:
                    break
                try:
                    review_text = d['reviewText']
                    overall = d['overall']
                    tokens = tokenizer.tokenize(review_text)
                except (KeyError, TypeError, AttributeError):
                    continue  # record lacks a usable text or rating
                total, known = self._sum_known_words(tokens, word_vectors)
                # Reviews with fewer than two known words are dropped.
                if known > 1:
                    vectors.append(total / known)
                    ratings.append(overall)
                    pbar.update(1)

        if not vectors:
            raise ValueError('No usable review found in %s' % path)

        print('Saving processed tokenized dataset in disc for future usage...')
        df = pd.DataFrame(np.vstack(vectors)).assign(overall=ratings)

        df.to_hdf(path_or_buf=cache_path,
                  key='df',
                  mode='w',
                  format='table',
                  complevel=9,
                  complib='bzip2')
        return pd.read_hdf(path_or_buf=cache_path, key='df')
class MyCustomAmazonReviewNIPSDataLoader(torch.utils.data.Dataset):
    """Pre-tokenized Amazon reviews (``processed_stars``) as mean Word2Vec vectors.

    Each line of an ``all_balanced.review`` file is a space-separated list of
    ``feature:count`` tokens ending in a ``#label#:<rating>`` token. Features
    are split on ``_`` into words, each word is looked up in the Word2Vec model
    and weighted by its count, and the mean vector is stored with the rating.

    Args:
        folder: one of ``'books'``, ``'dvd'``, ``'electronics'``, ``'kitchen'``.

    Raises:
        ValueError: if ``folder`` is not one of the four supported names.
    """

    dataset_url = 'https://www.cs.jhu.edu/~mdredze/datasets/sentiment/processed_stars.tar.gz'
    path = 'data/AmazonReviewNIPS/'
    compressed_filename = path + 'processed_stars.tar.gz'

    books_file_path = path + 'processed_stars/books/all_balanced.review'
    dvd_file_path = path + 'processed_stars/dvd/all_balanced.review'
    electronics_file_path = path + 'processed_stars/electronics/all_balanced.review'
    kitchen_file_path = path + 'processed_stars/kitchen/all_balanced.review'

    @property
    def datasets(self):
        """Return the underlying ``DataFrame`` (columns ``x`` and ``targets``)."""
        return self.df

    def __init__(self, folder):
        file_paths = {
            'books': self.books_file_path,
            'dvd': self.dvd_file_path,
            'electronics': self.electronics_file_path,
            'kitchen': self.kitchen_file_path,
        }
        # Validate before downloading so a typo fails fast and clearly.
        if folder not in file_paths:
            raise ValueError(
                'Unknown folder %r; expected one of %s' % (folder, sorted(file_paths)))

        torchvision.datasets.utils.download_url(self.dataset_url, self.path)
        # NOTE(review): extractall trusts the member paths of the downloaded archive.
        with tarfile.open(self.compressed_filename) as tar:
            tar.extractall(self.path)

        self.df = self.get_df(file_paths[folder])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``{'x': vector, 'y': rating}`` for row ``idx``.

        ``DataFrame.iloc`` raises ``IndexError`` for an out-of-range ``idx``.
        """
        item = self.df.iloc[idx]

        if idx < len(self):
            return {'x': item['x'], 'y': item['targets']}
        return None

    def get_df(self, path):
        """Load the cached table ``<path>.h5`` or build it from the review file.

        Lines whose words are all unknown to the Word2Vec model are skipped;
        averaging them would store a 0/0 NaN vector.
        """
        try:
            print('Trying to load processed file %s.h5 from disc...' % path)
            return pd.read_hdf(path_or_buf=path + '.h5', key='df')
        except Exception:
            # Any read failure means "no usable cache"; rebuild it below.
            print('Processed file does not exists')
            print('Reading dataset into memory and applying Word2Vec...')

        # Known line counts, used only to size the progress bar.
        totals = {
            self.books_file_path: 5501,
            self.dvd_file_path: 5518,
            self.electronics_file_path: 5901,
            self.kitchen_file_path: 5149,
        }

        word_vectors = MyWord2Vec().get()  # fetched once instead of once per word
        df = {}
        with open(path, 'rb') as handle, tqdm(unit=' samples', total=totals.get(path)) as pbar:
            for line_count, line in enumerate(handle):
                word_count = 0
                vector = np.zeros(word_vectors.vector_size)
                for token in line.decode('utf-8').split(' '):
                    # rpartition tolerates features that themselves contain ':'.
                    feature, separator, value = token.rpartition(':')
                    if not separator:
                        continue  # blank or malformed token
                    if feature != '#label#':
                        repeats = int(value)
                        for word in feature.split('_'):
                            try:
                                word_vector = word_vectors[word]
                            except KeyError:
                                continue  # out-of-vocabulary word
                            vector += word_vector * repeats
                            word_count += repeats
                    elif word_count > 0:
                        # float() ignores the trailing newline of the label token.
                        df[line_count] = {'x': vector / word_count, 'targets': int(float(value))}
                pbar.update(1)

        print('Saving processed tokenized dataset in disc for future usage...')
        df = pd.DataFrame.from_dict(df, orient='index')
        # NOTE(review): 'x' is an object column of arrays; confirm the installed
        # pandas/PyTables accepts it with format='table'.
        df.to_hdf(path_or_buf=path + '.h5',
                  key='df',
                  mode='w',
                  format='table',
                  complevel=9,
                  complib='bzip2')
        return pd.read_hdf(path_or_buf=path + '.h5', key='df')
class MyCustomNewsPopularityDataLoader(torch.utils.data.Dataset):
    """UCI multi-source news items as mean Word2Vec vectors with a popularity label.

    Rows of ``News_Final.csv`` are filtered by ``topic``. The label is 1 when
    the popularity on the chosen social feed (or the sum over Facebook,
    GooglePlus and LinkedIn for ``'all'``) exceeds 10, otherwise 0. Title and
    headline are concatenated, tokenized and averaged over the words known to
    the Word2Vec model. The result is cached as ``<topic>_<social_feed>.h5``
    (file name lower-cased) inside ``path``.

    Args:
        topic: value of the CSV ``Topic`` column to keep.
        social_feed: a popularity column name, or ``'all'`` for the sum of the three.

    Raises:
        ValueError: if no row of the topic yields a usable vector.
    """

    dataset_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00432/Data/News_Final.csv'
    path = 'data/UCIMultiSourceNews/'
    filename = 'News_Final.csv'

    @property
    def datasets(self):
        """Return the underlying ``DataFrame`` (features plus ``targets``)."""
        return self.df

    def __init__(self, topic: str, social_feed: str):
        torchvision.datasets.utils.download_url(self.dataset_url, self.path)
        # Lower-case only the file name: lower-casing the directory as well
        # points at a folder that does not exist on case-sensitive filesystems.
        path = self.path + (topic + '_' + social_feed + '.h5').lower()

        try:
            print('Trying to load processed file %s from disc...' % path)
            self.df = pd.read_hdf(path_or_buf=path, key='df')
            return
        except Exception:
            # Any read failure means "no usable cache"; rebuild it below.
            print('Processed file does not exists')
            print('Reading dataset into memory and applying Word2Vec...')

        raw = pd.read_csv(self.path + self.filename)
        if social_feed == 'all':
            feeds = ['Facebook', 'GooglePlus', 'LinkedIn']
        else:
            feeds = [social_feed]
        rows = raw.loc[raw['Topic'] == topic]
        # Popular (1) when the summed feed score exceeds 10, otherwise 0.
        labels = (rows[feeds].sum(axis=1) > 10).astype(int).tolist()
        texts = (rows['Title'].astype(str) + ' ' + rows['Headline'].astype(str)).tolist()

        tokenizer = TweetTokenizer()
        word_vectors = MyWord2Vec().get()  # fetched once instead of once per word

        vectors = []
        targets = []
        with tqdm(unit=' samples', total=len(texts)) as pbar:
            for text, label in zip(texts, labels):
                word_counter = 0
                vector = np.zeros(word_vectors.vector_size)
                for word in tokenizer.tokenize(text):
                    # Skip only the unknown word; the rest of the text still counts.
                    try:
                        vector += word_vectors[word]
                    except KeyError:
                        continue
                    word_counter += 1
                if word_counter > 0:
                    vectors.append(vector / word_counter)
                    targets.append(int(label))
                pbar.update(1)

        if not vectors:
            raise ValueError('No usable sample for topic %r and feed %r' % (topic, social_feed))

        print('Saving processed tokenized dataset in disc for future usage...')
        self.df = pd.DataFrame(np.vstack(vectors)).assign(targets=targets)
        self.df.to_hdf(path_or_buf=path,
                       key='df',
                       mode='w',
                       format='table',
                       complevel=9,
                       complib='bzip2')
        self.df = pd.read_hdf(path_or_buf=path, key='df')

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``{'x': feature array, 'y': int label}`` for row ``idx``.

        ``DataFrame.iloc`` raises ``IndexError`` for an out-of-range ``idx``.
        """
        item = self.df.iloc[idx]

        if idx < len(self):
            return {'x': item.drop('targets').to_numpy(), 'y': int(item['targets'])}
        return None
class MyCustomAmazonReviewACLDataLoader(torch.utils.data.Dataset):
    """Raw Amazon reviews (``unprocessed.tar.gz``) as mean Word2Vec vectors plus rating.

    Each ``all.review`` file is a sequence of ``<review>`` elements. The text
    of ``review_text`` is tokenized and averaged over the words known to the
    Word2Vec model; ``rating`` becomes the integer target. The result is
    cached as ``<file>.h5`` beside the review file.

    Args:
        folder: category key, e.g. ``'books'`` or ``'camera_photo'``; every
            key ``k`` has a matching ``<k>_file_path`` class attribute.

    Raises:
        ValueError: if ``folder`` is not a supported category key.
    """

    dataset_url = 'https://www.cs.jhu.edu/~mdredze/datasets/sentiment/unprocessed.tar.gz'
    path = 'data/AmazonReviewACL/'
    compressed_filename = path + 'unprocessed.tar.gz'

    apparel_file_path = path + 'sorted_data/apparel/all.review'
    automotive_file_path = path + 'sorted_data/automotive/all.review'
    baby_file_path = path + 'sorted_data/baby/all.review'
    beauty_file_path = path + 'sorted_data/beauty/all.review'
    books_file_path = path + 'sorted_data/books/all.review'
    camera_photo_file_path = path + 'sorted_data/camera_&_photo/all.review'
    cell_phones_service_file_path = path + 'sorted_data/cell_phones_&_service/all.review'
    computer_video_games_file_path = path + 'sorted_data/computer_&_video_games/all.review'
    dvd_file_path = path + 'sorted_data/dvd/all.review'
    electronics_file_path = path + 'sorted_data/electronics/all.review'
    gourmet_food_file_path = path + 'sorted_data/gourmet_food/all.review'
    grocery_file_path = path + 'sorted_data/grocery/all.review'
    health_personal_care_file_path = path + 'sorted_data/health_&_personal_care/all.review'
    jewelry_watches_file_path = path + 'sorted_data/jewelry_&_watches/all.review'
    kitchen_housewares_file_path = path + 'sorted_data/kitchen_&_housewares/all.review'
    magazines_file_path = path + 'sorted_data/magazines/all.review'
    music_file_path = path + 'sorted_data/music/all.review'
    musical_instruments_file_path = path + 'sorted_data/musical_instruments/all.review'
    office_products_file_path = path + 'sorted_data/office_products/all.review'
    outdoor_living_file_path = path + 'sorted_data/outdoor_living/all.review'
    software_file_path = path + 'sorted_data/software/all.review'
    sports_outdoors_file_path = path + 'sorted_data/sports_&_outdoors/all.review'
    tools_hardware_file_path = path + 'sorted_data/tools_&_hardware/all.review'
    toys_games_file_path = path + 'sorted_data/toys_&_games/all.review'
    video_file_path = path + 'sorted_data/video/all.review'

    # Accepted ``folder`` keys.
    _folders = (
        'apparel', 'automotive', 'baby', 'beauty', 'books', 'camera_photo',
        'cell_phones_service', 'computer_video_games', 'dvd', 'electronics',
        'gourmet_food', 'grocery', 'health_personal_care', 'jewelry_watches',
        'kitchen_housewares', 'magazines', 'music', 'musical_instruments',
        'office_products', 'outdoor_living', 'software', 'sports_outdoors',
        'tools_hardware', 'toys_games', 'video',
    )

    @property
    def datasets(self):
        """Return the underlying ``DataFrame`` (columns ``x`` and ``targets``)."""
        return self.df

    def __init__(self, folder):
        # Validate before downloading so a typo fails fast and clearly.
        if folder not in self._folders:
            raise ValueError(
                'Unknown folder %r; expected one of %s' % (folder, list(self._folders)))
        filename_path = getattr(self, folder + '_file_path')

        torchvision.datasets.utils.download_url(self.dataset_url, self.path)
        # NOTE(review): extractall trusts the member paths of the downloaded archive.
        with tarfile.open(self.compressed_filename) as tar:
            tar.extractall(self.path)

        self.df = self.get_df(filename_path)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``{'x': vector, 'y': rating}`` for row ``idx``.

        ``DataFrame.iloc`` raises ``IndexError`` for an out-of-range ``idx``.
        """
        item = self.df.iloc[idx]

        if idx < len(self):
            return {'x': item['x'], 'y': item['targets']}
        return None

    def get_df(self, path):
        """Load the cached table ``<path>.h5`` or build it from the review file."""
        # Remove a temporary XML file an earlier, interrupted run may have left.
        try:
            os.remove(path + '.xml')
        except OSError:
            pass

        try:
            print('Trying to load processed file %s.h5 from disc...' % path)
            return pd.read_hdf(path_or_buf=path + '.h5', key='df')
        except Exception:
            # Any read failure means "no usable cache"; rebuild it below.
            print('Processed file does not exists')
            print('Reading dataset into memory and applying Word2Vec...')

        with open(path, 'rb') as handle:
            body = ''.join(line.decode(encoding='utf-8-sig', errors='ignore') for line in handle)

        # findall('review') only sees children of the parsed root, so the
        # concatenated <review> elements are wrapped in one root element.
        parser = etree.XMLParser(recover=True)
        tree = etree.fromstring('<root>' + body + '</root>', parser=parser)
        reviews = [] if tree is None else tree.findall('review')

        df = {}
        tokenizer = TweetTokenizer()
        word_vectors = MyWord2Vec().get()  # fetched once instead of once per word
        line_count = 0
        with tqdm(unit=' samples', total=len(reviews)) as pbar:
            for review in reviews:
                text_node = review.find('review_text')
                rating_node = review.find('rating')
                if text_node is None or text_node.text is None:
                    continue
                if rating_node is None or rating_node.text is None:
                    continue
                try:
                    # float() ignores the newlines surrounding the rating text.
                    score = int(float(rating_node.text))
                except ValueError:
                    continue

                word_count = 0
                vector = np.zeros(word_vectors.vector_size)
                for word in tokenizer.tokenize(text_node.text):
                    try:
                        vector += word_vectors[word]
                    except KeyError:
                        continue  # out-of-vocabulary word
                    word_count += 1
                if word_count > 0:
                    df[line_count] = {'x': vector / word_count, 'targets': score}
                    line_count += 1
                    pbar.update(1)

        print('Saving processed tokenized dataset in disc for future usage...')
        df = pd.DataFrame.from_dict(df, orient='index')
        # NOTE(review): 'x' is an object column of arrays; confirm the installed
        # pandas/PyTables accepts it with format='table'.
        df.to_hdf(path_or_buf=path + '.h5',
                  key='df',
                  mode='w',
                  format='table',
                  complevel=9,
                  complib='bzip2')
        return pd.read_hdf(path_or_buf=path + '.h5', key='df')
class MyCustomMNISTUSPSDataLoader(torch.utils.data.Dataset):
    """Concatenation of several indexable datasets behind one index space.

    Samples of the wrapped datasets are addressed back to back: indices
    ``0 .. len(d0) - 1`` map to the first dataset, the following ones to the
    second, and so on. Each wrapped sample must be indexable as
    ``(input, label)``.

    Args:
        datasets: sequence of datasets supporting ``len()`` and integer indexing.
        transforms: optional iterable of callables applied in order to the
            input of every sample; ``None`` applies nothing.
    """

    datasets = []
    transforms = None

    def __init__(self, datasets, transforms=None):
        self.datasets = datasets
        self.transforms = transforms

    def __len__(self):
        return sum(len(d) for d in self.datasets)

    def __getitem__(self, idx: int):
        """Return ``{'x': transformed input, 'y': label}`` or ``None`` if out of range.

        A negative ``idx`` counts from the end of the whole concatenation.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            return None

        # Walk the datasets, subtracting each length until idx falls inside one.
        for dataset in self.datasets:
            if idx < len(dataset):
                sample = dataset[idx]
                break
            idx -= len(dataset)

        x = sample[0]
        # "or ()" keeps the default transforms=None usable.
        for transform in self.transforms or ():
            x = transform(x)
        return {'x': x, 'y': sample[1]}
class MyCustomCIFAR10STL10DataLoader(torch.utils.data.Dataset):
    """CIFAR10 / STL10 samples as ResNet-18 features over a shared 9-class label set.

    The constructor removes one class from every wrapped dataset (label 6 for
    a ``CIFAR10`` instance, label 7 for an ``STL10`` instance) and the
    remaining labels are renumbered to a common 0..8 scheme. Inputs are passed
    through a pretrained ResNet-18 whose final fully connected layer is
    replaced by an empty ``Sequential``, so ``x`` is the network output
    without that layer.

    Args:
        datasets: torchvision datasets exposing ``data`` plus ``targets``
            (CIFAR10) or ``labels`` (STL10).
        transforms: optional iterable of callables applied in order to every
            input before it is fed to the network; ``None`` applies nothing.
    """

    datasets = []
    transforms = None
    resnet = None
    samples = None

    # Original label -> shared label, after dropping the unmatched class.
    _cifar10_labels = {0: 0,  # Airplane
                       1: 1,  # Automobile
                       2: 2,  # Bird
                       3: 3,  # Cat
                       4: 4,  # Deer
                       5: 5,  # Dog
                       7: 6,  # Horse
                       8: 7,  # Ship
                       9: 8}  # Truck
    _stl10_labels = {0: 0,  # Airplane
                     1: 2,  # Bird
                     2: 1,  # Car
                     3: 3,  # Cat
                     4: 4,  # Deer
                     5: 5,  # Dog
                     6: 6,  # Horse
                     8: 7,  # Ship
                     9: 8}  # Truck

    def __init__(self, datasets, transforms=None):
        self.datasets = []
        self.resnet = torchvision.models.resnet18(pretrained=True)
        self.resnet.eval()
        # Keep the classifier head around but bypass it.
        self.resnet.fc_backup = self.resnet.fc
        self.resnet.fc = torch.nn.Sequential()

        if isinstance(self, CIFAR10):
            label_attr, dropped_label = 'targets', 6
        elif isinstance(self, STL10):
            label_attr, dropped_label = 'labels', 7
        else:
            label_attr = None  # neither subclass: no dataset is registered

        if label_attr is not None:
            for dataset in datasets:
                labels = np.array(getattr(dataset, label_attr))
                idx_to_delete = np.flatnonzero(labels == dropped_label)
                setattr(dataset, label_attr, list(np.delete(labels, idx_to_delete)))
                dataset.data = np.delete(dataset.data, idx_to_delete, 0)
                self.datasets.append(dataset)
        self.transforms = transforms

    def __len__(self):
        return sum(len(d) for d in self.datasets)

    def __getitem__(self, idx: int):
        """Return ``{'x': feature tensor on CPU, 'y': shared label}`` or ``None``.

        Raises:
            ValueError: if the sample's label has no entry in the label map.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if not idx < len(self):
            return None

        # Walk the datasets, tracking the offset of the current one.
        offset = 0
        sample = None
        for dataset in self.datasets:
            if idx < offset + len(dataset):
                sample = dataset[idx - offset]
                break
            offset += len(dataset)

        x = sample[0]
        # "or ()" keeps the default transforms=None usable.
        for transform in self.transforms or ():
            x = transform(x)
        x = x.unsqueeze(0)  # add the batch dimension for the network

        if torch.cuda.is_available():
            x = x.to('cuda')
            self.resnet.to('cuda')

        if isinstance(self, CIFAR10):
            label_map = self._cifar10_labels
        elif isinstance(self, STL10):
            label_map = self._stl10_labels
        else:
            label_map = {}
        original_label = int(sample[1])
        if original_label not in label_map:
            raise ValueError('Label %r has no shared-class mapping' % original_label)
        y = label_map[original_label]

        with torch.no_grad():
            x = self.resnet(x)[0].to('cpu')
        return {'x': x, 'y': y}
def concept_drift(self, x, idx):
    """Apply the multiplicative noise of concept-drift segment ``idx`` to ``x``.

    Segment 0 is the undrifted concept and returns ``x`` unchanged. For any
    other segment, ``x`` is multiplied element-wise by that segment's noise
    vector and rescaled back to the original ``[min(x), max(x)]`` range.

    The noise vectors are generated once, lazily, with fixed seeds
    (``n_concept_drifts ** 2 + i``) and stored in ``self.concept_drift_noise``.

    Args:
        x: NumPy feature vector of length ``self.number_features()``.
        idx: concept-drift segment index; 0 means "no drift".

    Returns:
        ``x`` itself when ``idx`` is 0 or ``x`` is constant, otherwise a new
        array.
    """
    if idx == 0:
        return x

    if self.concept_drift_noise is None:
        # A private RandomState yields the same numbers as seeding the global
        # generator, without clobbering the caller's global random state.
        self.concept_drift_noise = [
            np.random.RandomState(
                seed=self.n_concept_drifts * self.n_concept_drifts + i
            ).rand(self.number_features()) + 1  # Random on range [1, 2)
            for i in range(self.n_concept_drifts - 1)
        ]

    low, high = np.min(x), np.max(x)
    if low == high:
        # Rescaling into a zero-width range would return the input values anyway.
        return x

    drifted = x * self.concept_drift_noise[idx - 1]
    drifted_low, drifted_high = np.min(drifted), np.max(drifted)
    if drifted_low == drifted_high:
        return np.full_like(drifted, low)
    return (high - low) * (drifted - drifted_low) / (drifted_high - drifted_low) + low
def get_x_from_y(self, y: int, idx: int = 0, random_idx: bool = False):
    """Return the input of a sample whose one-hot label encodes class ``y``.

    Args:
        y: class position to look for (compared with ``argmax`` of the label).
        idx: first index inspected by the sequential scan; ignored when
            ``random_idx`` is true.
        random_idx: draw random indices instead of scanning forward.

    Returns:
        The ``x`` part of the first matching sample.

    Raises:
        IndexError: if the sequential scan reaches the end of the data
            without finding a sample of class ``y``.
    """
    if random_idx:
        # NOTE(review): this never terminates when no sample of class y exists.
        while True:
            candidate = np.random.randint(0, self.number_samples())
            temp_x, temp_y = self.get_x_y(candidate)
            if np.argmax(temp_y) == y:
                return temp_x

    # Bounded forward scan: stop at the last sample instead of running off the end.
    for candidate in range(idx, self.number_samples()):
        temp_x, temp_y = self.get_x_y(candidate)
        if np.argmax(temp_y) == y:
            return temp_x
    raise IndexError('No sample of class %r found from index %r onwards' % (y, idx))
load_london_bike_sharing(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomBikeSharingDataLoader('london') + + def load_washington_bike_sharing(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomBikeSharingDataLoader('washington') + + def load_amazon_review_fashion(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('AMAZON_FASHION_5.json.gz') + + def load_amazon_review_all_beauty(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('All_Beauty_5.json.gz') + + def load_amazon_review_appliances(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Appliances_5.json.gz') + + def load_amazon_review_arts_crafts_sewing(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Arts_Crafts_and_Sewing_5.json.gz') + + def load_amazon_review_automotive(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Automotive_5.json.gz') + + def load_amazon_review_books(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Books_5.json.gz') + + def load_amazon_review_cds_vinyl(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('CDs_and_Vinyl_5.json.gz') + + def load_amazon_review_cellphones_accessories(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Cell_Phones_and_Accessories_5.json.gz') + + def load_amazon_review_clothing_shoes_jewelry(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = 
MyCustomAmazonReviewDataLoader('Clothing_Shoes_and_Jewelry_5.json.gz') + + def load_amazon_review_digital_music(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Digital_Music_5.json.gz') + + def load_amazon_review_electronics(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Electronics_5.json.gz') + + def load_amazon_review_gift_card(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Gift_Cards_5.json.gz') + + def load_amazon_review_grocery_gourmet_food(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Grocery_and_Gourmet_Food_5.json.gz') + + def load_amazon_review_home_kitchen(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Home_and_Kitchen_5.json.gz') + + def load_amazon_review_industrial_scientific(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Industrial_and_Scientific_5.json.gz') + + def load_amazon_review_kindle_store(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Kindle_Store_5.json.gz') + + def load_amazon_review_luxury_beauty(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Luxury_Beauty_5.json.gz') + + def load_amazon_review_magazine_subscription(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Magazine_Subscriptions_5.json.gz') + + def load_amazon_review_movies_tv(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = 
MyCustomAmazonReviewDataLoader('Movies_and_TV_5.json.gz') + + def load_amazon_review_musical_instruments(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Musical_Instruments_5.json.gz') + + def load_amazon_review_office_products(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Office_Products_5.json.gz') + + def load_amazon_review_patio_lawn_garden(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Patio_Lawn_and_Garden_5.json.gz') + + def load_amazon_review_pet_supplies(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Pet_Supplies_5.json.gz') + + def load_amazon_review_prime_pantry(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Prime_Pantry_5.json.gz') + + def load_amazon_review_software(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Software_5.json.gz') + + def load_amazon_review_sports_outdoors(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Sports_and_Outdoors_5.json.gz') + + def load_amazon_review_tools_home_improvements(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Tools_and_Home_Improvement_5.json.gz') + + def load_amazon_review_toys_games(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Toys_and_Games_5.json.gz') + + def load_amazon_review_video_games(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewDataLoader('Video_Games_5.json.gz') + 
+ def load_amazon_review_nips_books(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewNIPSDataLoader('books') + + def load_amazon_review_nips_dvd(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewNIPSDataLoader('dvd') + + def load_amazon_review_nips_electronics(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewNIPSDataLoader('electronics') + + def load_amazon_review_nips_kitchen(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewNIPSDataLoader('kitchen') + + def load_amazon_review_acl_apparel(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('apparel') + + def load_amazon_review_acl_automotive(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('automotive') + + def load_amazon_review_acl_baby(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('baby') + + def load_amazon_review_acl_beauty(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('beauty') + + def load_amazon_review_acl_books(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('books') + + def load_amazon_review_acl_camera_photo(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('camera_photo') + + def load_amazon_review_acl_cell_phones_service(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('cell_phones_service') + + def 
load_amazon_review_acl_computer_video_games(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('computer_video_games') + + def load_amazon_review_acl_dvd(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('dvd') + + def load_amazon_review_acl_electronics(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('electronics') + + def load_amazon_review_acl_gourmet_food(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('gourmet_food') + + def load_amazon_review_acl_grocery(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('grocery') + + def load_amazon_review_acl_health_personal_care(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('health_personal_care') + + def load_amazon_review_acl_jewelry_watches(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('jewelry_watches') + + def load_amazon_review_acl_kitchen_housewares(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('kitchen_housewares') + + def load_amazon_review_acl_magazines(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('magazines') + + def load_amazon_review_acl_music(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('music') + + def load_amazon_review_acl_musical_instruments(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = 
MyCustomAmazonReviewACLDataLoader('musical_instruments') + + def load_amazon_review_acl_office_products(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('office_products') + + def load_amazon_review_acl_outdoor_living(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('outdoor_living') + + def load_amazon_review_acl_software(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('software') + + def load_amazon_review_acl_sports_outdoors(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('sports_outdoors') + + def load_amazon_review_acl_tools_hardware(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('tools_hardware') + + def load_amazon_review_acl_toys_games(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('toys_games') + + def load_amazon_review_acl_video(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomAmazonReviewACLDataLoader('video') + + def load_news_popularity_obama_all(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('obama', 'all') + + def load_news_popularity_economy_all(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('economy', 'all') + + def load_news_popularity_microsoft_all(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('microsoft', 'all') + + def load_news_popularity_palestine_all(self, n_concept_drifts: int = 1): + self.n_concept_drifts = 
n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('palestine', 'all') + + def load_news_popularity_obama_facebook(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('obama', 'Facebook') + + def load_news_popularity_economy_facebook(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('economy', 'Facebook') + + def load_news_popularity_microsoft_facebook(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('microsoft', 'Facebook') + + def load_news_popularity_palestine_facebook(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('palestine', 'Facebook') + + def load_news_popularity_obama_googleplus(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('obama', 'GooglePlus') + + def load_news_popularity_economy_googleplus(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('economy', 'GooglePlus') + + def load_news_popularity_microsoft_googleplus(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('microsoft', 'GooglePlus') + + def load_news_popularity_palestine_googleplus(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('palestine', 'GooglePlus') + + def load_news_popularity_obama_linkedin(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('obama', 'LinkedIn') + + def load_news_popularity_economy_linkedin(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = 
MyCustomNewsPopularityDataLoader('economy', 'LinkedIn') + + def load_news_popularity_microsoft_linkedin(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('microsoft', 'LinkedIn') + + def load_news_popularity_palestine_linkedin(self, n_concept_drifts: int = 1): + self.n_concept_drifts = n_concept_drifts + self.data = MyCustomNewsPopularityDataLoader('palestine', 'LinkedIn') \ No newline at end of file diff --git a/ACDC_Ablation_A.py b/ACDC_Ablation_A.py new file mode 100644 index 0000000..67cb44e --- /dev/null +++ b/ACDC_Ablation_A.py @@ -0,0 +1,1323 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. 
+# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. 
That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. 
That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from ACDCDataManipulator import DataManipulator
from NeuralNetwork import NeuralNetwork
from AutoEncoder import DenoisingAutoEncoder
from MySingletons import MyDevice
from colorama import Fore, Back, Style
from itertools import cycle
import numpy as np
import matplotlib.pylab as plt
import math
import torch
import time


def __copy_weights(source: NeuralNetwork, targets: list, layer_numbers=None, copy_moment: bool = True):
    """Copy the parameters of selected layers from ``source`` into the target networks.

    Args:
        source: Network whose parameters are read.
        targets: A single network, or a list/tuple of networks, to be overwritten.
        layer_numbers: 1-based layer numbers to copy; defaults to ``[1]``. Any
            number greater than ``source.number_hidden_layers`` selects the
            output layer.
        copy_moment: When true, the momentum tensors are copied as well.
    """
    if layer_numbers is None:
        layer_numbers = [1]
    # Accept a bare network as well as any list/tuple of networks.
    if not isinstance(targets, (list, tuple)):
        targets = [targets]

    for layer_number in layer_numbers:
        layer_index = layer_number - 1  # callers count layers from 1
        for target in targets:
            # NOTE: Tensor.detach() returns a tensor that shares storage with the
            # source tensor, so this is not a deep copy of the parameters.
            if layer_index >= source.number_hidden_layers:
                target.output_weight = source.output_weight.detach()
                target.output_bias = source.output_bias.detach()
                if copy_moment:
                    target.output_momentum = source.output_momentum.detach()
                    target.output_bias_momentum = source.output_bias_momentum.detach()
            else:
                target.weight[layer_index] = source.weight[layer_index].detach()
                target.bias[layer_index] = source.bias[layer_index].detach()
                if copy_moment:
                    target.momentum[layer_index] = source.momentum[layer_index].detach()
                    target.bias_momentum[layer_index] = source.bias_momentum[layer_index].detach()


def __grow_nodes(*networks):
    """Grow one node in every network when the first network asks for it.

    The decision is read from ``growable`` of the first network, indexed by its
    ``number_hidden_layers``; all networks are then grown at that same index.

    Returns:
        True when nodes were grown, False otherwise.
    """
    origin = networks[0]
    if origin.growable[origin.number_hidden_layers]:
        nodes = 1
        for _ in range(nodes):
            for network in networks:
                network.grow_node(origin.number_hidden_layers)
        return True
    else:
        return False


def __prune_nodes(*networks):
    """Prune from every network the nodes flagged as prunable by the first one.

    A first entry below zero in ``prunable`` of the first network means there is
    nothing to prune.

    Returns:
        True when nodes were pruned, False otherwise.
    """
    origin = networks[0]
    if origin.prunable[origin.number_hidden_layers][0] >= 0:
        nodes_to_prune = origin.prunable[origin.number_hidden_layers].tolist()
        for network in networks:
            # Walk the list backwards; presumably the indices are ascending, so
            # removing the last ones first keeps the remaining indices valid.
            for node_to_prune in nodes_to_prune[::-1]:
                network.prune_node(origin.number_hidden_layers, node_to_prune)
        return True
    return False


def __width_evolution(network: NeuralNetwork, x: torch.tensor, y: torch.tensor = None):
    """Run a forward pass and one stepwise width adaptation on ``network``.

    Args:
        network: Network to adapt.
        x: Input batch.
        y: Expected output; defaults to ``x`` itself.
    """
    if y is None:
        y = x

    network.feedforward(x, y)
    network.width_adaptation_stepwise(y)


def __discriminative(network: NeuralNetwork, x: torch.tensor, y: torch.tensor = None, is_neg_grad: bool = False):
    """Train ``network`` on ``(x, y)``.

    Args:
        network: Network to train.
        x: Input batch.
        y: Expected output; defaults to a detached view of ``x``.
        is_neg_grad: Forwarded unchanged to ``network.train``.
    """
    y = x.detach() if y is None else y
    network.train(x=x, y=y, is_neg_grad=is_neg_grad)


def __generative(network: DenoisingAutoEncoder, x: torch.tensor, y: torch.tensor = None,
                 is_tied_weight=True, noise_ratio=0.1, glw_epochs: int = 1):
    """Pretrain ``network`` greedily layer by layer, then train it on ``(x, y)``.

    Args:
        network: Auto-encoder to train.
        x: Input batch.
        y: Expected output; defaults to a detached view of ``x``.
        is_tied_weight: Forwarded unchanged to ``network.train``.
        noise_ratio: Forwarded to both the pretraining and the training call.
        glw_epochs: Number of epochs of greedy layer-wise pretraining.
    """
    y = x.detach() if y is None else y
    network.greedy_layer_wise_pretrain(x=x, number_epochs=glw_epochs, noise_ratio=noise_ratio)
    network.train(x=x, y=y, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight)


def __test(network: NeuralNetwork, x: torch.tensor, y: torch.tensor = None,
           is_source: bool = False, is_discriminative: bool = False, metrics=None):
    """Evaluate ``network`` without gradient tracking and record the outcome.

    Args:
        network: Network to evaluate.
        x: Input batch.
        y: Expected output; defaults to a detached view of ``x``.
        is_source: Selects the ``*_source*`` metric keys instead of ``*_target*``.
        is_discriminative: When true the classification rate, loss and
            misclassified count are recorded; otherwise only the
            reconstruction loss is.
        metrics: Dict of lists the values are appended to. When ``None`` the
            network is still evaluated but nothing is recorded.
    """
    with torch.no_grad():
        y = x.detach() if y is None else y
        network.test(x=x, y=y)

    # The default used to crash with a TypeError on the first subscript.
    if metrics is None:
        return

    domain = 'source' if is_source else 'target'
    if is_discriminative:
        metrics['classification_rate_' + domain].append(network.classification_rate)
        metrics['classification_' + domain + '_loss'].append(float(network.loss_value))
        metrics['classification_' + domain + '_misclassified'].append(float(network.misclassified))
    else:
        metrics['reconstruction_' + domain + '_loss'].append(float(network.loss_value))


def __force_same_size(a_tensor, b_tensor, shuffle=True, strategy='max'):
    """Return both tensors with the same number of rows.

    Args:
        a_tensor: First tensor; rows are indexed along dimension 0.
        b_tensor: Second tensor; rows are indexed along dimension 0.
        shuffle: When true, the rows of both tensors are permuted before and
            after resizing.
        strategy: ``'max'`` repeats the rows of the shorter tensor cyclically
            until it is as long as the longer one; ``'min'`` truncates both to
            the shorter length. Any other value leaves the sizes untouched.

    Returns:
        The tuple ``(a_tensor, b_tensor)``.
    """
    common = np.min([a_tensor.shape[0], b_tensor.shape[0]])

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    if strategy == 'max':
        # The statements below are kept exactly as they were so the sequence of
        # random permutations drawn (and therefore seeded runs) is unchanged.
        if math.ceil(a_tensor.shape[0] / common) <= math.ceil(b_tensor.shape[0] / common):
            # a is the shorter (or equal) one: b is reshuffled, a is cycled.
            b_tensor = torch.stack(list(target for target, source
                                        in zip(b_tensor[torch.randperm(b_tensor.shape[0])],
                                               cycle(a_tensor[torch.randperm(a_tensor.shape[0])]))))
            a_tensor = torch.stack(list(source for target, source
                                        in zip(b_tensor[torch.randperm(b_tensor.shape[0])],
                                               cycle(a_tensor[torch.randperm(a_tensor.shape[0])]))))
        else:
            # b is the shorter one: b is cycled up to the length of a.
            b_tensor = torch.stack(list(target for target, source
                                        in zip(cycle(b_tensor[torch.randperm(b_tensor.shape[0])]),
                                               a_tensor[torch.randperm(a_tensor.shape[0])])))
            a_tensor = torch.stack(list(source for target, source
                                        in zip(cycle(b_tensor[torch.randperm(b_tensor.shape[0])]),
                                               a_tensor[torch.randperm(a_tensor.shape[0])])))

    elif strategy == 'min':
        a_tensor = a_tensor[:common]
        b_tensor = b_tensor[:common]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    return a_tensor, b_tensor


def __print_annotation(lst):
    """Annotate a plotted series at a regular stride and at its last value.

    The stride is a quarter of the series length minus one, and never less than
    one. Each element may be a plain number, a NumPy scalar, or a sequence whose
    first item is the value to display. An empty series is ignored.

    Args:
        lst: The series that was plotted.
    """
    def scalar_of(value):
        # np.number covers every NumPy numeric scalar (int64, float32, ...);
        # checking only np.int32 made value[0] fail on the other ones.
        return value if isinstance(value, (int, float, np.number)) else value[0]

    size = len(lst)
    if size == 0:
        return  # nothing to annotate; lst[-1] below would raise IndexError

    # For fewer than four points the raw stride is negative, which used to
    # yield an empty range and silently skip the points; clamp it to 1.
    step = max(int(size * 0.25) - 1, 1)
    for idx in range(0, size, step):
        pos = scalar_of(lst[idx])
        plt.annotate(format(pos, '.2f'), (idx, pos))

    pos = scalar_of(lst[-1])
    plt.annotate(format(pos, '.2f'), (size, pos))


def __plot_time(train_time: np.ndarray,
                test_time: np.ndarray,
                annotation=True):
    """Plot the train and test time per minibatch and show the figure.

    Args:
        train_time: Training time of each minibatch, in seconds.
        test_time: Testing time of each minibatch, in seconds.
        annotation: When true, values are annotated on the curves.
    """
    plt.title('Processing time')
    plt.ylabel('Seconds')
    plt.xlabel('Minibatches')

    plt.plot(train_time, linewidth=1,
             label=('Train time: %f (Mean) %f (Accumulated)' %
                    (np.nanmean(train_time), np.sum(train_time))))
    plt.plot(test_time, linewidth=1,
             label=('Test time: %f (Mean) %f (Accumulated)' %
                    (np.nanmean(test_time), np.sum(test_time))))
    plt.legend()

    if annotation:
        __print_annotation(train_time)
        __print_annotation(test_time)

    plt.tight_layout()
    plt.show()


def __plot_node_evolution(nodes_discriminator: np.ndarray,
                          nodes_domain_classifier: np.ndarray,
                          nodes_feature_extraction: np.ndarray,
                          annotation=True):
    """Plot the hidden-layer node count of each network per minibatch.

    Args:
        nodes_discriminator: Node counts of the discriminator.
        nodes_domain_classifier: Node counts of the domain classifier.
        nodes_feature_extraction: Node counts of the feature extractor.
        annotation: When true, values are annotated on the curves.
    """
    plt.title('Node evolution')
    plt.ylabel('Nodes')
    plt.xlabel('Minibatches')

    plt.plot(nodes_discriminator, linewidth=1,
             label=('Discriminator HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_discriminator), nodes_discriminator[-1])))
    plt.plot(nodes_domain_classifier, linewidth=1,
             label=('Domain Classifier HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_domain_classifier), nodes_domain_classifier[-1])))
    plt.plot(nodes_feature_extraction, linewidth=1,
             label=('Feature Extraction HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_feature_extraction), nodes_feature_extraction[-1])))
    plt.legend()

    if annotation:
        __print_annotation(nodes_discriminator)
        __print_annotation(nodes_domain_classifier)
        __print_annotation(nodes_feature_extraction)

    plt.tight_layout()
    plt.show()


def __plot_losses(classification_source_loss: np.ndarray,
                  classification_target_loss: np.ndarray,
                  reconstruction_source_loss: np.ndarray,
                  reconstruction_target_loss: np.ndarray,
                  domain_classifier_loss: np.ndarray,
                  annotation=True):
    """Plot the five loss series per minibatch and show the figure.

    Args:
        classification_source_loss: Classification loss on the source stream.
        classification_target_loss: Classification loss on the target stream.
        reconstruction_source_loss: Reconstruction loss on the source stream.
        reconstruction_target_loss: Reconstruction loss on the target stream.
        domain_classifier_loss: Loss of the domain classifier.
        annotation: When true, values are annotated on the curves.
    """
    plt.title('Losses evolution')
    plt.ylabel('Loss value')
    plt.xlabel('Minibatches')

    plt.plot(classification_source_loss, linewidth=1,
             label=('Classification Source Loss mean: %f' %
                    (np.nanmean(classification_source_loss))))
    plt.plot(classification_target_loss, linewidth=1,
             label=('Classification Target Loss mean: %f' %
                    (np.nanmean(classification_target_loss))))
    plt.plot(reconstruction_source_loss, linewidth=1,
             label=('Reconstruction Source Loss mean: %f' %
                    (np.nanmean(reconstruction_source_loss))))
    plt.plot(reconstruction_target_loss, linewidth=1,
             label=('Reconstruction Target Loss mean: %f' %
                    (np.nanmean(reconstruction_target_loss))))
    plt.plot(domain_classifier_loss, linewidth=1,
             label=('Domain Classifier Loss mean: %f' %
                    (np.nanmean(domain_classifier_loss))))
    plt.legend()

    if annotation:
        __print_annotation(classification_source_loss)
        __print_annotation(classification_target_loss)
        __print_annotation(reconstruction_source_loss)
        __print_annotation(reconstruction_target_loss)
        __print_annotation(domain_classifier_loss)

    plt.tight_layout()
    plt.show()


def __plot_classification_rates(source_rate: np.ndarray,
                                target_rate: np.ndarray,
                                domain_rate: np.ndarray,
                                total_source_rate: float,
                                total_target_rate: float,
                                total_domain_classification_rate: float,
                                annotation=True,
                                class_number=None):
    """Plot the source, target and domain classification rates per minibatch.

    Args:
        source_rate: Per-minibatch classification rate on the source stream.
        target_rate: Per-minibatch classification rate on the target stream.
        domain_rate: Per-minibatch rate of the domain classifier.
        total_source_rate: Source rate shown in the legend as "(dataset)".
        total_target_rate: Target rate shown in the legend as "(dataset)".
        total_domain_classification_rate: Domain rate shown as "(dataset)".
        annotation: When true, values are annotated on the curves.
        class_number: Number of classes; when given, a horizontal line at
            ``1 / class_number`` is drawn as the random-guess threshold.
    """
    plt.title('Source and Target Classification Rates')
    plt.ylabel('Classification Rate')
    plt.xlabel('Minibatches')

    plt.plot(source_rate, linewidth=1, label=('Source CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(source_rate), total_source_rate)))
    plt.plot(target_rate, linewidth=1, label=('Target CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(target_rate), total_target_rate)))
    plt.plot(domain_rate, linewidth=1, label=('Domain CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(domain_rate), total_domain_classification_rate)))

    if annotation:
        __print_annotation(source_rate)
        __print_annotation(target_rate)
        __print_annotation(domain_rate)

    if class_number is not None:
        plt.plot(np.ones(len(source_rate)) * 1 / class_number,
                 linewidth=1, label='Random Classification Threshold: %f' % (1 / class_number))

    # Two domains (source/target), hence a fixed 1/2 random-guess line.
    plt.plot(np.ones(len(source_rate)) * 1 / 2,
             linewidth=1, label='Random Domain Classification Threshold: %f' % (1 / 2))

    plt.legend()

    plt.tight_layout()
    plt.show()


def __plot_ns(bias, var, ns, annotation=True):
    """Plot bias, variance and their sum on the current figure and show it.

    The title and axis labels are expected to be set by the caller.

    Args:
        bias: Bias series.
        var: Variance series.
        ns: Series labelled "NS (Bias + Variance)".
        annotation: When true, values are annotated on the curves.
    """
    plt.plot(bias, linewidth=1, label=('Bias mean: %f' % (np.nanmean(bias))))
    plt.plot(var, linewidth=1, label=('Variance mean: %f' % (np.nanmean(var))))
    plt.plot(ns, linewidth=1, label=('NS (Bias + Variance) mean: %f' % (np.nanmean(ns))))
    plt.legend()

    if annotation:
        __print_annotation(bias)
        __print_annotation(var)
        __print_annotation(ns)

    plt.tight_layout()
    plt.show()

+def __plot_discriminative_network_significance(bias, var, annotation=True): + plt.title('Discriminative Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_domain_classifier_network_significance(bias, var, annotation=True): + plt.title('Domain Classifier Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_feature_extractor_network_significance(bias, var, annotation=True): + plt.title('Feature Extractor Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __load_source_target(source: str, target: str, n_source_concept_drift: int = 1, n_target_concept_drift: int = 1): + dm_s = DataManipulator() + dm_t = DataManipulator() + + source = source.replace('_', '-').replace(' ', '-').lower() + target = target.replace('_', '-').replace(' ', '-').lower() + + if source == 'mnist-28': + dm_s.load_mnist(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-26': + dm_s.load_mnist(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-24': + dm_s.load_mnist(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-22': + dm_s.load_mnist(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-20': + dm_s.load_mnist(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-18': + dm_s.load_mnist(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-16': + dm_s.load_mnist(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-28': + dm_s.load_usps(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-26': + dm_s.load_usps(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-24': + 
dm_s.load_usps(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-22': + dm_s.load_usps(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-20': + dm_s.load_usps(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-18': + dm_s.load_usps(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-16': + dm_s.load_usps(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'cifar10': + dm_s.load_cifar10(n_concept_drifts=n_source_concept_drift) + elif source == 'stl10': + dm_s.load_stl10(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-fashion': + dm_s.load_amazon_review_fashion(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-all-beauty': + dm_s.load_amazon_review_all_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-appliances': + dm_s.load_amazon_review_appliances(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-arts-crafts-sewing': + dm_s.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-automotive': + dm_s.load_amazon_review_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-books': + dm_s.load_amazon_review_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cds-vinyl': + dm_s.load_amazon_review_cds_vinyl(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cellphones_accessories': + dm_s.load_amazon_review_cellphones_accessories(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-clothing-shoes-jewelry': + dm_s.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-digital-music': + dm_s.load_amazon_review_digital_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-electronics': + 
dm_s.load_amazon_review_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-gift-card': + dm_s.load_amazon_review_gift_card(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-grocery-gourmet-food': + dm_s.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-home-kitchen': + dm_s.load_amazon_review_home_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-industrial-scientific': + dm_s.load_amazon_review_industrial_scientific(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-kindle-store': + dm_s.load_amazon_review_kindle_store(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-luxury-beauty': + dm_s.load_amazon_review_luxury_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-magazine-subscription': + dm_s.load_amazon_review_magazine_subscription(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-movies-tv': + dm_s.load_amazon_review_movies_tv(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-musical-instruments': + dm_s.load_amazon_review_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-office-products': + dm_s.load_amazon_review_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-patio-lawn-garden': + dm_s.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-pet-supplies': + dm_s.load_amazon_review_pet_supplies(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-prime-pantry': + dm_s.load_amazon_review_prime_pantry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-software': + dm_s.load_amazon_review_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-sports-outdoors': + 
dm_s.load_amazon_review_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-tools-home-improvements': + dm_s.load_amazon_review_tools_home_improvements(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-toys-games': + dm_s.load_amazon_review_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-video-games': + dm_s.load_amazon_review_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-books': + dm_s.load_amazon_review_nips_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-dvd': + dm_s.load_amazon_review_nips_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-electronics': + dm_s.load_amazon_review_nips_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-kitchen': + dm_s.load_amazon_review_nips_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-apparel': + dm_s.load_amazon_review_acl_apparel(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-automotive': + dm_s.load_amazon_review_acl_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-baby': + dm_s.load_amazon_review_acl_baby(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-beauty': + dm_s.load_amazon_review_acl_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-books': + dm_s.load_amazon_review_acl_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-camera_photo': + dm_s.load_amazon_review_acl_camera_photo(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-cell_phones_service': + dm_s.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-computer_video_games': + 
dm_s.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-dvd': + dm_s.load_amazon_review_acl_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-electronics': + dm_s.load_amazon_review_acl_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-gourmet_food': + dm_s.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-grocery': + dm_s.load_amazon_review_acl_grocery(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-health_personal_care': + dm_s.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-jewelry_watches': + dm_s.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-kitchen_housewares': + dm_s.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-magazines': + dm_s.load_amazon_review_acl_magazines(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-music': + dm_s.load_amazon_review_acl_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-musical_instruments': + dm_s.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-office_products': + dm_s.load_amazon_review_acl_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-outdoor_living': + dm_s.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-software': + dm_s.load_amazon_review_acl_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-sports_outdoors': + dm_s.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source 
== 'amazon-review-acl-tools_hardware': + dm_s.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-toys_games': + dm_s.load_amazon_review_acl_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-video': + dm_s.load_amazon_review_acl_video(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-all': + dm_s.load_news_popularity_obama_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-all': + dm_s.load_news_popularity_economy_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-all': + dm_s.load_news_popularity_microsoft_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-all': + dm_s.load_news_popularity_palestine_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-facebook': + dm_s.load_news_popularity_obama_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-facebook': + dm_s.load_news_popularity_economy_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-facebook': + dm_s.load_news_popularity_microsoft_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-facebook': + dm_s.load_news_popularity_palestine_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-googleplus': + dm_s.load_news_popularity_obama_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-googleplus': + dm_s.load_news_popularity_economy_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-googleplus': + dm_s.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-googleplus': + dm_s.load_news_popularity_palestine_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-linkedin': + 
dm_s.load_news_popularity_obama_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-linkedin': + dm_s.load_news_popularity_economy_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-linkedin': + dm_s.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-linkedin': + dm_s.load_news_popularity_palestine_linkedin(n_concept_drifts=n_source_concept_drift) + + if target == 'mnist-28': + dm_t.load_mnist(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-26': + dm_t.load_mnist(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-24': + dm_t.load_mnist(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-22': + dm_t.load_mnist(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-20': + dm_t.load_mnist(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-18': + dm_t.load_mnist(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-16': + dm_t.load_mnist(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-28': + dm_t.load_usps(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-26': + dm_t.load_usps(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-24': + dm_t.load_usps(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-22': + dm_t.load_usps(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-20': + dm_t.load_usps(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-18': + dm_t.load_usps(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-16': + dm_t.load_usps(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'cifar10': + dm_t.load_cifar10(n_concept_drifts=n_target_concept_drift) + elif target == 'stl10': + 
dm_t.load_stl10(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-fashion': + dm_t.load_amazon_review_fashion(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-all-beauty': + dm_t.load_amazon_review_all_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-appliances': + dm_t.load_amazon_review_appliances(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-arts-crafts-sewing': + dm_t.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-automotive': + dm_t.load_amazon_review_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-books': + dm_t.load_amazon_review_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cds-vinyl': + dm_t.load_amazon_review_cds_vinyl(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cellphones_accessories': + dm_t.load_amazon_review_cellphones_accessories(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-clothing-shoes-jewelry': + dm_t.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-digital-music': + dm_t.load_amazon_review_digital_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-electronics': + dm_t.load_amazon_review_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-gift-card': + dm_t.load_amazon_review_gift_card(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-grocery-gourmet-food': + dm_t.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-home-kitchen': + dm_t.load_amazon_review_home_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-industrial-scientific': + dm_t.load_amazon_review_industrial_scientific(n_concept_drifts=n_target_concept_drift) 
+ elif target == 'amazon-review-kindle-store': + dm_t.load_amazon_review_kindle_store(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-luxury-beauty': + dm_t.load_amazon_review_luxury_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-magazine-subscription': + dm_t.load_amazon_review_magazine_subscription(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-movies-tv': + dm_t.load_amazon_review_movies_tv(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-musical-instruments': + dm_t.load_amazon_review_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-office-products': + dm_t.load_amazon_review_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-patio-lawn-garden': + dm_t.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-pet-supplies': + dm_t.load_amazon_review_pet_supplies(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-prime-pantry': + dm_t.load_amazon_review_prime_pantry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-software': + dm_t.load_amazon_review_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-sports-outdoors': + dm_t.load_amazon_review_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-tools-home-improvements': + dm_t.load_amazon_review_tools_home_improvements(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-toys-games': + dm_t.load_amazon_review_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-video-games': + dm_t.load_amazon_review_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-books': + dm_t.load_amazon_review_nips_books(n_concept_drifts=n_target_concept_drift) + elif target == 
'amazon-review-nips-dvd': + dm_t.load_amazon_review_nips_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-electronics': + dm_t.load_amazon_review_nips_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-kitchen': + dm_t.load_amazon_review_nips_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-apparel': + dm_t.load_amazon_review_acl_apparel(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-automotive': + dm_t.load_amazon_review_acl_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-baby': + dm_t.load_amazon_review_acl_baby(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-beauty': + dm_t.load_amazon_review_acl_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-books': + dm_t.load_amazon_review_acl_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-camera_photo': + dm_t.load_amazon_review_acl_camera_photo(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-cell_phones_service': + dm_t.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-computer_video_games': + dm_t.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-dvd': + dm_t.load_amazon_review_acl_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-electronics': + dm_t.load_amazon_review_acl_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-gourmet_food': + dm_t.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-grocery': + dm_t.load_amazon_review_acl_grocery(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-health_personal_care': + 
dm_t.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-jewelry_watches': + dm_t.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-kitchen_housewares': + dm_t.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-magazines': + dm_t.load_amazon_review_acl_magazines(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-music': + dm_t.load_amazon_review_acl_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-musical_instruments': + dm_t.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-office_products': + dm_t.load_amazon_review_acl_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-outdoor_living': + dm_t.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-software': + dm_t.load_amazon_review_acl_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-sports_outdoors': + dm_t.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-tools_hardware': + dm_t.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-toys_games': + dm_t.load_amazon_review_acl_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-video': + dm_t.load_amazon_review_acl_video(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-all': + dm_t.load_news_popularity_obama_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-all': + dm_t.load_news_popularity_economy_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-all': + 
def acdc(source, target,
         n_source_concept_drift: int = 5,
         n_target_concept_drift: int = 7,
         internal_epochs: int = 1, is_gpu=False):
    """Run one source/target stream experiment and return its summary.

    The source and target datasets are loaded, three networks are built
    (a denoising auto-encoder ``dae``, a classifier ``nn`` and a domain
    network ``da``), and the two streams are randomly interleaved into
    windows of ``window_size`` (1000) samples.  Every window except the
    first is tested before it is trained on; metrics are printed after
    each window and plotted at the end.

    Args:
        source: Name of the source dataset, forwarded to
            ``__load_source_target``.
        target: Name of the target dataset, forwarded to
            ``__load_source_target``.
        n_source_concept_drift: Forwarded to ``__load_source_target``.
        n_target_concept_drift: Forwarded to ``__load_source_target``.
        internal_epochs: Training passes per window; values below 1 are
            clamped to 1.
        is_gpu: Forwarded to ``MyDevice().set``.

    Returns:
        dict: Summary values (``'string'``, ``'*_batch'`` / ``'*_total'``
        classification rates, node counts, timings and mean losses).

    NOTE(review): the source this was reconstructed from had lost its
    indentation; block nesting below was inferred from data flow (see the
    individual notes) and should be confirmed against the upstream file.
    """
    def print_metrics(minibatch, metrics, DMs, DMt, NN, DAEt, DA):
        """Print the coloured progress report for window ``minibatch``."""
        print('Minibatch: %d | Execution time (dataset load/pre-processing + model run): %f' % (
            minibatch, time.time() - metrics['start_execution_time']))
        if minibatch > 1:
            print((
                'Total of samples:' + Fore.BLUE + ' %d + %d = %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d + %d = %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                      metrics['number_evaluated_samples_source'][-2],
                      metrics['number_evaluated_samples_source'][-1] - metrics['number_evaluated_samples_source'][-2],
                      metrics['number_evaluated_samples_source'][-1],
                      DMs.number_samples(),
                      float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100,
                      metrics['number_evaluated_samples_target'][-2],
                      metrics['number_evaluated_samples_target'][-1] - metrics['number_evaluated_samples_target'][-2],
                      metrics['number_evaluated_samples_target'][-1],
                      DMt.number_samples(),
                      float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100,
                      metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1],
                      DMs.number_samples() + DMt.number_samples(),
                      float((metrics['number_evaluated_samples_source'][-1] +
                             metrics['number_evaluated_samples_target'][-1]) / (
                                    DMs.number_samples() + DMt.number_samples())) * 100))
        else:
            print((
                'Total of samples:' + Fore.BLUE + ' %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                      metrics['number_evaluated_samples_source'][-1],
                      DMs.number_samples(),
                      float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100,
                      metrics['number_evaluated_samples_target'][-1],
                      DMt.number_samples(),
                      float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100,
                      metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1],
                      DMs.number_samples() + DMt.number_samples(),
                      float((metrics['number_evaluated_samples_source'][-1] +
                             metrics['number_evaluated_samples_target'][-1]) / (
                                    DMs.number_samples() + DMt.number_samples())) * 100))

        # The test-side metric lists are empty until the second window, so the
        # max/mean/min report is only printed from then on.
        if minibatch > 1:
            string_max = '' + Fore.GREEN + 'Max' + Style.RESET_ALL
            string_mean = '' + Fore.YELLOW + 'Mean' + Style.RESET_ALL
            string_min = '' + Fore.RED + 'Min' + Style.RESET_ALL
            string_now = '' + Fore.BLUE + 'Now' + Style.RESET_ALL
            string_accu = '' + Fore.MAGENTA + 'Accu' + Style.RESET_ALL

            print((
                '%s %s %s %s %s Training time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now, string_accu,
                      np.max(metrics['train_time']),
                      np.nanmean(metrics['train_time']),
                      np.min(metrics['train_time']),
                      metrics['train_time'][-1],
                      np.sum(metrics['train_time'])))
            print((
                '%s %s %s %s %s Testing time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now, string_accu,
                      np.max(metrics['test_time']),
                      np.nanmean(metrics['test_time']),
                      np.min(metrics['test_time']),
                      metrics['test_time'][-1],
                      np.sum(metrics['test_time'])))
            print((
                '%s %s %s %s CR Source:' + Fore.GREEN + ' %f%% ' + Back.BLUE + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_source']) * 100,
                      np.nanmean(metrics['classification_rate_source']) * 100,
                      np.min(metrics['classification_rate_source']) * 100,
                      metrics['classification_rate_source'][-1] * 100))
            print((
                '%s %s %s %s CR Target:' + Fore.GREEN + ' %f%% ' + Back.RED + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_target']) * 100,
                      np.nanmean(metrics['classification_rate_target']) * 100,
                      np.min(metrics['classification_rate_target']) * 100,
                      metrics['classification_rate_target'][-1] * 100))
            print((
                '%s %s %s %s CR Domain Discriminator:' + Fore.GREEN + ' %f%% ' + Fore.YELLOW + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_domain']) * 100,
                      np.nanmean(metrics['classification_rate_domain']) * 100,
                      np.min(metrics['classification_rate_domain']) * 100,
                      metrics['classification_rate_domain'][-1] * 100))
            print((
                '%s %s %s %s Classification Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_source_loss']),
                      np.nanmean(metrics['classification_source_loss']),
                      np.min(metrics['classification_source_loss']),
                      metrics['classification_source_loss'][-1]))
            print((
                '%s %s %s %s Classification Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_target_loss']),
                      np.nanmean(metrics['classification_target_loss']),
                      np.min(metrics['classification_target_loss']),
                      metrics['classification_target_loss'][-1]))
            print((
                '%s %s %s %s Domain Discriminator Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['domain_regression_loss']),
                      np.nanmean(metrics['domain_regression_loss']),
                      np.min(metrics['domain_regression_loss']),
                      metrics['domain_regression_loss'][-1]))
            print((
                '%s %s %s %s Reconstruction Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['reconstruction_source_loss']),
                      np.nanmean(metrics['reconstruction_source_loss']),
                      np.min(metrics['reconstruction_source_loss']),
                      metrics['reconstruction_source_loss'][-1]))
            print((
                '%s %s %s %s Reconstruction Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['reconstruction_target_loss']),
                      np.nanmean(metrics['reconstruction_target_loss']),
                      np.min(metrics['reconstruction_target_loss']),
                      metrics['reconstruction_target_loss'][-1]))
            print((
                '%s %s %s %s Discriminator Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_discriminator']),
                      np.nanmean(metrics['node_evolution_discriminator']),
                      np.min(metrics['node_evolution_discriminator']),
                      metrics['node_evolution_discriminator'][-1]))
            print((
                '%s %s %s %s Denoising Autoencoder Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_feature_extraction']),
                      np.nanmean(metrics['node_evolution_feature_extraction']),
                      np.min(metrics['node_evolution_feature_extraction']),
                      metrics['node_evolution_feature_extraction'][-1]))
            print((
                '%s %s %s %s Domain Classifier Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_domain_classifier']),
                      np.nanmean(metrics['node_evolution_domain_classifier']),
                      np.min(metrics['node_evolution_domain_classifier']),
                      metrics['node_evolution_domain_classifier'][-1]))
        # NOTE(review): the structure lines are assumed to be printed for every
        # window (outside the ``minibatch > 1`` guard) -- confirm upstream.
        print(('Network structure:' + Fore.BLUE + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, NN.layers))))
        print(('Domain Discriminator structure:' + Fore.GREEN + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, DA.layers))))
        print(('Denoising Auto Encoder:' + Fore.RED + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, DAEt.layers))))
        print(Style.RESET_ALL)

    metrics = {'classification_rate_source': [],
               'classification_rate_target': [],
               'classification_rate_domain': [],
               'number_evaluated_samples_source': [],
               'number_evaluated_samples_target': [],
               'train_time': [],
               'test_time': [],
               'node_evolution_discriminator': [],
               'node_evolution_domain_classifier': [],
               'node_evolution_feature_extraction': [],
               'classification_target_loss': [],
               'classification_source_loss': [],
               'reconstruction_source_loss': [],
               'reconstruction_target_loss': [],
               'domain_regression_loss': [],
               'classification_source_misclassified': [],
               'classification_target_misclassified': [],
               'domain_classification_misclassified': [],
               'start_execution_time': time.time()}
    MyDevice().set(is_gpu=is_gpu)
    internal_epochs = max(internal_epochs, 1)

    SOURCE_DOMAIN_LABEL = torch.tensor([[1, 0]], dtype=torch.float, device=MyDevice().get())
    TARGET_DOMAIN_LABEL = torch.tensor([[0, 1]], dtype=torch.float, device=MyDevice().get())

    dm_s, dm_t = __load_source_target(source, target, n_source_concept_drift, n_target_concept_drift)

    # All three networks share the same first hidden layer width, which starts
    # at half the number of input features.
    dae = DenoisingAutoEncoder([dm_s.number_features(),
                                int(dm_s.number_features() * 0.5),
                                dm_s.number_features()])
    nn = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        1,
                        dm_s.number_classes()])
    da = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        1,
                        2])

    count_source = 0
    count_target = 0
    count_window = 0
    window_size = 1000
    batch_counter = 0

    x_source = []
    y_source = []
    x_target = []
    y_target = []

    while True:
        samples_remaining = (count_source < dm_s.number_samples()
                             or count_target < dm_t.number_samples())
        if count_window < window_size and samples_remaining:
            # Draw the next sample from the source with probability
            # proportional to the share of unread samples that are source.
            source_prob = (dm_s.number_samples() - count_source) / (
                    dm_s.number_samples() - count_source + dm_t.number_samples() - count_target + 0.)

            if (np.random.rand() <= source_prob and count_source < dm_s.number_samples()) or (
                    count_target >= dm_t.number_samples() and count_source < dm_s.number_samples()):
                x, y = dm_s.get_x_y(count_source)
                x_source.append(x)
                y_source.append(y)
                count_source += 1
                count_window += 1
            elif count_target < dm_t.number_samples():
                x, y = dm_t.get_x_y(count_target)
                x_target.append(x)
                y_target.append(y)
                count_target += 1
                count_window += 1
        elif count_window == 0:
            # Both streams are exhausted and no window is pending.
            break
        else:
            # A window is processed when it is full OR when the streams ran
            # out.  Previously the loop exited as soon as the streams were
            # exhausted, so the last window was silently dropped (and streams
            # with <= window_size samples were never processed at all).
            batch_counter += 1
            metrics['number_evaluated_samples_source'].append(count_source)
            metrics['number_evaluated_samples_target'].append(count_target)

            # Workaround to avoid empty stream: if a domain contributed no new
            # sample to this window, replay one already-seen random sample.
            if batch_counter > 1:
                if (count_source - metrics['number_evaluated_samples_source'][-2] == 0):
                    x, y = dm_s.get_x_y(np.random.randint(0, count_source))
                    x_source.append(x)
                    y_source.append(y)
                if (count_target - metrics['number_evaluated_samples_target'][-2] == 0):
                    x, y = dm_t.get_x_y(np.random.randint(0, count_target))
                    x_target.append(x)
                    y_target.append(y)
            # Workaround to avoid empty stream

            x_source = torch.tensor(x_source, dtype=torch.float, device=MyDevice().get())
            y_source = torch.tensor(y_source, dtype=torch.float, device=MyDevice().get())
            x_target = torch.tensor(x_target, dtype=torch.float, device=MyDevice().get())
            y_target = torch.tensor(y_target, dtype=torch.float, device=MyDevice().get())

            # TEST (skipped for the first window, which is only trained on)
            if batch_counter > 1:
                metrics['test_time'].append(time.time())
                __test(network=nn, x=x_source, y=y_source,
                       is_source=True, is_discriminative=True, metrics=metrics)
                __test(network=nn, x=x_target, y=y_target,
                       is_source=False, is_discriminative=True, metrics=metrics)
                __test(network=dae, x=x_source,
                       is_source=True, is_discriminative=False, metrics=metrics)
                __test(network=dae, x=x_target,
                       is_source=False, is_discriminative=False, metrics=metrics)

                da.test(x=torch.cat([x_source, x_target]),
                        y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                     TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))
                metrics['domain_regression_loss'].append(float(da.loss_value))
                metrics['classification_rate_domain'].append(da.classification_rate)
                metrics['domain_classification_misclassified'].append(da.misclassified)
                # Replace the start timestamp with the elapsed duration.
                metrics['test_time'][-1] = time.time() - metrics['test_time'][-1]

            # TRAIN
            metrics['train_time'].append(time.time())

            # Features and labels of the source are glued together so that
            # __force_same_size treats each (x, y) pair as one row, then split
            # apart again at the feature/label boundary.
            common_source, x_target = __force_same_size(torch.cat((x_source.T, y_source.T)).T, x_target)
            x_source, y_source = common_source.T.split(x_source.shape[1])
            x_source, y_source = x_source.T, y_source.T

            for epoch in range(1, internal_epochs + 1):
                for xs, xt, ys in [(xs.view(1, xs.shape[0]), xt.view(1, xt.shape[0]), ys.view(1, ys.shape[0]))
                                   for xs, xt, ys in zip(x_source, x_target, cycle(y_source))]:
                    # Evolving (structure changes happen in the first epoch only)
                    if epoch == 1:
                        # Evolving Feature Extraction
                        # NOTE(review): grow/prune is assumed to run after each
                        # of the two width evaluations -- confirm upstream.
                        for j in range(0, 2):
                            if j == 0:
                                __width_evolution(network=dae, x=xs, y=xt)
                            elif j == 1:
                                __width_evolution(network=dae, x=xt, y=xs)
                            if __grow_nodes(dae, da, nn):
                                __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)
                            elif __prune_nodes(dae, da, nn):
                                __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                        # Evolving Source
                        __width_evolution(network=nn, x=xs, y=ys)
                        # __width_evolution(network=da, x=xs, y=SOURCE_DOMAIN_LABEL)
                        # NOTE(review): the ``elif`` is assumed to pair with the
                        # inner ``if`` -- confirm upstream.
                        if not __grow_nodes(da, nn):
                            if __prune_nodes(da):
                                __prune_nodes(nn)
                            elif not __grow_nodes(nn):
                                __prune_nodes(nn)

                        # Evolving Target
                        # __width_evolution(network=da, x=xt, y=TARGET_DOMAIN_LABEL)
                        if not __grow_nodes(da, nn):
                            __prune_nodes(da)

                    # Denoising AutoEncoder
                    __generative(network=dae, x=xs, y=xt)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    __generative(network=dae, x=xt, y=xs)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Domain Discriminator
                    # da.feedforward(x=xs, y=SOURCE_DOMAIN_LABEL, train=True).backpropagate()
                    # dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    # dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    # for weight_no in range(da.number_hidden_layers, 0, -1):
                    #     da.update_weight(weight_no=weight_no)

                    # da.feedforward(x=xt, y=TARGET_DOMAIN_LABEL, train=True).backpropagate()
                    # dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    # dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    # for weight_no in range(da.number_hidden_layers, 0, -1):
                    #     da.update_weight(weight_no=weight_no)
                    # __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Discriminator
                    __discriminative(network=nn, x=xs, y=ys)
                    __copy_weights(source=nn, targets=[da, dae], layer_numbers=[1], copy_moment=True)

            # NOTE(review): assumed to run once per window, after all epochs.
            da.test(x=torch.cat([x_source, x_target]),
                    y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                 TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))

            # Metrics
            metrics['train_time'][-1] = time.time() - metrics['train_time'][-1]
            metrics['node_evolution_discriminator'].append(nn.layers[-2])
            metrics['node_evolution_domain_classifier'].append(da.layers[-2])
            metrics['node_evolution_feature_extraction'].append(dae.layers[-2])
            print_metrics(batch_counter, metrics, dm_s, dm_t, nn, dae, da)

            # Reset variables for the next batch
            x_source = []
            y_source = []
            x_target = []
            y_target = []
            count_window = 0

    result_string = '%f (T) | %f (S) \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %f' % (
                        np.mean(metrics['classification_rate_target']),
                        np.mean(metrics['classification_rate_source']),

                        np.mean(metrics['node_evolution_feature_extraction']),
                        metrics['node_evolution_feature_extraction'][-1],

                        np.mean(metrics['node_evolution_discriminator']),
                        metrics['node_evolution_discriminator'][-1],

                        np.mean(metrics['node_evolution_domain_classifier']),
                        metrics['node_evolution_domain_classifier'][-1],

                        np.mean(metrics['train_time']),
                        np.sum(metrics['train_time']))

    print('CR Rate (Target) | CR Rate (Source) | \t ' \
          'Feature Extractor Node Evolution (mean | final) \t ' \
          'Discriminator Node Evolution (mean | final) \t ' \
          'Domain Classifier Node Evolution (mean | final) \t ' \
          'Train Time (mean | total)')
    print(result_string)

    result = {}
    result['string'] = result_string
    result['classification_rate_source_batch'] = np.nanmean(metrics['classification_rate_source'])
    result['classification_rate_target_batch'] = np.nanmean(metrics['classification_rate_target'])
    result['classification_rate_domain_batch'] = np.nanmean(metrics['classification_rate_domain'])
    # The "total" rates divide the accumulated misclassification counts by the
    # full dataset sizes.
    result['classification_rate_source_total'] = 1 - np.sum(
        metrics['classification_source_misclassified']) / dm_s.number_samples()
    result['classification_rate_target_total'] = 1 - np.sum(
        metrics['classification_target_misclassified']) / dm_t.number_samples()
    result['classification_rate_domain_total'] = 1 - np.sum(metrics['domain_classification_misclassified']) / (
            dm_s.number_samples() + dm_t.number_samples())
    result['source_node_mean'] = np.nanmean(metrics['node_evolution_discriminator'])
    result['target_node_mean'] = np.nanmean(metrics['node_evolution_feature_extraction'])
    result['domain_node_mean'] = np.nanmean(metrics['node_evolution_domain_classifier'])
    result['source_node_final'] = metrics['node_evolution_discriminator'][-1]
    result['target_node_final'] = metrics['node_evolution_feature_extraction'][-1]
    result['domain_node_final'] = metrics['node_evolution_domain_classifier'][-1]
    result['train_time_mean'] = np.nanmean(metrics['train_time'])
    result['train_time_final'] = np.nansum(metrics['train_time'])
    result['test_time_mean'] = np.nanmean(metrics['test_time'])
    result['test_time_final'] = np.nansum(metrics['test_time'])
    result['classification_source_loss_mean'] = np.nanmean(metrics['classification_source_loss'])
    result['classification_target_loss_mean'] = np.nanmean(metrics['classification_target_loss'])
    result['reconstruction_source_loss_mean'] = np.nanmean(metrics['reconstruction_source_loss'])
    result['reconstruction_target_loss_mean'] = np.nanmean(metrics['reconstruction_target_loss'])
    result['domain_adaptation_loss_mean'] = np.nanmean(metrics['domain_regression_loss'])

    print()
    print(result)

    __plot_time(metrics['train_time'],
                metrics['test_time'],
                annotation=False)
    __plot_classification_rates(metrics['classification_rate_source'],
                                metrics['classification_rate_target'],
                                metrics['classification_rate_domain'],
                                1 - np.sum(metrics['classification_source_misclassified']) / dm_s.number_samples(),
                                1 - np.sum(metrics['classification_target_misclassified']) / dm_t.number_samples(),
                                1 - np.sum(metrics['domain_classification_misclassified']) / (
                                        dm_s.number_samples() + dm_t.number_samples()),
                                class_number=dm_s.number_classes(),
                                annotation=False)
    __plot_node_evolution(metrics['node_evolution_discriminator'],
                          metrics['node_evolution_domain_classifier'],
                          metrics['node_evolution_feature_extraction'],
                          annotation=False)
    __plot_losses(metrics['classification_source_loss'],
                  metrics['classification_target_loss'],
                  metrics['reconstruction_source_loss'],
                  metrics['reconstruction_target_loss'],
                  metrics['domain_regression_loss'],
                  annotation=False)
    __plot_discriminative_network_significance(nn.BIAS, nn.VAR, annotation=False)
    __plot_domain_classifier_network_significance(da.BIAS, da.VAR, annotation=False)
    __plot_feature_extractor_network_significance(dae.BIAS, dae.VAR, annotation=False)

    return result
is_one_hot_encoding: bool = True, + label_starts_at: int = 0): + import csv, os + from tqdm import tqdm + filename = 'source.csv' if is_source else 'target.csv' + + dm, _ = __load_source_target(source=dataset_name, + target='', + n_source_concept_drift=n_concept_drift) + + try: + os.remove(filename) + except: + pass + f = open(filename, 'x') + f.close() + + print('Exporting dataset "%s" as file "%s"' % (dataset_name, filename)) + with open(filename, 'w', newline='') as csv_file: + writer = csv.writer(csv_file, delimiter=',') + pbar = tqdm(total=dm.number_samples()) + for i in range(dm.number_samples()): + x, y = dm.get_x_y(i) + temp_y = np.zeros(dm.number_classes() + label_starts_at) + temp_y[y.argmax() + label_starts_at] = 1 + y = temp_y + if not is_one_hot_encoding: + y = np.asarray([y.argmax()]) + + writer.writerow(np.concatenate((x, y)).tolist()) + pbar.update(1) + pbar.close() + print('Done!') + + +def pre_download_benchmarks(): + def print_info(dm): + print('Number of samples: %d' % dm.number_samples()) + print('Number of features: %d' % dm.number_features()) + print('Number of classes: %d' % dm.number_classes()) + return DataManipulator() + + dm = DataManipulator() + dm.load_mnist() + dm = print_info(dm) + dm.load_usps() + dm = print_info(dm) + dm.load_cifar10() + dm = print_info(dm) + dm.load_stl10() + dm = print_info(dm) + # dm.load_news_popularity_obama_all() + # dm = print_info(dm) + # dm.load_news_popularity_economy_all() + # dm = print_info(dm) + # dm.load_news_popularity_microsoft_all() + # dm = print_info(dm) + # dm.load_news_popularity_palestine_all() + # dm = print_info(dm) + # dm.load_amazon_review_fashion() + # dm = print_info(dm) + dm.load_amazon_review_all_beauty() + dm = print_info(dm) + # dm.load_amazon_review_appliances() + # dm = print_info(dm) + # dm.load_amazon_review_arts_crafts_sewing() + # dm = print_info(dm) + # dm.load_amazon_review_automotive() + # dm = print_info(dm) + # dm.load_amazon_review_cds_vinyl() + # dm = print_info(dm) + # 
dm.load_amazon_review_cellphones_accessories() + # dm = print_info(dm) + # dm.load_amazon_review_clothing_shoes_jewelry() + # dm = print_info(dm) + # dm.load_amazon_review_digital_music() + # dm = print_info(dm) + # dm.load_amazon_review_electronics() + # dm = print_info(dm) + # dm.load_amazon_review_gift_card() + # dm = print_info(dm) + # dm.load_amazon_review_grocery_gourmet_food() + # dm = print_info(dm) + # dm.load_amazon_review_home_kitchen() + # dm = print_info(dm) + dm.load_amazon_review_industrial_scientific() + dm = print_info(dm) + # dm.load_amazon_review_kindle_store() + # dm = print_info(dm) + dm.load_amazon_review_luxury_beauty() + dm = print_info(dm) + dm.load_amazon_review_magazine_subscription() + dm = print_info(dm) + # dm.load_amazon_review_movies_tv() + # dm = print_info(dm) + # dm.load_amazon_review_musical_instruments() + # dm = print_info(dm) + # dm.load_amazon_review_office_products() + # dm = print_info(dm) + # dm.load_amazon_review_patio_lawn_garden() + # dm = print_info(dm) + # dm.load_amazon_review_pet_supplies() + # dm = print_info(dm) + # dm.load_amazon_review_prime_pantry() + # dm = print_info(dm) + # dm.load_amazon_review_software() + # dm = print_info(dm) + # dm.load_amazon_review_sports_outdoors() + # dm = print_info(dm) + # dm.load_amazon_review_tools_home_improvements() + # dm = print_info(dm) + # dm.load_amazon_review_toys_games() + # dm = print_info(dm) + # dm.load_amazon_review_video_games() + # dm = print_info(dm) + dm.load_amazon_review_books() + print_info(dm) + + +print('ACDC: Autonomous Cross Domain Conversion') +print('') +print('Available methods:') +print('************************************************************') +print('def acdc(%s,%s,%s,%s,%s,%s\n\t)' % ( + '\n\tsource: str', + '\n\ttarget: str', + '\n\tn_source_concept_drift: int = 5', + '\n\tn_target_concept_drift: int = 7', + '\n\tinternal_epochs: int = 1', + '\n\tis_gpu: bool = False')) +print(' ') +print('source: String representing the source benchmark') 
+print('target: String representing the target benchmark') +print('n_source_concept_drift: Number of concept drifts at the source stream') +print('n_target_concept_drift: Number of concept drifts at the target stream') +print('internal_epochs: Number of internal epochs per minibatch') +print('is_gpu: False to run on CPU. True to run on GPU. The paper were generated on CPU. The code is not optimized for GPU. Only runs if you have a huge ammount of GRAM. Also, the adaptation procedure is slower on GPU.') +print(' ') +print('Returns a dictionary with all results for the run') +print('************************************************************') +print(' ') +print('************************************************************') +print('pre_download_benchmarks()') +print('************************************************************') +print(' ') +print('************************************************************') +print('generate_csv_from_dataset(%s,%s,%s,%s,%s\n\t)' % ( + '\n\tdataset_name: str', + '\n\tn_concept_drift: int = 1', + '\n\tis_source: bool = True', + '\n\tis_one_hot_enconding: bool = True', + '\n\tlabel_starts_at: int = 0')) +print(' ') +print('dataset_name: String representing which benchmark should be converted to CSV') +print('n_concept_drift: Number of concept drifts applied into the CSV dataset') +print('is_source: True to generate a file "source.csv", False to generate a file "target.csv"') +print('is_one_hot_enconding: If True, label will be the n last columns in an one-hot-encoding format, if False, label will be the last column as a number') +print('label_starts_at: The smallest label. 
Usually it is 0, but some source_code, specially made in Matlab, can start from 1') +print('************************************************************') +print(' ') +print('List of possible strings for datasets:') +print(' ') +print('mnist-28: MNIST resized to 28x28, which is original size ~ 784 features') +print('mnist-16: MNIST resized to 16x16 ~ 256 features') +print('usps-28: USPS resized to 28x28 ~ 784 features') +print('usps-16: USPS resized to 16x16, which is original size ~ 256 features') +print('cifar10: CIFAR10 extracted from Resnet ~ 512 features') +print('stl10: STL10 extracted from Resnet ~512 features') +print('amazon-review-all-beauty: Amazon Review | All Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-books: Amazon Review | Books | Word2Vec applied ~ 300 features') +print('amazon-review-industrial-scientific: Amazon Review | Industrial and Scientific | Word2Vec applied ~ 300 features') +print('amazon-review-luxury-beauty: Amazon Review | Luxury Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-magazine-subscription: Amazon Review | Magazine Subscription | Word2Vec applied ~ 300 features') \ No newline at end of file diff --git a/ACDC_Ablation_B.py b/ACDC_Ablation_B.py new file mode 100644 index 0000000..a28a722 --- /dev/null +++ b/ACDC_Ablation_B.py @@ -0,0 +1,1323 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic 
documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from ACDCDataManipulator import DataManipulator
from NeuralNetwork import NeuralNetwork
from AutoEncoder import DenoisingAutoEncoder
from MySingletons import MyDevice
from colorama import Fore, Back, Style
from itertools import cycle
import numpy as np
import matplotlib.pylab as plt
import math
import torch
import time


def __copy_weights(source: NeuralNetwork, targets: list, layer_numbers=None, copy_moment: bool = True):
    """Assign the parameters of selected ``source`` layers to every target network.

    :param source: network whose parameters are propagated.
    :param targets: a network, or a list/tuple of networks, to overwrite.
    :param layer_numbers: 1-based layer numbers (default ``[1]``); a number
        beyond ``source.number_hidden_layers`` selects the output layer.
    :param copy_moment: also propagate the momentum buffers.

    The values are assigned through ``.detach()``; on torch tensors that does
    not copy, so the targets presumably alias the source storage (TODO confirm
    this sharing is intended).
    """
    if layer_numbers is None:
        layer_numbers = [1]
    if not isinstance(targets, (list, tuple)):
        targets = [targets]

    for layer_number in layer_numbers:
        layer_index = layer_number - 1  # layer numbers are 1-based
        for target in targets:
            if layer_index >= source.number_hidden_layers:
                target.output_weight = source.output_weight.detach()
                target.output_bias = source.output_bias.detach()
                if copy_moment:
                    target.output_momentum = source.output_momentum.detach()
                    target.output_bias_momentum = source.output_bias_momentum.detach()
                continue

            target.weight[layer_index] = source.weight[layer_index].detach()
            target.bias[layer_index] = source.bias[layer_index].detach()
            if copy_moment:
                target.momentum[layer_index] = source.momentum[layer_index].detach()
                target.bias_momentum[layer_index] = source.bias_momentum[layer_index].detach()


def __grow_nodes(*networks):
    """Grow one node in every network when the first network is flagged growable.

    The first network decides: its ``growable`` flag at index
    ``number_hidden_layers`` is checked, and that same index is passed to
    ``grow_node`` of every network. Returns ``True`` if nodes were grown.
    """
    origin = networks[0]
    if not origin.growable[origin.number_hidden_layers]:
        return False

    for network in networks:
        network.grow_node(origin.number_hidden_layers)
    return True


def __prune_nodes(*networks):
    """Prune, in every network, the nodes the first network marked as prunable.

    A first ``prunable`` entry below zero means "nothing to prune". Nodes are
    removed in reverse list order. Returns ``True`` if pruning happened.
    """
    origin = networks[0]
    if origin.prunable[origin.number_hidden_layers][0] < 0:
        return False

    nodes_to_prune = origin.prunable[origin.number_hidden_layers].tolist()
    for network in networks:
        for node_to_prune in reversed(nodes_to_prune):
            network.prune_node(origin.number_hidden_layers, node_to_prune)
    return True


def __width_evolution(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None):
    """Call ``network.feedforward(x, y)`` and then ``network.width_adaptation_stepwise(y)``.

    ``y`` defaults to ``x`` itself (not detached).
    """
    target = x if y is None else y

    network.feedforward(x, target)
    network.width_adaptation_stepwise(target)


def __discriminative(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None, is_neg_grad: bool = False):
    """Run one ``network.train`` step; ``y`` defaults to a detached ``x``."""
    if y is None:
        y = x.detach()
    network.train(x=x, y=y, is_neg_grad=is_neg_grad)


def __generative(network: DenoisingAutoEncoder, x: torch.Tensor, y: torch.Tensor = None,
                 is_tied_weight=True, noise_ratio=0.1, glw_epochs: int = 1):
    """Greedy layer-wise pre-train on ``x``, then run one ``network.train`` step.

    ``y`` defaults to a detached ``x``; ``glw_epochs`` is the number of
    pre-training epochs and ``noise_ratio`` is forwarded to both calls.
    """
    if y is None:
        y = x.detach()
    network.greedy_layer_wise_pretrain(x=x, number_epochs=glw_epochs, noise_ratio=noise_ratio)
    network.train(x=x, y=y, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight)


def __test(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None,
           is_source: bool = False, is_discriminative: bool = False, metrics=None):
    """Evaluate ``network`` under ``torch.no_grad`` and record the outcome in ``metrics``.

    :param y: expected output; defaults to a detached ``x``.
    :param is_source: selects the ``*_source_*`` (else ``*_target_*``) metric keys.
    :param is_discriminative: record classification rate, loss and misclassified
        count; otherwise only the reconstruction loss.
    :param metrics: dict of lists to append to. When ``None`` the network is
        still evaluated but nothing is recorded (previously this raised
        ``TypeError``).
    """
    with torch.no_grad():
        if y is None:
            y = x.detach()
        network.test(x=x, y=y)

        if metrics is None:
            return

        domain = 'source' if is_source else 'target'
        if is_discriminative:
            metrics['classification_rate_%s' % domain].append(network.classification_rate)
            metrics['classification_%s_loss' % domain].append(float(network.loss_value))
            metrics['classification_%s_misclassified' % domain].append(float(network.misclassified))
        else:
            metrics['reconstruction_%s_loss' % domain].append(float(network.loss_value))


def __force_same_size(a_tensor, b_tensor, shuffle=True, strategy='max'):
    """Return both tensors resized to the same number of rows (first dimension).

    :param shuffle: shuffle the rows before and after resizing. ``False`` now
        yields a deterministic result for every strategy (the ``'max'`` path
        used to shuffle regardless).
    :param strategy: ``'max'`` repeats the shorter tensor cyclically up to the
        longer length; ``'min'`` truncates both to the shorter length; any
        other value leaves the sizes untouched.
    :raises ValueError: with ``'max'`` when exactly one tensor is empty, since
        an empty tensor cannot be upsampled.
    """
    a_len, b_len = a_tensor.shape[0], b_tensor.shape[0]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_len)]
        b_tensor = b_tensor[torch.randperm(b_len)]

    if strategy == 'max':
        longest = max(a_len, b_len)
        if longest > 0 and min(a_len, b_len) == 0:
            raise ValueError('cannot upsample an empty tensor to %d rows' % longest)
        # Modular indexing cycles through the shorter tensor in one vectorised
        # gather instead of a Python-level zip/cycle/stack over every row.
        if a_len < longest:
            a_tensor = a_tensor[torch.arange(longest) % a_len]
        if b_len < longest:
            b_tensor = b_tensor[torch.arange(longest) % b_len]
    elif strategy == 'min':
        shortest = min(a_len, b_len)
        a_tensor = a_tensor[:shortest]
        b_tensor = b_tensor[:shortest]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    return a_tensor, b_tensor


def __print_annotation(lst):
    """Annotate a sample of the points of ``lst``, and always its last point, on the current plot.

    Points are sampled with a stride of about a quarter of the series length.
    Entries may be scalars (Python or NumPy) or one-element containers.
    """
    def as_scalar(value):
        # np.ndim is 0 for Python and NumPy scalars alike, so np.int64 or
        # np.float32 entries no longer fall into the indexing branch.
        return value if np.ndim(value) == 0 else value[0]

    if len(lst) == 0:
        return

    # Clamp to 1: for fewer than 4 points the raw step is negative, which
    # produced an empty range and therefore no annotations at all.
    step = max(1, int(len(lst) * 0.25) - 1)
    last = len(lst) - 1

    for idx in range(0, last, step):
        pos = as_scalar(lst[idx])
        plt.annotate(format(pos, '.2f'), (idx, pos))

    # The last point sits at x == len(lst) - 1; annotating at len(lst) put the
    # label outside the data range, where matplotlib clips it on short series.
    pos = as_scalar(lst[last])
    plt.annotate(format(pos, '.2f'), (last, pos))


def __plot_time(train_time: np.ndarray,
                test_time: np.ndarray,
                annotation=True):
    """Plot per-minibatch train and test times with mean/accumulated values in the legend."""
    plt.title('Processing time')
    plt.ylabel('Seconds')
    plt.xlabel('Minibatches')

    for name, series in (('Train', train_time), ('Test', test_time)):
        # nansum keeps the accumulated value consistent with the NaN-tolerant mean.
        plt.plot(series, linewidth=1,
                 label=('%s time: %%f (Mean) %%f (Accumulated)' % name) %
                       (np.nanmean(series), np.nansum(series)))
    plt.legend()

    if annotation:
        __print_annotation(train_time)
        __print_annotation(test_time)

    plt.tight_layout()
    plt.show()


def __plot_node_evolution(nodes_discriminator: np.ndarray,
                          nodes_domain_classifier: np.ndarray,
                          nodes_feature_extraction: np.ndarray,
                          annotation=True):
    """Plot the hidden-layer node count of the three networks per minibatch."""
    plt.title('Node evolution')
    plt.ylabel('Nodes')
    plt.xlabel('Minibatches')

    curves = (('Discriminator', nodes_discriminator),
              ('Domain Classifier', nodes_domain_classifier),
              ('Feature Extraction', nodes_feature_extraction))

    for name, nodes in curves:
        plt.plot(nodes, linewidth=1,
                 label=('%s HL nodes: %%f (Mean) %%d (Final)' % name) %
                       (np.nanmean(nodes), nodes[-1]))
    plt.legend()

    if annotation:
        for _, nodes in curves:
            __print_annotation(nodes)

    plt.tight_layout()
    plt.show()


def __plot_losses(classification_source_loss: np.ndarray,
                  classification_target_loss: np.ndarray,
                  reconstruction_source_loss: np.ndarray,
                  reconstruction_target_loss: np.ndarray,
                  domain_classifier_loss: np.ndarray,
                  annotation=True):
    """Plot the five loss curves per minibatch, with each mean in the legend."""
    plt.title('Losses evolution')
    plt.ylabel('Loss value')
    plt.xlabel('Minibatches')

    curves = (('Classification Source', classification_source_loss),
              ('Classification Target', classification_target_loss),
              ('Reconstruction Source', reconstruction_source_loss),
              ('Reconstruction Target', reconstruction_target_loss),
              ('Domain Classifier', domain_classifier_loss))

    for name, loss in curves:
        plt.plot(loss, linewidth=1,
                 label=('%s Loss mean: %%f' % name) % np.nanmean(loss))
    plt.legend()

    if annotation:
        for _, loss in curves:
            __print_annotation(loss)

    plt.tight_layout()
    plt.show()


def __plot_classification_rates(source_rate: np.ndarray,
                                target_rate: np.ndarray,
                                domain_rate: np.ndarray,
                                total_source_rate: float,
                                total_target_rate: float,
                                total_domain_classification_rate: float,
                                annotation=True,
                                class_number=None):
    """Plot per-minibatch classification rates plus random-guess reference lines.

    The legend shows the batch mean next to the dataset-level rate passed in.
    ``class_number`` (when given) adds the ``1 / class_number`` reference line;
    the ``1 / 2`` domain reference line is always drawn.
    """
    plt.title('Source and Target Classification Rates')
    plt.ylabel('Classification Rate')
    plt.xlabel('Minibatches')

    curves = (('Source', source_rate, total_source_rate),
              ('Target', target_rate, total_target_rate),
              ('Domain', domain_rate, total_domain_classification_rate))

    for name, rate, total in curves:
        plt.plot(rate, linewidth=1,
                 label=('%s CR: %%f (batch) | %%f (dataset)' % name) %
                       (np.nanmean(rate), total))

    if annotation:
        for _, rate, _ in curves:
            __print_annotation(rate)

    n_batches = len(source_rate)
    if class_number is not None:
        plt.plot(np.ones(n_batches) * 1 / class_number,
                 linewidth=1, label='Random Classification Threshold: %f' % (1 / class_number))

    plt.plot(np.ones(n_batches) * 1 / 2,
             linewidth=1, label='Random Domain Classification Threshold: %f' % (1 / 2))

    plt.legend()

    plt.tight_layout()
    plt.show()


def __plot_ns(bias, var, ns, annotation=True):
    """Plot bias, variance and their sum (``ns``) and show the figure.

    Title and axis labels are expected to have been set by the caller.
    """
    curves = (('Bias', bias),
              ('Variance', var),
              ('NS (Bias + Variance)', ns))

    for name, values in curves:
        plt.plot(values, linewidth=1,
                 label=('%s mean: %%f' % name) % np.nanmean(values))
    plt.legend()

    if annotation:
        for _, values in curves:
            __print_annotation(values)

    plt.tight_layout()
    plt.show()
+def __plot_discriminative_network_significance(bias, var, annotation=True): + plt.title('Discriminative Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_domain_classifier_network_significance(bias, var, annotation=True): + plt.title('Domain Classifier Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_feature_extractor_network_significance(bias, var, annotation=True): + plt.title('Feature Extractor Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __load_source_target(source: str, target: str, n_source_concept_drift: int = 1, n_target_concept_drift: int = 1): + dm_s = DataManipulator() + dm_t = DataManipulator() + + source = source.replace('_', '-').replace(' ', '-').lower() + target = target.replace('_', '-').replace(' ', '-').lower() + + if source == 'mnist-28': + dm_s.load_mnist(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-26': + dm_s.load_mnist(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-24': + dm_s.load_mnist(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-22': + dm_s.load_mnist(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-20': + dm_s.load_mnist(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-18': + dm_s.load_mnist(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-16': + dm_s.load_mnist(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-28': + dm_s.load_usps(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-26': + dm_s.load_usps(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-24': + 
dm_s.load_usps(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-22': + dm_s.load_usps(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-20': + dm_s.load_usps(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-18': + dm_s.load_usps(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-16': + dm_s.load_usps(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'cifar10': + dm_s.load_cifar10(n_concept_drifts=n_source_concept_drift) + elif source == 'stl10': + dm_s.load_stl10(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-fashion': + dm_s.load_amazon_review_fashion(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-all-beauty': + dm_s.load_amazon_review_all_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-appliances': + dm_s.load_amazon_review_appliances(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-arts-crafts-sewing': + dm_s.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-automotive': + dm_s.load_amazon_review_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-books': + dm_s.load_amazon_review_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cds-vinyl': + dm_s.load_amazon_review_cds_vinyl(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cellphones_accessories': + dm_s.load_amazon_review_cellphones_accessories(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-clothing-shoes-jewelry': + dm_s.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-digital-music': + dm_s.load_amazon_review_digital_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-electronics': + 
dm_s.load_amazon_review_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-gift-card': + dm_s.load_amazon_review_gift_card(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-grocery-gourmet-food': + dm_s.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-home-kitchen': + dm_s.load_amazon_review_home_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-industrial-scientific': + dm_s.load_amazon_review_industrial_scientific(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-kindle-store': + dm_s.load_amazon_review_kindle_store(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-luxury-beauty': + dm_s.load_amazon_review_luxury_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-magazine-subscription': + dm_s.load_amazon_review_magazine_subscription(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-movies-tv': + dm_s.load_amazon_review_movies_tv(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-musical-instruments': + dm_s.load_amazon_review_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-office-products': + dm_s.load_amazon_review_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-patio-lawn-garden': + dm_s.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-pet-supplies': + dm_s.load_amazon_review_pet_supplies(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-prime-pantry': + dm_s.load_amazon_review_prime_pantry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-software': + dm_s.load_amazon_review_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-sports-outdoors': + 
dm_s.load_amazon_review_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-tools-home-improvements': + dm_s.load_amazon_review_tools_home_improvements(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-toys-games': + dm_s.load_amazon_review_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-video-games': + dm_s.load_amazon_review_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-books': + dm_s.load_amazon_review_nips_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-dvd': + dm_s.load_amazon_review_nips_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-electronics': + dm_s.load_amazon_review_nips_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-kitchen': + dm_s.load_amazon_review_nips_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-apparel': + dm_s.load_amazon_review_acl_apparel(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-automotive': + dm_s.load_amazon_review_acl_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-baby': + dm_s.load_amazon_review_acl_baby(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-beauty': + dm_s.load_amazon_review_acl_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-books': + dm_s.load_amazon_review_acl_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-camera_photo': + dm_s.load_amazon_review_acl_camera_photo(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-cell_phones_service': + dm_s.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-computer_video_games': + 
dm_s.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-dvd': + dm_s.load_amazon_review_acl_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-electronics': + dm_s.load_amazon_review_acl_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-gourmet_food': + dm_s.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-grocery': + dm_s.load_amazon_review_acl_grocery(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-health_personal_care': + dm_s.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-jewelry_watches': + dm_s.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-kitchen_housewares': + dm_s.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-magazines': + dm_s.load_amazon_review_acl_magazines(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-music': + dm_s.load_amazon_review_acl_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-musical_instruments': + dm_s.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-office_products': + dm_s.load_amazon_review_acl_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-outdoor_living': + dm_s.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-software': + dm_s.load_amazon_review_acl_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-sports_outdoors': + dm_s.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source 
== 'amazon-review-acl-tools_hardware': + dm_s.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-toys_games': + dm_s.load_amazon_review_acl_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-video': + dm_s.load_amazon_review_acl_video(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-all': + dm_s.load_news_popularity_obama_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-all': + dm_s.load_news_popularity_economy_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-all': + dm_s.load_news_popularity_microsoft_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-all': + dm_s.load_news_popularity_palestine_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-facebook': + dm_s.load_news_popularity_obama_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-facebook': + dm_s.load_news_popularity_economy_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-facebook': + dm_s.load_news_popularity_microsoft_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-facebook': + dm_s.load_news_popularity_palestine_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-googleplus': + dm_s.load_news_popularity_obama_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-googleplus': + dm_s.load_news_popularity_economy_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-googleplus': + dm_s.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-googleplus': + dm_s.load_news_popularity_palestine_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-linkedin': + 
dm_s.load_news_popularity_obama_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-linkedin': + dm_s.load_news_popularity_economy_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-linkedin': + dm_s.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-linkedin': + dm_s.load_news_popularity_palestine_linkedin(n_concept_drifts=n_source_concept_drift) + + if target == 'mnist-28': + dm_t.load_mnist(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-26': + dm_t.load_mnist(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-24': + dm_t.load_mnist(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-22': + dm_t.load_mnist(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-20': + dm_t.load_mnist(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-18': + dm_t.load_mnist(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-16': + dm_t.load_mnist(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-28': + dm_t.load_usps(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-26': + dm_t.load_usps(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-24': + dm_t.load_usps(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-22': + dm_t.load_usps(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-20': + dm_t.load_usps(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-18': + dm_t.load_usps(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-16': + dm_t.load_usps(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'cifar10': + dm_t.load_cifar10(n_concept_drifts=n_target_concept_drift) + elif target == 'stl10': + 
dm_t.load_stl10(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-fashion': + dm_t.load_amazon_review_fashion(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-all-beauty': + dm_t.load_amazon_review_all_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-appliances': + dm_t.load_amazon_review_appliances(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-arts-crafts-sewing': + dm_t.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-automotive': + dm_t.load_amazon_review_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-books': + dm_t.load_amazon_review_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cds-vinyl': + dm_t.load_amazon_review_cds_vinyl(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cellphones_accessories': + dm_t.load_amazon_review_cellphones_accessories(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-clothing-shoes-jewelry': + dm_t.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-digital-music': + dm_t.load_amazon_review_digital_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-electronics': + dm_t.load_amazon_review_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-gift-card': + dm_t.load_amazon_review_gift_card(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-grocery-gourmet-food': + dm_t.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-home-kitchen': + dm_t.load_amazon_review_home_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-industrial-scientific': + dm_t.load_amazon_review_industrial_scientific(n_concept_drifts=n_target_concept_drift) 
+ elif target == 'amazon-review-kindle-store': + dm_t.load_amazon_review_kindle_store(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-luxury-beauty': + dm_t.load_amazon_review_luxury_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-magazine-subscription': + dm_t.load_amazon_review_magazine_subscription(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-movies-tv': + dm_t.load_amazon_review_movies_tv(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-musical-instruments': + dm_t.load_amazon_review_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-office-products': + dm_t.load_amazon_review_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-patio-lawn-garden': + dm_t.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-pet-supplies': + dm_t.load_amazon_review_pet_supplies(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-prime-pantry': + dm_t.load_amazon_review_prime_pantry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-software': + dm_t.load_amazon_review_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-sports-outdoors': + dm_t.load_amazon_review_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-tools-home-improvements': + dm_t.load_amazon_review_tools_home_improvements(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-toys-games': + dm_t.load_amazon_review_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-video-games': + dm_t.load_amazon_review_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-books': + dm_t.load_amazon_review_nips_books(n_concept_drifts=n_target_concept_drift) + elif target == 
'amazon-review-nips-dvd': + dm_t.load_amazon_review_nips_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-electronics': + dm_t.load_amazon_review_nips_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-kitchen': + dm_t.load_amazon_review_nips_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-apparel': + dm_t.load_amazon_review_acl_apparel(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-automotive': + dm_t.load_amazon_review_acl_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-baby': + dm_t.load_amazon_review_acl_baby(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-beauty': + dm_t.load_amazon_review_acl_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-books': + dm_t.load_amazon_review_acl_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-camera_photo': + dm_t.load_amazon_review_acl_camera_photo(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-cell_phones_service': + dm_t.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-computer_video_games': + dm_t.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-dvd': + dm_t.load_amazon_review_acl_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-electronics': + dm_t.load_amazon_review_acl_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-gourmet_food': + dm_t.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-grocery': + dm_t.load_amazon_review_acl_grocery(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-health_personal_care': + 
dm_t.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-jewelry_watches': + dm_t.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-kitchen_housewares': + dm_t.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-magazines': + dm_t.load_amazon_review_acl_magazines(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-music': + dm_t.load_amazon_review_acl_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-musical_instruments': + dm_t.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-office_products': + dm_t.load_amazon_review_acl_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-outdoor_living': + dm_t.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-software': + dm_t.load_amazon_review_acl_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-sports_outdoors': + dm_t.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-tools_hardware': + dm_t.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-toys_games': + dm_t.load_amazon_review_acl_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-video': + dm_t.load_amazon_review_acl_video(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-all': + dm_t.load_news_popularity_obama_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-all': + dm_t.load_news_popularity_economy_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-all': + 
def acdc(source, target,
         n_source_concept_drift: int = 5,
         n_target_concept_drift: int = 7,
         internal_epochs: int = 1, is_gpu=False):
    """Run the ACDC loop over an interleaved source/target data stream.

    Samples are drawn one at a time from the source or the target data
    manipulator and accumulated into windows of ``window_size`` (1000)
    samples. Every completed window is, from the second window onwards,
    first tested and then used to train three networks that share their
    first layer: a denoising autoencoder (``dae``), a classifier (``nn``)
    and a domain discriminator (``da``).

    Args:
        source: Dataset key handed to ``__load_source_target`` for the
            source stream.
        target: Dataset key handed to ``__load_source_target`` for the
            target stream.
        n_source_concept_drift: Forwarded to ``__load_source_target``.
        n_target_concept_drift: Forwarded to ``__load_source_target``.
        internal_epochs: Number of passes over each window while training;
            values below 1 are replaced by 1.
        is_gpu: Forwarded to ``MyDevice().set``.

    Returns:
        A dict with the summary string (``'string'``), per-batch and total
        classification rates, mean/final node counts, mean/summed train and
        test times, and mean losses.

    NOTE(review): a window is only processed on the loop iteration *after*
    it has been filled, and the loop stops as soon as both streams are
    exhausted, so the last collected window is never tested or trained on.
    If no window is ever processed, the ``[-1]`` look-ups on the empty metric
    lists below raise ``IndexError``.
    """

    def print_metrics(minibatch, metrics, DMs, DMt, NN, DAEt, DA):
        """Print a colourised progress report for the given minibatch number."""
        print('Minibatch: %d | Execution time (dataset load/pre-processing + model run): %f' % (
            minibatch, time.time() - metrics['start_execution_time']))
        if minibatch > 1:
            # From the second batch on, also show how many samples this batch added.
            print((
                'Total of samples:' + Fore.BLUE + ' %d + %d = %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d + %d = %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                      metrics['number_evaluated_samples_source'][-2],
                      metrics['number_evaluated_samples_source'][-1] - metrics['number_evaluated_samples_source'][-2],
                      metrics['number_evaluated_samples_source'][-1],
                      DMs.number_samples(),
                      float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100,
                      metrics['number_evaluated_samples_target'][-2],
                      metrics['number_evaluated_samples_target'][-1] - metrics['number_evaluated_samples_target'][-2],
                      metrics['number_evaluated_samples_target'][-1],
                      DMt.number_samples(),
                      float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100,
                      metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1],
                      DMs.number_samples() + DMt.number_samples(),
                      float((metrics['number_evaluated_samples_source'][-1] +
                             metrics['number_evaluated_samples_target'][-1]) / (
                                    DMs.number_samples() + DMt.number_samples())) * 100))
        else:
            print((
                'Total of samples:' + Fore.BLUE + ' %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % (
                      metrics['number_evaluated_samples_source'][-1],
                      DMs.number_samples(),
                      float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100,
                      metrics['number_evaluated_samples_target'][-1],
                      DMt.number_samples(),
                      float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100,
                      metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1],
                      DMs.number_samples() + DMt.number_samples(),
                      float((metrics['number_evaluated_samples_source'][-1] +
                             metrics['number_evaluated_samples_target'][-1]) / (
                                    DMs.number_samples() + DMt.number_samples())) * 100))

        # Test metrics only exist from the second batch on (the first batch is
        # trained on without being tested), so the statistics are gated on it.
        if minibatch > 1:
            string_max = '' + Fore.GREEN + 'Max' + Style.RESET_ALL
            string_mean = '' + Fore.YELLOW + 'Mean' + Style.RESET_ALL
            string_min = '' + Fore.RED + 'Min' + Style.RESET_ALL
            string_now = '' + Fore.BLUE + 'Now' + Style.RESET_ALL
            string_accu = '' + Fore.MAGENTA + 'Accu' + Style.RESET_ALL

            print((
                '%s %s %s %s %s Training time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now, string_accu,
                      np.max(metrics['train_time']),
                      np.nanmean(metrics['train_time']),
                      np.min(metrics['train_time']),
                      metrics['train_time'][-1],
                      np.sum(metrics['train_time'])))
            print((
                '%s %s %s %s %s Testing time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now, string_accu,
                      np.max(metrics['test_time']),
                      np.nanmean(metrics['test_time']),
                      np.min(metrics['test_time']),
                      metrics['test_time'][-1],
                      np.sum(metrics['test_time'])))
            print((
                '%s %s %s %s CR Source:' + Fore.GREEN + ' %f%% ' + Back.BLUE + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_source']) * 100,
                      np.nanmean(metrics['classification_rate_source']) * 100,
                      np.min(metrics['classification_rate_source']) * 100,
                      metrics['classification_rate_source'][-1] * 100))
            print((
                '%s %s %s %s CR Target:' + Fore.GREEN + ' %f%% ' + Back.RED + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_target']) * 100,
                      np.nanmean(metrics['classification_rate_target']) * 100,
                      np.min(metrics['classification_rate_target']) * 100,
                      metrics['classification_rate_target'][-1] * 100))
            print((
                '%s %s %s %s CR Domain Discriminator:' + Fore.GREEN + ' %f%% ' + Fore.YELLOW + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_rate_domain']) * 100,
                      np.nanmean(metrics['classification_rate_domain']) * 100,
                      np.min(metrics['classification_rate_domain']) * 100,
                      metrics['classification_rate_domain'][-1] * 100))
            print((
                '%s %s %s %s Classification Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_source_loss']),
                      np.nanmean(metrics['classification_source_loss']),
                      np.min(metrics['classification_source_loss']),
                      metrics['classification_source_loss'][-1]))
            print((
                '%s %s %s %s Classification Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['classification_target_loss']),
                      np.nanmean(metrics['classification_target_loss']),
                      np.min(metrics['classification_target_loss']),
                      metrics['classification_target_loss'][-1]))
            print((
                '%s %s %s %s Domain Discriminator Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['domain_regression_loss']),
                      np.nanmean(metrics['domain_regression_loss']),
                      np.min(metrics['domain_regression_loss']),
                      metrics['domain_regression_loss'][-1]))
            print((
                '%s %s %s %s Reconstruction Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['reconstruction_source_loss']),
                      np.nanmean(metrics['reconstruction_source_loss']),
                      np.min(metrics['reconstruction_source_loss']),
                      metrics['reconstruction_source_loss'][-1]))
            print((
                '%s %s %s %s Reconstruction Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['reconstruction_target_loss']),
                      np.nanmean(metrics['reconstruction_target_loss']),
                      np.min(metrics['reconstruction_target_loss']),
                      metrics['reconstruction_target_loss'][-1]))
            print((
                '%s %s %s %s Discriminator Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_discriminator']),
                      np.nanmean(metrics['node_evolution_discriminator']),
                      np.min(metrics['node_evolution_discriminator']),
                      metrics['node_evolution_discriminator'][-1]))
            print((
                '%s %s %s %s Denoising Autoencoder Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_feature_extraction']),
                      np.nanmean(metrics['node_evolution_feature_extraction']),
                      np.min(metrics['node_evolution_feature_extraction']),
                      metrics['node_evolution_feature_extraction'][-1]))
            print((
                '%s %s %s %s Domain Classifier Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % (
                      string_max, string_mean, string_min, string_now,
                      np.max(metrics['node_evolution_domain_classifier']),
                      np.nanmean(metrics['node_evolution_domain_classifier']),
                      np.min(metrics['node_evolution_domain_classifier']),
                      metrics['node_evolution_domain_classifier'][-1]))
        print(('Network structure:' + Fore.BLUE + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, NN.layers))))
        print(('Domain Discriminator structure:' + Fore.GREEN + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, DA.layers))))
        print(('Denoising Auto Encoder:' + Fore.RED + ' %s' + Style.RESET_ALL) % (
            " ".join(map(str, DAEt.layers))))
        print(Style.RESET_ALL)

    # One list per tracked quantity; each processed batch appends one entry.
    metrics = {'classification_rate_source': [],
               'classification_rate_target': [],
               'classification_rate_domain': [],
               'number_evaluated_samples_source': [],
               'number_evaluated_samples_target': [],
               'train_time': [],
               'test_time': [],
               'node_evolution_discriminator': [],
               'node_evolution_domain_classifier': [],
               'node_evolution_feature_extraction': [],
               'classification_target_loss': [],
               'classification_source_loss': [],
               'reconstruction_source_loss': [],
               'reconstruction_target_loss': [],
               'domain_regression_loss': [],
               'classification_source_misclassified': [],
               'classification_target_misclassified': [],
               'domain_classification_misclassified': [],
               'start_execution_time': time.time()}
    MyDevice().set(is_gpu=is_gpu)
    # Clamp to at least one training pass per batch.
    internal_epochs = internal_epochs if internal_epochs >= 1 else 1

    # Two-column domain labels: [1, 0] marks source rows, [0, 1] marks target rows.
    SOURCE_DOMAIN_LABEL = torch.tensor([[1, 0]], dtype=torch.float, device=MyDevice().get())
    TARGET_DOMAIN_LABEL = torch.tensor([[0, 1]], dtype=torch.float, device=MyDevice().get())

    dm_s, dm_t = __load_source_target(source, target, n_source_concept_drift, n_target_concept_drift)

    # All three networks are sized from the source data; the second layer of
    # nn and da reuses the autoencoder's hidden width (dae.layers[1]).
    dae = DenoisingAutoEncoder([dm_s.number_features(),
                                100,
                                dm_s.number_features()])
    nn = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        100,
                        dm_s.number_classes()])
    da = NeuralNetwork([dm_s.number_features(),
                        dae.layers[1],
                        100,
                        2])

    count_source = 0  # source samples consumed so far
    count_target = 0  # target samples consumed so far
    count_window = 0  # samples collected into the current window
    window_size = 1000
    batch_counter = 0

    x_source = []
    y_source = []
    x_target = []
    y_target = []

    while count_source < dm_s.number_samples() \
            or count_target < dm_t.number_samples():
        if count_window < window_size \
                and (count_source < dm_s.number_samples()
                     or count_target < dm_t.number_samples()):

            # Probability of drawing from the source is its share of the
            # samples that are still unread in both streams.
            source_prob = (dm_s.number_samples() - count_source) / (
                    dm_s.number_samples() - count_source + dm_t.number_samples() - count_target + 0.)

            if (np.random.rand() <= source_prob and count_source < dm_s.number_samples()) or (
                    count_target >= dm_t.number_samples() and count_source < dm_s.number_samples()):
                x, y = dm_s.get_x_y(count_source)
                x_source.append(x)
                y_source.append(y)
                count_source += 1
                count_window += 1
            elif count_target < dm_t.number_samples():
                x, y = dm_t.get_x_y(count_target)
                x_target.append(x)
                y_target.append(y)
                count_target += 1
                count_window += 1
        else:
            # The window is full: process it as one batch.
            batch_counter += 1
            metrics['number_evaluated_samples_source'].append(count_source)
            metrics['number_evaluated_samples_target'].append(count_target)

            # Workaround to avoid empty stream
            # If a domain contributed nothing to this window, re-use one random
            # sample that was already seen from that domain.
            # NOTE(review): only applied from the second batch on, and
            # np.random.randint(0, 0) raises ValueError if that domain has not
            # produced any sample yet.
            if batch_counter > 1:
                if (count_source - metrics['number_evaluated_samples_source'][-2] == 0):
                    x, y = dm_s.get_x_y(np.random.randint(0, count_source))
                    x_source.append(x)
                    y_source.append(y)
                if (count_target - metrics['number_evaluated_samples_target'][-2] == 0):
                    x, y = dm_t.get_x_y(np.random.randint(0, count_target))
                    x_target.append(x)
                    y_target.append(y)
            # Workaround to avoid empty stream

            x_source = torch.tensor(x_source, dtype=torch.float, device=MyDevice().get())
            y_source = torch.tensor(y_source, dtype=torch.float, device=MyDevice().get())
            x_target = torch.tensor(x_target, dtype=torch.float, device=MyDevice().get())
            y_target = torch.tensor(y_target, dtype=torch.float, device=MyDevice().get())

            # TEST
            # The batch is evaluated before it is trained on; the very first
            # batch is skipped because the networks are still untrained.
            if batch_counter > 1:
                metrics['test_time'].append(time.time())
                __test(network=nn, x=x_source, y=y_source,
                       is_source=True, is_discriminative=True, metrics=metrics)
                __test(network=nn, x=x_target, y=y_target,
                       is_source=False, is_discriminative=True, metrics=metrics)
                __test(network=dae, x=x_source,
                       is_source=True, is_discriminative=False, metrics=metrics)
                __test(network=dae, x=x_target,
                       is_source=False, is_discriminative=False, metrics=metrics)

                da.test(x=torch.cat([x_source, x_target]),
                        y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                     TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))
                metrics['domain_regression_loss'].append(float(da.loss_value))
                metrics['classification_rate_domain'].append(da.classification_rate)
                metrics['domain_classification_misclassified'].append(da.misclassified)
                # Replace the stored start timestamp with the elapsed test time.
                metrics['test_time'][-1] = time.time() - metrics['test_time'][-1]

            # TRAIN
            metrics['train_time'].append(time.time())

            # Features and labels are glued column-wise so that the helper sees
            # them as one matrix, then split apart again by feature width.
            # Presumably __force_same_size equalises the row counts of its two
            # arguments - TODO confirm against its definition.
            # NOTE(review): split(int) cuts into equal-sized chunks, so the
            # two-way unpacking assumes the label width does not exceed the
            # feature width.
            common_source, x_target = __force_same_size(torch.cat((x_source.T, y_source.T)).T, x_target)
            x_source, y_source = common_source.T.split(x_source.shape[1])
            x_source, y_source = x_source.T, y_source.T

            epoch = 1
            while epoch <= internal_epochs:
                # Train sample by sample; every row is reshaped to (1, width).
                for xs, xt, ys in [(xs.view(1, xs.shape[0]), xt.view(1, xt.shape[0]), ys.view(1, ys.shape[0]))
                                   for xs, xt, ys in zip(x_source, x_target, cycle(y_source))]:
                    # NOTE(review): the structure-evolution code below is
                    # disabled; `i` in `elif i == 1` is undefined there
                    # (likely meant `j`).
                    # Evolving
                    # if epoch == 1:
                    #     # Evolving Feature Extraction
                    #     for j in range(0, 2):
                    #         if j == 0:
                    #             __width_evolution(network=dae, x=xs, y=xt)
                    #         elif i == 1:
                    #             __width_evolution(network=dae, x=xt, y=xs)
                    #         if __grow_nodes(dae, da, nn):
                    #             __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)
                    #         elif __prune_nodes(dae, da, nn):
                    #             __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    #     # Evolving Source
                    #     __width_evolution(network=nn, x=xs, y=ys)
                    #     __width_evolution(network=da, x=xs, y=SOURCE_DOMAIN_LABEL)
                    #     if not __grow_nodes(da, nn):
                    #         if __prune_nodes(da):
                    #             __prune_nodes(nn)
                    #     elif not __grow_nodes(nn):
                    #         __prune_nodes(nn)

                    #     # Evolving Target
                    #     __width_evolution(network=da, x=xt, y=TARGET_DOMAIN_LABEL)
                    #     if not __grow_nodes(da, nn):
                    #         __prune_nodes(da)

                    # Denoising AutoEncoder
                    # Cross-domain passes: source input with target as `y`, then
                    # the reverse; the first layer is pushed to da and nn after
                    # each pass.
                    __generative(network=dae, x=xs, y=xt)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    __generative(network=dae, x=xt, y=xs)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Domain Discriminator
                    # The first layer is stepped with the negated gradient
                    # (w - lr * (-grad)), i.e. against the domain classifier's
                    # descent direction, while da's remaining layers
                    # (number_hidden_layers .. 1) get the regular update.
                    da.feedforward(x=xs, y=SOURCE_DOMAIN_LABEL, train=True).backpropagate()
                    dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    for weight_no in range(da.number_hidden_layers, 0, -1):
                        da.update_weight(weight_no=weight_no)

                    da.feedforward(x=xt, y=TARGET_DOMAIN_LABEL, train=True).backpropagate()
                    dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg()
                    dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg()
                    for weight_no in range(da.number_hidden_layers, 0, -1):
                        da.update_weight(weight_no=weight_no)
                    __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False)

                    # Discriminator
                    # Supervised step on the labelled source sample only; the
                    # first layer is then pushed from nn to da and dae.
                    __discriminative(network=nn, x=xs, y=ys)
                    __copy_weights(source=nn, targets=[da, dae], layer_numbers=[1], copy_moment=True)

                epoch += 1
            # The outcome of this call is not stored in `metrics`.
            da.test(x=torch.cat([x_source, x_target]),
                    y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1),
                                 TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)]))

            # Metrics
            # Replace the stored start timestamp with the elapsed training time.
            metrics['train_time'][-1] = time.time() - metrics['train_time'][-1]
            metrics['node_evolution_discriminator'].append(nn.layers[-2])
            metrics['node_evolution_domain_classifier'].append(da.layers[-2])
            metrics['node_evolution_feature_extraction'].append(dae.layers[-2])
            print_metrics(batch_counter, metrics, dm_s, dm_t, nn, dae, da)

            # Reset variables for the next batch
            x_source = []
            y_source = []
            x_target = []
            y_target = []
            count_window = 0

    # Adjacent literals are concatenated before the % formatting is applied.
    result_string = '%f (T) | %f (S) \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %d \t ' \
                    '%f | %f' % (
                        np.mean(metrics['classification_rate_target']),
                        np.mean(metrics['classification_rate_source']),

                        np.mean(metrics['node_evolution_feature_extraction']),
                        metrics['node_evolution_feature_extraction'][-1],

                        np.mean(metrics['node_evolution_discriminator']),
                        metrics['node_evolution_discriminator'][-1],

                        np.mean(metrics['node_evolution_domain_classifier']),
                        metrics['node_evolution_domain_classifier'][-1],

                        np.mean(metrics['train_time']),
                        np.sum(metrics['train_time']))

    print('CR Rate (Target) | CR Rate (Source) | \t ' \
          'Feature Extractor Node Evolution (mean | final) \t ' \
          'Discriminator Node Evolution (mean | final) \t ' \
          'Domain Classifier Node Evolution (mean | final) \t ' \
          'Train Time (mean | total)')
    print(result_string)

    result = {}
    result['string'] = result_string
    result['classification_rate_source_batch'] = np.nanmean(metrics['classification_rate_source'])
    result['classification_rate_target_batch'] = np.nanmean(metrics['classification_rate_target'])
    result['classification_rate_domain_batch'] = np.nanmean(metrics['classification_rate_domain'])
    # NOTE(review): the *_total rates divide by every sample in the dataset.
    # The domain misclassification counts are only recorded for tested batches
    # (batch 2 onwards), so untested samples count as correct there; the
    # source/target counts are filled in by __test - presumably under the same
    # condition, verify.
    result['classification_rate_source_total'] = 1 - np.sum(
        metrics['classification_source_misclassified']) / dm_s.number_samples()
    result['classification_rate_target_total'] = 1 - np.sum(
        metrics['classification_target_misclassified']) / dm_t.number_samples()
    result['classification_rate_domain_total'] = 1 - np.sum(metrics['domain_classification_misclassified']) / (
            dm_s.number_samples() + dm_t.number_samples())
    # 'source_*' keys report the classifier (nn), 'target_*' the autoencoder
    # (dae) and 'domain_*' the domain discriminator (da).
    result['source_node_mean'] = np.nanmean(metrics['node_evolution_discriminator'])
    result['target_node_mean'] = np.nanmean(metrics['node_evolution_feature_extraction'])
    result['domain_node_mean'] = np.nanmean(metrics['node_evolution_domain_classifier'])
    result['source_node_final'] = metrics['node_evolution_discriminator'][-1]
    result['target_node_final'] = metrics['node_evolution_feature_extraction'][-1]
    result['domain_node_final'] = metrics['node_evolution_domain_classifier'][-1]
    result['train_time_mean'] = np.nanmean(metrics['train_time'])
    # The '*_time_final' entries hold the summed time over all batches.
    result['train_time_final'] = np.nansum(metrics['train_time'])
    result['test_time_mean'] = np.nanmean(metrics['test_time'])
    result['test_time_final'] = np.nansum(metrics['test_time'])
    result['classification_source_loss_mean'] = np.nanmean(metrics['classification_source_loss'])
    result['classification_target_loss_mean'] = np.nanmean(metrics['classification_target_loss'])
    result['reconstruction_source_loss_mean'] = np.nanmean(metrics['reconstruction_source_loss'])
    result['reconstruction_target_loss_mean'] = np.nanmean(metrics['reconstruction_target_loss'])
    result['domain_adaptation_loss_mean'] = np.nanmean(metrics['domain_regression_loss'])

    print()
    print(result)

    __plot_time(metrics['train_time'],
                metrics['test_time'],
                annotation=False)
    __plot_classification_rates(metrics['classification_rate_source'],
                                metrics['classification_rate_target'],
                                metrics['classification_rate_domain'],
                                1 - np.sum(metrics['classification_source_misclassified']) / dm_s.number_samples(),
                                1 - np.sum(metrics['classification_target_misclassified']) / dm_t.number_samples(),
                                1 - np.sum(metrics['domain_classification_misclassified']) / (
                                        dm_s.number_samples() + dm_t.number_samples()),
                                class_number=dm_s.number_classes(),
                                annotation=False)
    __plot_node_evolution(metrics['node_evolution_discriminator'],
                          metrics['node_evolution_domain_classifier'],
                          metrics['node_evolution_feature_extraction'],
                          annotation=False)
    __plot_losses(metrics['classification_source_loss'],
                  metrics['classification_target_loss'],
                  metrics['reconstruction_source_loss'],
                  metrics['reconstruction_target_loss'],
                  metrics['domain_regression_loss'],
                  annotation=False)
    __plot_discriminative_network_significance(nn.BIAS, nn.VAR, annotation=False)
    __plot_domain_classifier_network_significance(da.BIAS, da.VAR, annotation=False)
    __plot_feature_extractor_network_significance(dae.BIAS, dae.VAR, annotation=False)

    return result
is_one_hot_encoding: bool = True, + label_starts_at: int = 0): + import csv, os + from tqdm import tqdm + filename = 'source.csv' if is_source else 'target.csv' + + dm, _ = __load_source_target(source=dataset_name, + target='', + n_source_concept_drift=n_concept_drift) + + try: + os.remove(filename) + except: + pass + f = open(filename, 'x') + f.close() + + print('Exporting dataset "%s" as file "%s"' % (dataset_name, filename)) + with open(filename, 'w', newline='') as csv_file: + writer = csv.writer(csv_file, delimiter=',') + pbar = tqdm(total=dm.number_samples()) + for i in range(dm.number_samples()): + x, y = dm.get_x_y(i) + temp_y = np.zeros(dm.number_classes() + label_starts_at) + temp_y[y.argmax() + label_starts_at] = 1 + y = temp_y + if not is_one_hot_encoding: + y = np.asarray([y.argmax()]) + + writer.writerow(np.concatenate((x, y)).tolist()) + pbar.update(1) + pbar.close() + print('Done!') + + +def pre_download_benchmarks(): + def print_info(dm): + print('Number of samples: %d' % dm.number_samples()) + print('Number of features: %d' % dm.number_features()) + print('Number of classes: %d' % dm.number_classes()) + return DataManipulator() + + dm = DataManipulator() + dm.load_mnist() + dm = print_info(dm) + dm.load_usps() + dm = print_info(dm) + dm.load_cifar10() + dm = print_info(dm) + dm.load_stl10() + dm = print_info(dm) + # dm.load_news_popularity_obama_all() + # dm = print_info(dm) + # dm.load_news_popularity_economy_all() + # dm = print_info(dm) + # dm.load_news_popularity_microsoft_all() + # dm = print_info(dm) + # dm.load_news_popularity_palestine_all() + # dm = print_info(dm) + # dm.load_amazon_review_fashion() + # dm = print_info(dm) + dm.load_amazon_review_all_beauty() + dm = print_info(dm) + # dm.load_amazon_review_appliances() + # dm = print_info(dm) + # dm.load_amazon_review_arts_crafts_sewing() + # dm = print_info(dm) + # dm.load_amazon_review_automotive() + # dm = print_info(dm) + # dm.load_amazon_review_cds_vinyl() + # dm = print_info(dm) + # 
dm.load_amazon_review_cellphones_accessories() + # dm = print_info(dm) + # dm.load_amazon_review_clothing_shoes_jewelry() + # dm = print_info(dm) + # dm.load_amazon_review_digital_music() + # dm = print_info(dm) + # dm.load_amazon_review_electronics() + # dm = print_info(dm) + # dm.load_amazon_review_gift_card() + # dm = print_info(dm) + # dm.load_amazon_review_grocery_gourmet_food() + # dm = print_info(dm) + # dm.load_amazon_review_home_kitchen() + # dm = print_info(dm) + dm.load_amazon_review_industrial_scientific() + dm = print_info(dm) + # dm.load_amazon_review_kindle_store() + # dm = print_info(dm) + dm.load_amazon_review_luxury_beauty() + dm = print_info(dm) + dm.load_amazon_review_magazine_subscription() + dm = print_info(dm) + # dm.load_amazon_review_movies_tv() + # dm = print_info(dm) + # dm.load_amazon_review_musical_instruments() + # dm = print_info(dm) + # dm.load_amazon_review_office_products() + # dm = print_info(dm) + # dm.load_amazon_review_patio_lawn_garden() + # dm = print_info(dm) + # dm.load_amazon_review_pet_supplies() + # dm = print_info(dm) + # dm.load_amazon_review_prime_pantry() + # dm = print_info(dm) + # dm.load_amazon_review_software() + # dm = print_info(dm) + # dm.load_amazon_review_sports_outdoors() + # dm = print_info(dm) + # dm.load_amazon_review_tools_home_improvements() + # dm = print_info(dm) + # dm.load_amazon_review_toys_games() + # dm = print_info(dm) + # dm.load_amazon_review_video_games() + # dm = print_info(dm) + dm.load_amazon_review_books() + print_info(dm) + + +print('ACDC: Autonomous Cross Domain Conversion') +print('') +print('Available methods:') +print('************************************************************') +print('def acdc(%s,%s,%s,%s,%s,%s\n\t)' % ( + '\n\tsource: str', + '\n\ttarget: str', + '\n\tn_source_concept_drift: int = 5', + '\n\tn_target_concept_drift: int = 7', + '\n\tinternal_epochs: int = 1', + '\n\tis_gpu: bool = False')) +print(' ') +print('source: String representing the source benchmark') 
+print('target: String representing the target benchmark') +print('n_source_concept_drift: Number of concept drifts at the source stream') +print('n_target_concept_drift: Number of concept drifts at the target stream') +print('internal_epochs: Number of internal epochs per minibatch') +print('is_gpu: False to run on CPU. True to run on GPU. The paper were generated on CPU. The code is not optimized for GPU. Only runs if you have a huge ammount of GRAM. Also, the adaptation procedure is slower on GPU.') +print(' ') +print('Returns a dictionary with all results for the run') +print('************************************************************') +print(' ') +print('************************************************************') +print('pre_download_benchmarks()') +print('************************************************************') +print(' ') +print('************************************************************') +print('generate_csv_from_dataset(%s,%s,%s,%s,%s\n\t)' % ( + '\n\tdataset_name: str', + '\n\tn_concept_drift: int = 1', + '\n\tis_source: bool = True', + '\n\tis_one_hot_enconding: bool = True', + '\n\tlabel_starts_at: int = 0')) +print(' ') +print('dataset_name: String representing which benchmark should be converted to CSV') +print('n_concept_drift: Number of concept drifts applied into the CSV dataset') +print('is_source: True to generate a file "source.csv", False to generate a file "target.csv"') +print('is_one_hot_enconding: If True, label will be the n last columns in an one-hot-encoding format, if False, label will be the last column as a number') +print('label_starts_at: The smallest label. 
Usually it is 0, but some source_code, specially made in Matlab, can start from 1') +print('************************************************************') +print(' ') +print('List of possible strings for datasets:') +print(' ') +print('mnist-28: MNIST resized to 28x28, which is original size ~ 784 features') +print('mnist-16: MNIST resized to 16x16 ~ 256 features') +print('usps-28: USPS resized to 28x28 ~ 784 features') +print('usps-16: USPS resized to 16x16, which is original size ~ 256 features') +print('cifar10: CIFAR10 extracted from Resnet ~ 512 features') +print('stl10: STL10 extracted from Resnet ~512 features') +print('amazon-review-all-beauty: Amazon Review | All Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-books: Amazon Review | Books | Word2Vec applied ~ 300 features') +print('amazon-review-industrial-scientific: Amazon Review | Industrial and Scientific | Word2Vec applied ~ 300 features') +print('amazon-review-luxury-beauty: Amazon Review | Luxury Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-magazine-subscription: Amazon Review | Magazine Subscription | Word2Vec applied ~ 300 features') \ No newline at end of file diff --git a/ACDC_Ablation_C.py b/ACDC_Ablation_C.py new file mode 100644 index 0000000..fd3de90 --- /dev/null +++ b/ACDC_Ablation_C.py @@ -0,0 +1,1323 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic 
documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from ACDCDataManipulator import DataManipulator
from NeuralNetwork import NeuralNetwork
from AutoEncoder import DenoisingAutoEncoder
from MySingletons import MyDevice
from colorama import Fore, Back, Style
from itertools import cycle
import numpy as np
import matplotlib.pylab as plt
import math
import torch
import time


def __copy_weights(source: NeuralNetwork, targets: list, layer_numbers=None, copy_moment: bool = True):
    """Point the parameters of ``targets`` at detached views of ``source``'s.

    Args:
        source: Network whose weights, biases and (optionally) momenta are read.
        targets: A network or a list of networks that receive the parameters.
        layer_numbers: 1-based layer numbers to copy; defaults to ``[1]``.
            Any number greater than ``source.number_hidden_layers`` selects
            the output layer.
        copy_moment: When True, the momentum tensors are copied as well.
    """
    if layer_numbers is None:
        layer_numbers = [1]
    if not isinstance(targets, list):
        targets = [targets]

    for layer_number in layer_numbers:
        layer_index = layer_number - 1  # callers pass 1-based layer numbers
        is_output_layer = layer_index >= source.number_hidden_layers
        for target in targets:
            # Tensor.detach() returns a tensor sharing storage with the
            # original, so the targets alias the source's data.
            if is_output_layer:
                target.output_weight = source.output_weight.detach()
                target.output_bias = source.output_bias.detach()
                if copy_moment:
                    target.output_momentum = source.output_momentum.detach()
                    target.output_bias_momentum = source.output_bias_momentum.detach()
            else:
                target.weight[layer_index] = source.weight[layer_index].detach()
                target.bias[layer_index] = source.bias[layer_index].detach()
                if copy_moment:
                    target.momentum[layer_index] = source.momentum[layer_index].detach()
                    target.bias_momentum[layer_index] = source.bias_momentum[layer_index].detach()


def __grow_nodes(*networks):
    """Grow one node in every network when the first network asks for it.

    The decision is read from ``networks[0].growable`` at index
    ``number_hidden_layers``; the same layer index is then grown in every
    network passed in.

    Returns:
        True if nodes were grown, False otherwise.
    """
    origin = networks[0]
    layer = origin.number_hidden_layers
    if not origin.growable[layer]:
        return False

    nodes = 1  # number of nodes added per call
    for _ in range(nodes):
        for network in networks:
            network.grow_node(layer)
    return True


def __prune_nodes(*networks):
    """Prune from every network the nodes flagged on the first network.

    Pruning happens only when the first entry of
    ``networks[0].prunable[number_hidden_layers]`` is non-negative.

    Returns:
        True if nodes were pruned, False otherwise.
    """
    origin = networks[0]
    layer = origin.number_hidden_layers
    if origin.prunable[layer][0] >= 0:
        nodes_to_prune = origin.prunable[layer].tolist()
        for network in networks:
            # Walk the list backwards, as the original code does, so earlier
            # entries are still valid after each removal.
            for node_to_prune in reversed(nodes_to_prune):
                network.prune_node(layer, node_to_prune)
        return True
    return False


def __width_evolution(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None):
    """Run a forward pass and one stepwise width-adaptation on ``network``.

    When ``y`` is omitted the input itself is used as the target.
    """
    if y is None:
        y = x

    network.feedforward(x, y)
    network.width_adaptation_stepwise(y)


def __discriminative(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None, is_neg_grad: bool = False):
    """Train ``network`` on ``(x, y)``; ``y`` defaults to a detached ``x``.

    ``is_neg_grad`` is forwarded unchanged to ``network.train``.
    """
    y = x.detach() if y is None else y
    network.train(x=x, y=y, is_neg_grad=is_neg_grad)


def __generative(network: DenoisingAutoEncoder, x: torch.Tensor, y: torch.Tensor = None,
                 is_tied_weight=True, noise_ratio=0.1, glw_epochs: int = 1):
    """Greedy layer-wise pretrain the auto-encoder, then train it on ``(x, y)``.

    Args:
        network: Auto-encoder to train.
        x: Input batch.
        y: Target batch; defaults to a detached ``x``.
        is_tied_weight: Forwarded to ``network.train``.
        noise_ratio: Forwarded to both the pretraining and the training call.
        glw_epochs: Number of epochs for the greedy layer-wise pretraining.
    """
    y = x.detach() if y is None else y
    network.greedy_layer_wise_pretrain(x=x, number_epochs=glw_epochs, noise_ratio=noise_ratio)
    network.train(x=x, y=y, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight)


def __test(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None,
           is_source: bool = False, is_discriminative: bool = False, metrics=None):
    """Evaluate ``network`` without gradients and record the outcome.

    Args:
        network: Network to evaluate through ``network.test``.
        x: Input batch.
        y: Target batch; defaults to a detached ``x``.
        is_source: Selects the ``*_source*`` metric lists instead of the
            ``*_target*`` ones.
        is_discriminative: When True the classification rate, loss and
            misclassified count are recorded; otherwise only the
            reconstruction loss.
        metrics: Dict of lists that receives the values. When None the network
            is still evaluated but nothing is recorded (the declared default
            used to raise ``TypeError``).
    """
    with torch.no_grad():
        y = x.detach() if y is None else y
        network.test(x=x, y=y)

    if metrics is None:
        return

    domain = 'source' if is_source else 'target'
    if is_discriminative:
        metrics['classification_rate_%s' % domain].append(network.classification_rate)
        metrics['classification_%s_loss' % domain].append(float(network.loss_value))
        metrics['classification_%s_misclassified' % domain].append(float(network.misclassified))
    else:
        metrics['reconstruction_%s_loss' % domain].append(float(network.loss_value))


def __force_same_size(a_tensor, b_tensor, shuffle=True, strategy='max'):
    """Return ``a_tensor`` and ``b_tensor`` resized to the same first dimension.

    Args:
        a_tensor: First tensor; rows are indexed along dimension 0.
        b_tensor: Second tensor; rows are indexed along dimension 0.
        shuffle: When True, rows are randomly permuted before and after the
            resizing.
        strategy: ``'max'`` repeats (cycles) the rows of the shorter tensor
            until it is as long as the longer one; ``'min'`` truncates both
            to the shorter length. Any other value leaves the sizes untouched.

    Returns:
        The tuple ``(a_tensor, b_tensor)``.
    """
    common = min(a_tensor.shape[0], b_tensor.shape[0])

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    if strategy == 'max':
        if math.ceil(a_tensor.shape[0] / common) <= math.ceil(b_tensor.shape[0] / common):
            # a is the shorter (or equal) tensor: cycle a against every row of b.
            b_tensor = torch.stack([target for target, _
                                    in zip(b_tensor[torch.randperm(b_tensor.shape[0])],
                                           cycle(a_tensor[torch.randperm(a_tensor.shape[0])]))])
            a_tensor = torch.stack([source for _, source
                                    in zip(b_tensor[torch.randperm(b_tensor.shape[0])],
                                           cycle(a_tensor[torch.randperm(a_tensor.shape[0])]))])
        else:
            # b is the shorter tensor: cycle b against every row of a.
            b_tensor = torch.stack([target for target, _
                                    in zip(cycle(b_tensor[torch.randperm(b_tensor.shape[0])]),
                                           a_tensor[torch.randperm(a_tensor.shape[0])])])
            a_tensor = torch.stack([source for _, source
                                    in zip(cycle(b_tensor[torch.randperm(b_tensor.shape[0])]),
                                           a_tensor[torch.randperm(a_tensor.shape[0])])])

    elif strategy == 'min':
        a_tensor = a_tensor[:common]
        b_tensor = b_tensor[:common]

    # NOTE(review): nesting of this final shuffle was ambiguous in the
    # flattened source; it is applied for every strategy here - confirm.
    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    return a_tensor, b_tensor


def __print_annotation(lst):
    """Annotate roughly every quarter of ``lst`` plus its last value.

    Entries may be plain or NumPy scalars, or one-element containers whose
    first item is the value to print. An empty sequence is ignored.
    """
    total = len(lst)
    if total == 0:
        return

    def scalar_of(entry):
        # np.ndim is 0 for Python and NumPy scalars alike, so np.int64 and
        # np.float32 entries are no longer indexed by mistake.
        return entry if np.ndim(entry) == 0 else entry[0]

    # Clamp to 1: for fewer than 4 points the raw step is negative, which
    # used to produce an empty range and therefore no annotations.
    step = max(1, int(total * 0.25) - 1)

    for idx in range(0, total, step):
        pos = scalar_of(lst[idx])
        plt.annotate(format(pos, '.2f'), (idx, pos))

    # The closing annotation is anchored at x = len(lst).
    pos = scalar_of(lst[-1])
    plt.annotate(format(pos, '.2f'), (total, pos))


def __plot_time(train_time: np.ndarray,
                test_time: np.ndarray,
                annotation=True):
    """Plot per-minibatch train and test times and show the figure.

    The legend reports the NaN-aware mean and NaN-aware total of each series.
    """
    plt.title('Processing time')
    plt.ylabel('Seconds')
    plt.xlabel('Minibatches')

    for caption, series in (('Train', train_time), ('Test', test_time)):
        # nansum keeps the accumulated value finite when a sample is NaN,
        # matching the nanmean printed next to it.
        plt.plot(series, linewidth=1,
                 label=('%s time: %%f (Mean) %%f (Accumulated)' % caption) %
                       (np.nanmean(series), np.nansum(series)))
    plt.legend()

    if annotation:
        __print_annotation(train_time)
        __print_annotation(test_time)

    plt.tight_layout()
    plt.show()


def __plot_node_evolution(nodes_discriminator: np.ndarray,
                          nodes_domain_classifier: np.ndarray,
                          nodes_feature_extraction: np.ndarray,
                          annotation=True):
    """Plot the hidden-layer node count of the three networks per minibatch.

    The legend reports the NaN-aware mean and the final value of each series.
    """
    plt.title('Node evolution')
    plt.ylabel('Nodes')
    plt.xlabel('Minibatches')

    series_by_caption = (
        ('Discriminator', nodes_discriminator),
        ('Domain Classifier', nodes_domain_classifier),
        ('Feature Extraction', nodes_feature_extraction),
    )
    for caption, series in series_by_caption:
        plt.plot(series, linewidth=1,
                 label=('%s HL nodes: %%f (Mean) %%d (Final)' % caption) %
                       (np.nanmean(series), series[-1]))
    plt.legend()

    if annotation:
        for _, series in series_by_caption:
            __print_annotation(series)

    plt.tight_layout()
    plt.show()


def __plot_losses(classification_source_loss: np.ndarray,
                  classification_target_loss: np.ndarray,
                  reconstruction_source_loss: np.ndarray,
                  reconstruction_target_loss: np.ndarray,
                  domain_classifier_loss: np.ndarray,
                  annotation=True):
    """Plot the five tracked loss curves per minibatch and show the figure.

    The legend reports the NaN-aware mean of each curve.
    """
    plt.title('Losses evolution')
    plt.ylabel('Loss value')
    plt.xlabel('Minibatches')

    series_by_caption = (
        ('Classification Source', classification_source_loss),
        ('Classification Target', classification_target_loss),
        ('Reconstruction Source', reconstruction_source_loss),
        ('Reconstruction Target', reconstruction_target_loss),
        ('Domain Classifier', domain_classifier_loss),
    )
    for caption, series in series_by_caption:
        plt.plot(series, linewidth=1,
                 label=('%s Loss mean: %%f' % caption) % (np.nanmean(series)))
    plt.legend()

    if annotation:
        for _, series in series_by_caption:
            __print_annotation(series)

    plt.tight_layout()
    plt.show()


def __plot_classification_rates(source_rate: np.ndarray,
                                target_rate: np.ndarray,
                                domain_rate: np.ndarray,
                                total_source_rate: float,
                                total_target_rate: float,
                                total_domain_classification_rate: float,
                                annotation=True,
                                class_number=None):
    """Plot source, target and domain classification rates per minibatch.

    Args:
        source_rate: Per-minibatch source classification rate.
        target_rate: Per-minibatch target classification rate.
        domain_rate: Per-minibatch domain classification rate.
        total_source_rate: Dataset-level source rate shown in the legend.
        total_target_rate: Dataset-level target rate shown in the legend.
        total_domain_classification_rate: Dataset-level domain rate shown in
            the legend.
        annotation: When True, values are annotated on the curves.
        class_number: When given, a horizontal ``1 / class_number`` line is
            drawn as the random-guess threshold.
    """
    plt.title('Source and Target Classification Rates')
    plt.ylabel('Classification Rate')
    plt.xlabel('Minibatches')

    series_by_caption = (
        ('Source', source_rate, total_source_rate),
        ('Target', target_rate, total_target_rate),
        ('Domain', domain_rate, total_domain_classification_rate),
    )
    for caption, series, dataset_rate in series_by_caption:
        plt.plot(series, linewidth=1,
                 label=('%s CR: %%f (batch) | %%f (dataset)' % caption) %
                       (np.nanmean(series), dataset_rate))

    if annotation:
        for _, series, _ in series_by_caption:
            __print_annotation(series)

    baseline = np.ones(len(source_rate))
    if class_number is not None:
        plt.plot(baseline * 1 / class_number,
                 linewidth=1, label='Random Classification Threshold: %f' % (1 / class_number))

    # NOTE(review): nesting was ambiguous in the flattened source; this line
    # does not depend on class_number, so it is drawn unconditionally - confirm.
    plt.plot(baseline * 1 / 2,
             linewidth=1, label='Random Domain Classification Threshold: %f' % (1 / 2))

    plt.legend()

    plt.tight_layout()
    plt.show()


def __plot_ns(bias, var, ns, annotation=True):
    """Plot bias, variance and their sum (``ns``) and show the figure.

    The title and axis labels are expected to be set by the caller; this
    function only draws the curves, the legend and the optional annotations.
    """
    series_by_caption = (
        ('Bias', bias),
        ('Variance', var),
        ('NS (Bias + Variance)', ns),
    )
    for caption, series in series_by_caption:
        plt.plot(series, linewidth=1,
                 label=('%s mean: %%f' % caption) % (np.nanmean(series)))
    plt.legend()

    if annotation:
        for _, series in series_by_caption:
            __print_annotation(series)

    plt.tight_layout()
    plt.show()
+def __plot_discriminative_network_significance(bias, var, annotation=True): + plt.title('Discriminative Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_domain_classifier_network_significance(bias, var, annotation=True): + plt.title('Domain Classifier Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_feature_extractor_network_significance(bias, var, annotation=True): + plt.title('Feature Extractor Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __load_source_target(source: str, target: str, n_source_concept_drift: int = 1, n_target_concept_drift: int = 1): + dm_s = DataManipulator() + dm_t = DataManipulator() + + source = source.replace('_', '-').replace(' ', '-').lower() + target = target.replace('_', '-').replace(' ', '-').lower() + + if source == 'mnist-28': + dm_s.load_mnist(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-26': + dm_s.load_mnist(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-24': + dm_s.load_mnist(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-22': + dm_s.load_mnist(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-20': + dm_s.load_mnist(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-18': + dm_s.load_mnist(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-16': + dm_s.load_mnist(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-28': + dm_s.load_usps(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-26': + dm_s.load_usps(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-24': + 
dm_s.load_usps(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-22': + dm_s.load_usps(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-20': + dm_s.load_usps(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-18': + dm_s.load_usps(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-16': + dm_s.load_usps(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'cifar10': + dm_s.load_cifar10(n_concept_drifts=n_source_concept_drift) + elif source == 'stl10': + dm_s.load_stl10(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-fashion': + dm_s.load_amazon_review_fashion(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-all-beauty': + dm_s.load_amazon_review_all_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-appliances': + dm_s.load_amazon_review_appliances(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-arts-crafts-sewing': + dm_s.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-automotive': + dm_s.load_amazon_review_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-books': + dm_s.load_amazon_review_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cds-vinyl': + dm_s.load_amazon_review_cds_vinyl(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cellphones_accessories': + dm_s.load_amazon_review_cellphones_accessories(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-clothing-shoes-jewelry': + dm_s.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-digital-music': + dm_s.load_amazon_review_digital_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-electronics': + 
dm_s.load_amazon_review_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-gift-card': + dm_s.load_amazon_review_gift_card(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-grocery-gourmet-food': + dm_s.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-home-kitchen': + dm_s.load_amazon_review_home_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-industrial-scientific': + dm_s.load_amazon_review_industrial_scientific(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-kindle-store': + dm_s.load_amazon_review_kindle_store(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-luxury-beauty': + dm_s.load_amazon_review_luxury_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-magazine-subscription': + dm_s.load_amazon_review_magazine_subscription(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-movies-tv': + dm_s.load_amazon_review_movies_tv(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-musical-instruments': + dm_s.load_amazon_review_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-office-products': + dm_s.load_amazon_review_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-patio-lawn-garden': + dm_s.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-pet-supplies': + dm_s.load_amazon_review_pet_supplies(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-prime-pantry': + dm_s.load_amazon_review_prime_pantry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-software': + dm_s.load_amazon_review_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-sports-outdoors': + 
dm_s.load_amazon_review_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-tools-home-improvements': + dm_s.load_amazon_review_tools_home_improvements(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-toys-games': + dm_s.load_amazon_review_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-video-games': + dm_s.load_amazon_review_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-books': + dm_s.load_amazon_review_nips_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-dvd': + dm_s.load_amazon_review_nips_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-electronics': + dm_s.load_amazon_review_nips_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-kitchen': + dm_s.load_amazon_review_nips_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-apparel': + dm_s.load_amazon_review_acl_apparel(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-automotive': + dm_s.load_amazon_review_acl_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-baby': + dm_s.load_amazon_review_acl_baby(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-beauty': + dm_s.load_amazon_review_acl_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-books': + dm_s.load_amazon_review_acl_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-camera_photo': + dm_s.load_amazon_review_acl_camera_photo(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-cell_phones_service': + dm_s.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-computer_video_games': + 
dm_s.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-dvd': + dm_s.load_amazon_review_acl_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-electronics': + dm_s.load_amazon_review_acl_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-gourmet_food': + dm_s.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-grocery': + dm_s.load_amazon_review_acl_grocery(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-health_personal_care': + dm_s.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-jewelry_watches': + dm_s.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-kitchen_housewares': + dm_s.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-magazines': + dm_s.load_amazon_review_acl_magazines(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-music': + dm_s.load_amazon_review_acl_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-musical_instruments': + dm_s.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-office_products': + dm_s.load_amazon_review_acl_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-outdoor_living': + dm_s.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-software': + dm_s.load_amazon_review_acl_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-sports_outdoors': + dm_s.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source 
== 'amazon-review-acl-tools_hardware': + dm_s.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-toys_games': + dm_s.load_amazon_review_acl_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-video': + dm_s.load_amazon_review_acl_video(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-all': + dm_s.load_news_popularity_obama_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-all': + dm_s.load_news_popularity_economy_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-all': + dm_s.load_news_popularity_microsoft_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-all': + dm_s.load_news_popularity_palestine_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-facebook': + dm_s.load_news_popularity_obama_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-facebook': + dm_s.load_news_popularity_economy_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-facebook': + dm_s.load_news_popularity_microsoft_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-facebook': + dm_s.load_news_popularity_palestine_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-googleplus': + dm_s.load_news_popularity_obama_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-googleplus': + dm_s.load_news_popularity_economy_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-googleplus': + dm_s.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-googleplus': + dm_s.load_news_popularity_palestine_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-linkedin': + 
dm_s.load_news_popularity_obama_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-linkedin': + dm_s.load_news_popularity_economy_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-linkedin': + dm_s.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-linkedin': + dm_s.load_news_popularity_palestine_linkedin(n_concept_drifts=n_source_concept_drift) + + if target == 'mnist-28': + dm_t.load_mnist(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-26': + dm_t.load_mnist(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-24': + dm_t.load_mnist(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-22': + dm_t.load_mnist(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-20': + dm_t.load_mnist(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-18': + dm_t.load_mnist(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-16': + dm_t.load_mnist(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-28': + dm_t.load_usps(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-26': + dm_t.load_usps(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-24': + dm_t.load_usps(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-22': + dm_t.load_usps(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-20': + dm_t.load_usps(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-18': + dm_t.load_usps(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-16': + dm_t.load_usps(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'cifar10': + dm_t.load_cifar10(n_concept_drifts=n_target_concept_drift) + elif target == 'stl10': + 
dm_t.load_stl10(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-fashion': + dm_t.load_amazon_review_fashion(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-all-beauty': + dm_t.load_amazon_review_all_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-appliances': + dm_t.load_amazon_review_appliances(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-arts-crafts-sewing': + dm_t.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-automotive': + dm_t.load_amazon_review_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-books': + dm_t.load_amazon_review_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cds-vinyl': + dm_t.load_amazon_review_cds_vinyl(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cellphones_accessories': + dm_t.load_amazon_review_cellphones_accessories(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-clothing-shoes-jewelry': + dm_t.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-digital-music': + dm_t.load_amazon_review_digital_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-electronics': + dm_t.load_amazon_review_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-gift-card': + dm_t.load_amazon_review_gift_card(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-grocery-gourmet-food': + dm_t.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-home-kitchen': + dm_t.load_amazon_review_home_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-industrial-scientific': + dm_t.load_amazon_review_industrial_scientific(n_concept_drifts=n_target_concept_drift) 
+ elif target == 'amazon-review-kindle-store': + dm_t.load_amazon_review_kindle_store(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-luxury-beauty': + dm_t.load_amazon_review_luxury_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-magazine-subscription': + dm_t.load_amazon_review_magazine_subscription(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-movies-tv': + dm_t.load_amazon_review_movies_tv(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-musical-instruments': + dm_t.load_amazon_review_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-office-products': + dm_t.load_amazon_review_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-patio-lawn-garden': + dm_t.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-pet-supplies': + dm_t.load_amazon_review_pet_supplies(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-prime-pantry': + dm_t.load_amazon_review_prime_pantry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-software': + dm_t.load_amazon_review_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-sports-outdoors': + dm_t.load_amazon_review_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-tools-home-improvements': + dm_t.load_amazon_review_tools_home_improvements(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-toys-games': + dm_t.load_amazon_review_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-video-games': + dm_t.load_amazon_review_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-books': + dm_t.load_amazon_review_nips_books(n_concept_drifts=n_target_concept_drift) + elif target == 
'amazon-review-nips-dvd': + dm_t.load_amazon_review_nips_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-electronics': + dm_t.load_amazon_review_nips_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-kitchen': + dm_t.load_amazon_review_nips_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-apparel': + dm_t.load_amazon_review_acl_apparel(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-automotive': + dm_t.load_amazon_review_acl_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-baby': + dm_t.load_amazon_review_acl_baby(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-beauty': + dm_t.load_amazon_review_acl_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-books': + dm_t.load_amazon_review_acl_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-camera_photo': + dm_t.load_amazon_review_acl_camera_photo(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-cell_phones_service': + dm_t.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-computer_video_games': + dm_t.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-dvd': + dm_t.load_amazon_review_acl_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-electronics': + dm_t.load_amazon_review_acl_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-gourmet_food': + dm_t.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-grocery': + dm_t.load_amazon_review_acl_grocery(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-health_personal_care': + 
dm_t.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-jewelry_watches': + dm_t.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-kitchen_housewares': + dm_t.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-magazines': + dm_t.load_amazon_review_acl_magazines(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-music': + dm_t.load_amazon_review_acl_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-musical_instruments': + dm_t.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-office_products': + dm_t.load_amazon_review_acl_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-outdoor_living': + dm_t.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-software': + dm_t.load_amazon_review_acl_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-sports_outdoors': + dm_t.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-tools_hardware': + dm_t.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-toys_games': + dm_t.load_amazon_review_acl_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-video': + dm_t.load_amazon_review_acl_video(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-all': + dm_t.load_news_popularity_obama_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-all': + dm_t.load_news_popularity_economy_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-all': + 
dm_t.load_news_popularity_microsoft_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-all': + dm_t.load_news_popularity_palestine_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-facebook': + dm_t.load_news_popularity_obama_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-facebook': + dm_t.load_news_popularity_economy_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-facebook': + dm_t.load_news_popularity_microsoft_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-facebook': + dm_t.load_news_popularity_palestine_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-googleplus': + dm_t.load_news_popularity_obama_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-googleplus': + dm_t.load_news_popularity_economy_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-googleplus': + dm_t.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-googleplus': + dm_t.load_news_popularity_palestine_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-linkedin': + dm_t.load_news_popularity_obama_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-linkedin': + dm_t.load_news_popularity_economy_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-linkedin': + dm_t.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-linkedin': + dm_t.load_news_popularity_palestine_linkedin(n_concept_drifts=n_target_concept_drift) + + return dm_s, dm_t + + +def acdc(source, target, + n_source_concept_drift: int = 5, + n_target_concept_drift: int = 7, + internal_epochs: int = 1, is_gpu=False): + def print_metrics(minibatch, metrics, DMs, DMt, NN, DAEt, DA): + 
print('Minibatch: %d | Execution time (dataset load/pre-processing + model run): %f' % ( + minibatch, time.time() - metrics['start_execution_time'])) + if minibatch > 1: + print(( + 'Total of samples:' + Fore.BLUE + ' %d + %d = %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d + %d = %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % ( + metrics['number_evaluated_samples_source'][-2], + metrics['number_evaluated_samples_source'][-1] - metrics['number_evaluated_samples_source'][-2], + metrics['number_evaluated_samples_source'][-1], + DMs.number_samples(), + float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100, + metrics['number_evaluated_samples_target'][-2], + metrics['number_evaluated_samples_target'][-1] - metrics['number_evaluated_samples_target'][-2], + metrics['number_evaluated_samples_target'][-1], + DMt.number_samples(), + float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100, + metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1], + DMs.number_samples() + DMt.number_samples(), + float((metrics['number_evaluated_samples_source'][-1] + + metrics['number_evaluated_samples_target'][-1]) / ( + DMs.number_samples() + DMt.number_samples())) * 100)) + else: + print(( + 'Total of samples:' + Fore.BLUE + ' %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % ( + metrics['number_evaluated_samples_source'][-1], + DMs.number_samples(), + float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100, + metrics['number_evaluated_samples_target'][-1], + DMt.number_samples(), + float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100, + metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1], + DMs.number_samples() + DMt.number_samples(), + 
float((metrics['number_evaluated_samples_source'][-1] + + metrics['number_evaluated_samples_target'][-1]) / ( + DMs.number_samples() + DMt.number_samples())) * 100)) + + if minibatch > 1: + string_max = '' + Fore.GREEN + 'Max' + Style.RESET_ALL + string_mean = '' + Fore.YELLOW + 'Mean' + Style.RESET_ALL + string_min = '' + Fore.RED + 'Min' + Style.RESET_ALL + string_now = '' + Fore.BLUE + 'Now' + Style.RESET_ALL + string_accu = '' + Fore.MAGENTA + 'Accu' + Style.RESET_ALL + + print(( + '%s %s %s %s %s Training time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, string_accu, + np.max(metrics['train_time']), + np.nanmean(metrics['train_time']), + np.min(metrics['train_time']), + metrics['train_time'][-1], + np.sum(metrics['train_time']))) + print(( + '%s %s %s %s %s Testing time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, string_accu, + np.max(metrics['test_time']), + np.nanmean(metrics['test_time']), + np.min(metrics['test_time']), + metrics['test_time'][-1], + np.sum(metrics['test_time']))) + print(( + '%s %s %s %s CR Source:' + Fore.GREEN + ' %f%% ' + Back.BLUE + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_rate_source']) * 100, + np.nanmean(metrics['classification_rate_source']) * 100, + np.min(metrics['classification_rate_source']) * 100, + metrics['classification_rate_source'][-1] * 100)) + print(( + '%s %s %s %s CR Target:' + Fore.GREEN + ' %f%% ' + Back.RED + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + 
np.max(metrics['classification_rate_target']) * 100, + np.nanmean(metrics['classification_rate_target']) * 100, + np.min(metrics['classification_rate_target']) * 100, + metrics['classification_rate_target'][-1] * 100)) + print(( + '%s %s %s %s CR Domain Discriminator:' + Fore.GREEN + ' %f%% ' + Fore.YELLOW + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_rate_domain']) * 100, + np.nanmean(metrics['classification_rate_domain']) * 100, + np.min(metrics['classification_rate_domain']) * 100, + metrics['classification_rate_domain'][-1] * 100)) + print(( + '%s %s %s %s Classification Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_source_loss']), + np.nanmean(metrics['classification_source_loss']), + np.min(metrics['classification_source_loss']), + metrics['classification_source_loss'][-1])) + print(( + '%s %s %s %s Classification Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_target_loss']), + np.nanmean(metrics['classification_target_loss']), + np.min(metrics['classification_target_loss']), + metrics['classification_target_loss'][-1])) + print(( + '%s %s %s %s Domain Discriminator Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['domain_regression_loss']), + np.nanmean(metrics['domain_regression_loss']), + np.min(metrics['domain_regression_loss']), + metrics['domain_regression_loss'][-1])) + print(( + '%s %s %s %s Reconstruction Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + 
Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['reconstruction_source_loss']), + np.nanmean(metrics['reconstruction_source_loss']), + np.min(metrics['reconstruction_source_loss']), + metrics['reconstruction_source_loss'][-1])) + print(( + '%s %s %s %s Reconstruction Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['reconstruction_target_loss']), + np.nanmean(metrics['reconstruction_target_loss']), + np.min(metrics['reconstruction_target_loss']), + metrics['reconstruction_target_loss'][-1])) + print(( + '%s %s %s %s Discriminator Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_discriminator']), + np.nanmean(metrics['node_evolution_discriminator']), + np.min(metrics['node_evolution_discriminator']), + metrics['node_evolution_discriminator'][-1])) + print(( + '%s %s %s %s Denoising Autoencoder Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_feature_extraction']), + np.nanmean(metrics['node_evolution_feature_extraction']), + np.min(metrics['node_evolution_feature_extraction']), + metrics['node_evolution_feature_extraction'][-1])) + print(( + '%s %s %s %s Domain Classifier Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_domain_classifier']), + np.nanmean(metrics['node_evolution_domain_classifier']), + np.min(metrics['node_evolution_domain_classifier']), + metrics['node_evolution_domain_classifier'][-1])) + print(('Network structure:' + 
Fore.BLUE + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, NN.layers)))) + print(('Domain Discriminator structure:' + Fore.GREEN + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, DA.layers)))) + print(('Denoising Auto Encoder:' + Fore.RED + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, DAEt.layers)))) + print(Style.RESET_ALL) + + metrics = {'classification_rate_source': [], + 'classification_rate_target': [], + 'classification_rate_domain': [], + 'number_evaluated_samples_source': [], + 'number_evaluated_samples_target': [], + 'train_time': [], + 'test_time': [], + 'node_evolution_discriminator': [], + 'node_evolution_domain_classifier': [], + 'node_evolution_feature_extraction': [], + 'classification_target_loss': [], + 'classification_source_loss': [], + 'reconstruction_source_loss': [], + 'reconstruction_target_loss': [], + 'domain_regression_loss': [], + 'classification_source_misclassified': [], + 'classification_target_misclassified': [], + 'domain_classification_misclassified': [], + 'start_execution_time': time.time()} + MyDevice().set(is_gpu=is_gpu) + internal_epochs = internal_epochs if internal_epochs >= 1 else 1 + + SOURCE_DOMAIN_LABEL = torch.tensor([[1, 0]], dtype=torch.float, device=MyDevice().get()) + TARGET_DOMAIN_LABEL = torch.tensor([[0, 1]], dtype=torch.float, device=MyDevice().get()) + + dm_s, dm_t = __load_source_target(source, target, n_source_concept_drift, n_target_concept_drift) + + dae = DenoisingAutoEncoder([dm_s.number_features(), + 1, + dm_s.number_features()]) + nn = NeuralNetwork([dm_s.number_features(), + dae.layers[1], + 1, + dm_s.number_classes()]) + da = NeuralNetwork([dm_s.number_features(), + dae.layers[1], + 1, + 2]) + + count_source = 0 + count_target = 0 + count_window = 0 + window_size = 1000 + batch_counter = 0 + + x_source = [] + y_source = [] + x_target = [] + y_target = [] + + while count_source < dm_s.number_samples() \ + or count_target < dm_t.number_samples(): + if count_window < window_size \ + and 
(count_source < dm_s.number_samples() + or count_target < dm_t.number_samples()): + + source_prob = (dm_s.number_samples() - count_source) / ( + dm_s.number_samples() - count_source + dm_t.number_samples() - count_target + 0.) + + if (np.random.rand() <= source_prob and count_source < dm_s.number_samples()) or ( + count_target >= dm_t.number_samples() and count_source < dm_s.number_samples()): + x, y = dm_s.get_x_y(count_source) + x_source.append(x) + y_source.append(y) + count_source += 1 + count_window += 1 + elif count_target < dm_t.number_samples(): + x, y = dm_t.get_x_y(count_target) + x_target.append(x) + y_target.append(y) + count_target += 1 + count_window += 1 + else: + batch_counter += 1 + metrics['number_evaluated_samples_source'].append(count_source) + metrics['number_evaluated_samples_target'].append(count_target) + + # Workaround to avoid empty stream + if batch_counter > 1: + if (count_source - metrics['number_evaluated_samples_source'][-2] == 0): + x, y = dm_s.get_x_y(np.random.randint(0, count_source)) + x_source.append(x) + y_source.append(y) + if (count_target - metrics['number_evaluated_samples_target'][-2] == 0): + x, y = dm_t.get_x_y(np.random.randint(0, count_target)) + x_target.append(x) + y_target.append(y) + # Workaround to avoid empty stream + + x_source = torch.tensor(x_source, dtype=torch.float, device=MyDevice().get()) + y_source = torch.tensor(y_source, dtype=torch.float, device=MyDevice().get()) + x_target = torch.tensor(x_target, dtype=torch.float, device=MyDevice().get()) + y_target = torch.tensor(y_target, dtype=torch.float, device=MyDevice().get()) + + # TEST + if batch_counter > 1: + metrics['test_time'].append(time.time()) + __test(network=nn, x=x_source, y=y_source, + is_source=True, is_discriminative=True, metrics=metrics) + __test(network=nn, x=x_target, y=y_target, + is_source=False, is_discriminative=True, metrics=metrics) + __test(network=dae, x=x_source, + is_source=True, is_discriminative=False, metrics=metrics) + 
__test(network=dae, x=x_target, + is_source=False, is_discriminative=False, metrics=metrics) + + da.test(x=torch.cat([x_source, x_target]), + y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1), + TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)])) + metrics['domain_regression_loss'].append(float(da.loss_value)) + metrics['classification_rate_domain'].append(da.classification_rate) + metrics['domain_classification_misclassified'].append(da.misclassified) + metrics['test_time'][-1] = time.time() - metrics['test_time'][-1] + + # TRAIN + metrics['train_time'].append(time.time()) + + common_source, x_target = __force_same_size(torch.cat((x_source.T, y_source.T)).T, x_target) + x_source, y_source = common_source.T.split(x_source.shape[1]) + x_source, y_source = x_source.T, y_source.T + + epoch = 1 + while epoch <= internal_epochs: + for xs, xt, ys in [(xs.view(1, xs.shape[0]), xt.view(1, xt.shape[0]), ys.view(1, ys.shape[0])) + for xs, xt, ys in zip(x_source, x_target, cycle(y_source))]: + # Evolving + if epoch == 1: + # Evolving Feature Extraction + for j in range(0, 2): + if j == 0: + __width_evolution(network=dae, x=xs, y=xt) + elif i == 1: + __width_evolution(network=dae, x=xt, y=xs) + if __grow_nodes(dae, da, nn): + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + elif __prune_nodes(dae, da, nn): + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Evolving Source + __width_evolution(network=nn, x=xs, y=ys) + __width_evolution(network=da, x=xs, y=SOURCE_DOMAIN_LABEL) + if not __grow_nodes(da, nn): + if __prune_nodes(da): + __prune_nodes(nn) + elif not __grow_nodes(nn): + __prune_nodes(nn) + + # Evolving Target + __width_evolution(network=da, x=xt, y=TARGET_DOMAIN_LABEL) + if not __grow_nodes(da, nn): + __prune_nodes(da) + + # Denoising AutoEncoder + __generative(network=dae, x=xs, y=xt) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + 
__generative(network=dae, x=xt, y=xs) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Domain Discriminator + da.feedforward(x=xs, y=SOURCE_DOMAIN_LABEL, train=True).backpropagate() + dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg() + dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg() + for weight_no in range(da.number_hidden_layers, 0, -1): + da.update_weight(weight_no=weight_no) + + da.feedforward(x=xt, y=TARGET_DOMAIN_LABEL, train=True).backpropagate() + dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg() + dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg() + for weight_no in range(da.number_hidden_layers, 0, -1): + da.update_weight(weight_no=weight_no) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Discriminator + __discriminative(network=nn, x=xs, y=ys) + __copy_weights(source=nn, targets=[da, dae], layer_numbers=[1], copy_moment=True) + + epoch += 1 + da.test(x=torch.cat([x_source, x_target]), + y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1), + TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)])) + + # Metrics + metrics['train_time'][-1] = time.time() - metrics['train_time'][-1] + metrics['node_evolution_discriminator'].append(nn.layers[-2]) + metrics['node_evolution_domain_classifier'].append(da.layers[-2]) + metrics['node_evolution_feature_extraction'].append(dae.layers[-2]) + print_metrics(batch_counter, metrics, dm_s, dm_t, nn, dae, da) + + # Reset variables for the next batch + x_source = [] + y_source = [] + x_target = [] + y_target = [] + count_window = 0 + + result_string = '%f (T) | %f (S) \t ' \ + '%f | %d \t ' \ + '%f | %d \t ' \ + '%f | %d \t ' \ + '%f | %f' % ( + np.mean(metrics['classification_rate_target']), + np.mean(metrics['classification_rate_source']), + + np.mean(metrics['node_evolution_feature_extraction']), + 
metrics['node_evolution_feature_extraction'][-1], + + np.mean(metrics['node_evolution_discriminator']), + metrics['node_evolution_discriminator'][-1], + + np.mean(metrics['node_evolution_domain_classifier']), + metrics['node_evolution_domain_classifier'][-1], + + np.mean(metrics['train_time']), + np.sum(metrics['train_time'])) + + print('CR Rate (Target) | CR Rate (Source) | \t ' \ + 'Feature Extractor Node Evolution (mean | final) \t ' \ + 'Discriminator Node Evolution (mean | final) \t ' \ + 'Domain Classifier Node Evolution (mean | final) \t ' \ + 'Train Time (mean | total)') + print(result_string) + + result = {} + result['string'] = result_string + result['classification_rate_source_batch'] = np.nanmean(metrics['classification_rate_source']) + result['classification_rate_target_batch'] = np.nanmean(metrics['classification_rate_target']) + result['classification_rate_domain_batch'] = np.nanmean(metrics['classification_rate_domain']) + result['classification_rate_source_total'] = 1 - np.sum( + metrics['classification_source_misclassified']) / dm_s.number_samples() + result['classification_rate_target_total'] = 1 - np.sum( + metrics['classification_target_misclassified']) / dm_t.number_samples() + result['classification_rate_domain_total'] = 1 - np.sum(metrics['domain_classification_misclassified']) / ( + dm_s.number_samples() + dm_t.number_samples()) + result['source_node_mean'] = np.nanmean(metrics['node_evolution_discriminator']) + result['target_node_mean'] = np.nanmean(metrics['node_evolution_feature_extraction']) + result['domain_node_mean'] = np.nanmean(metrics['node_evolution_domain_classifier']) + result['source_node_final'] = metrics['node_evolution_discriminator'][-1] + result['target_node_final'] = metrics['node_evolution_feature_extraction'][-1] + result['domain_node_final'] = metrics['node_evolution_domain_classifier'][-1] + result['train_time_mean'] = np.nanmean(metrics['train_time']) + result['train_time_final'] = np.nansum(metrics['train_time']) 
+ result['test_time_mean'] = np.nanmean(metrics['test_time']) + result['test_time_final'] = np.nansum(metrics['test_time']) + result['classification_source_loss_mean'] = np.nanmean(metrics['classification_source_loss']) + result['classification_target_loss_mean'] = np.nanmean(metrics['classification_target_loss']) + result['reconstruction_source_loss_mean'] = np.nanmean(metrics['reconstruction_source_loss']) + result['reconstruction_target_loss_mean'] = np.nanmean(metrics['reconstruction_target_loss']) + result['domain_adaptation_loss_mean'] = np.nanmean(metrics['domain_regression_loss']) + + print() + print(result) + + __plot_time(metrics['train_time'], + metrics['test_time'], + annotation=False) + __plot_classification_rates(metrics['classification_rate_source'], + metrics['classification_rate_target'], + metrics['classification_rate_domain'], + 1 - np.sum(metrics['classification_source_misclassified']) / dm_s.number_samples(), + 1 - np.sum(metrics['classification_target_misclassified']) / dm_t.number_samples(), + 1 - np.sum(metrics['domain_classification_misclassified']) / ( + dm_s.number_samples() + dm_t.number_samples()), + class_number=dm_s.number_classes(), + annotation=False) + __plot_node_evolution(metrics['node_evolution_discriminator'], + metrics['node_evolution_domain_classifier'], + metrics['node_evolution_feature_extraction'], + annotation=False) + __plot_losses(metrics['classification_source_loss'], + metrics['classification_target_loss'], + metrics['reconstruction_source_loss'], + metrics['reconstruction_target_loss'], + metrics['domain_regression_loss'], + annotation=False) + __plot_discriminative_network_significance(nn.BIAS, nn.VAR, annotation=False) + __plot_domain_classifier_network_significance(da.BIAS, da.VAR, annotation=False) + __plot_feature_extractor_network_significance(dae.BIAS, dae.VAR, annotation=False) + + return result + + +def generate_csv_from_dataset(dataset_name: str, + n_concept_drift: int = 1, + is_source: bool = True, + 
is_one_hot_encoding: bool = True, + label_starts_at: int = 0): + import csv, os + from tqdm import tqdm + filename = 'source.csv' if is_source else 'target.csv' + + dm, _ = __load_source_target(source=dataset_name, + target='', + n_source_concept_drift=n_concept_drift) + + try: + os.remove(filename) + except: + pass + f = open(filename, 'x') + f.close() + + print('Exporting dataset "%s" as file "%s"' % (dataset_name, filename)) + with open(filename, 'w', newline='') as csv_file: + writer = csv.writer(csv_file, delimiter=',') + pbar = tqdm(total=dm.number_samples()) + for i in range(dm.number_samples()): + x, y = dm.get_x_y(i) + temp_y = np.zeros(dm.number_classes() + label_starts_at) + temp_y[y.argmax() + label_starts_at] = 1 + y = temp_y + if not is_one_hot_encoding: + y = np.asarray([y.argmax()]) + + writer.writerow(np.concatenate((x, y)).tolist()) + pbar.update(1) + pbar.close() + print('Done!') + + +def pre_download_benchmarks(): + def print_info(dm): + print('Number of samples: %d' % dm.number_samples()) + print('Number of features: %d' % dm.number_features()) + print('Number of classes: %d' % dm.number_classes()) + return DataManipulator() + + dm = DataManipulator() + dm.load_mnist() + dm = print_info(dm) + dm.load_usps() + dm = print_info(dm) + dm.load_cifar10() + dm = print_info(dm) + dm.load_stl10() + dm = print_info(dm) + # dm.load_news_popularity_obama_all() + # dm = print_info(dm) + # dm.load_news_popularity_economy_all() + # dm = print_info(dm) + # dm.load_news_popularity_microsoft_all() + # dm = print_info(dm) + # dm.load_news_popularity_palestine_all() + # dm = print_info(dm) + # dm.load_amazon_review_fashion() + # dm = print_info(dm) + dm.load_amazon_review_all_beauty() + dm = print_info(dm) + # dm.load_amazon_review_appliances() + # dm = print_info(dm) + # dm.load_amazon_review_arts_crafts_sewing() + # dm = print_info(dm) + # dm.load_amazon_review_automotive() + # dm = print_info(dm) + # dm.load_amazon_review_cds_vinyl() + # dm = print_info(dm) + # 
dm.load_amazon_review_cellphones_accessories() + # dm = print_info(dm) + # dm.load_amazon_review_clothing_shoes_jewelry() + # dm = print_info(dm) + # dm.load_amazon_review_digital_music() + # dm = print_info(dm) + # dm.load_amazon_review_electronics() + # dm = print_info(dm) + # dm.load_amazon_review_gift_card() + # dm = print_info(dm) + # dm.load_amazon_review_grocery_gourmet_food() + # dm = print_info(dm) + # dm.load_amazon_review_home_kitchen() + # dm = print_info(dm) + dm.load_amazon_review_industrial_scientific() + dm = print_info(dm) + # dm.load_amazon_review_kindle_store() + # dm = print_info(dm) + dm.load_amazon_review_luxury_beauty() + dm = print_info(dm) + dm.load_amazon_review_magazine_subscription() + dm = print_info(dm) + # dm.load_amazon_review_movies_tv() + # dm = print_info(dm) + # dm.load_amazon_review_musical_instruments() + # dm = print_info(dm) + # dm.load_amazon_review_office_products() + # dm = print_info(dm) + # dm.load_amazon_review_patio_lawn_garden() + # dm = print_info(dm) + # dm.load_amazon_review_pet_supplies() + # dm = print_info(dm) + # dm.load_amazon_review_prime_pantry() + # dm = print_info(dm) + # dm.load_amazon_review_software() + # dm = print_info(dm) + # dm.load_amazon_review_sports_outdoors() + # dm = print_info(dm) + # dm.load_amazon_review_tools_home_improvements() + # dm = print_info(dm) + # dm.load_amazon_review_toys_games() + # dm = print_info(dm) + # dm.load_amazon_review_video_games() + # dm = print_info(dm) + dm.load_amazon_review_books() + print_info(dm) + + +print('ACDC: Autonomous Cross Domain Conversion') +print('') +print('Available methods:') +print('************************************************************') +print('def acdc(%s,%s,%s,%s,%s,%s\n\t)' % ( + '\n\tsource: str', + '\n\ttarget: str', + '\n\tn_source_concept_drift: int = 5', + '\n\tn_target_concept_drift: int = 7', + '\n\tinternal_epochs: int = 1', + '\n\tis_gpu: bool = False')) +print(' ') +print('source: String representing the source benchmark') 
+print('target: String representing the target benchmark') +print('n_source_concept_drift: Number of concept drifts at the source stream') +print('n_target_concept_drift: Number of concept drifts at the target stream') +print('internal_epochs: Number of internal epochs per minibatch') +print('is_gpu: False to run on CPU. True to run on GPU. The paper were generated on CPU. The code is not optimized for GPU. Only runs if you have a huge ammount of GRAM. Also, the adaptation procedure is slower on GPU.') +print(' ') +print('Returns a dictionary with all results for the run') +print('************************************************************') +print(' ') +print('************************************************************') +print('pre_download_benchmarks()') +print('************************************************************') +print(' ') +print('************************************************************') +print('generate_csv_from_dataset(%s,%s,%s,%s,%s\n\t)' % ( + '\n\tdataset_name: str', + '\n\tn_concept_drift: int = 1', + '\n\tis_source: bool = True', + '\n\tis_one_hot_enconding: bool = True', + '\n\tlabel_starts_at: int = 0')) +print(' ') +print('dataset_name: String representing which benchmark should be converted to CSV') +print('n_concept_drift: Number of concept drifts applied into the CSV dataset') +print('is_source: True to generate a file "source.csv", False to generate a file "target.csv"') +print('is_one_hot_enconding: If True, label will be the n last columns in an one-hot-encoding format, if False, label will be the last column as a number') +print('label_starts_at: The smallest label. 
Usually it is 0, but some source_code, specially made in Matlab, can start from 1') +print('************************************************************') +print(' ') +print('List of possible strings for datasets:') +print(' ') +print('mnist-28: MNIST resized to 28x28, which is original size ~ 784 features') +print('mnist-16: MNIST resized to 16x16 ~ 256 features') +print('usps-28: USPS resized to 28x28 ~ 784 features') +print('usps-16: USPS resized to 16x16, which is original size ~ 256 features') +print('cifar10: CIFAR10 extracted from Resnet ~ 512 features') +print('stl10: STL10 extracted from Resnet ~512 features') +print('amazon-review-all-beauty: Amazon Review | All Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-books: Amazon Review | Books | Word2Vec applied ~ 300 features') +print('amazon-review-industrial-scientific: Amazon Review | Industrial and Scientific | Word2Vec applied ~ 300 features') +print('amazon-review-luxury-beauty: Amazon Review | Luxury Beauty | Word2Vec applied ~ 300 features') +print('amazon-review-magazine-subscription: Amazon Review | Magazine Subscription | Word2Vec applied ~ 300 features') \ No newline at end of file diff --git a/ACDC_Ablation_D.py b/ACDC_Ablation_D.py new file mode 100644 index 0000000..23b3753 --- /dev/null +++ b/ACDC_Ablation_D.py @@ -0,0 +1,1322 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic 
documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from ACDCDataManipulator import DataManipulator
from NeuralNetwork import NeuralNetwork
from AutoEncoder import DenoisingAutoEncoder
from MySingletons import MyDevice
from colorama import Fore, Back, Style
from itertools import cycle
import numpy as np
import matplotlib.pylab as plt
import math
import torch
import time


def __copy_weights(source: NeuralNetwork, targets: list, layer_numbers=None, copy_moment: bool = True):
    """Copy the parameters of selected layers from ``source`` into each target.

    Args:
        source: Network whose parameters are read.
        targets: A network, or a list/tuple of networks, that receive the
            parameters.
        layer_numbers: 1-based layer numbers to copy (defaults to ``[1]``).
            Any number greater than ``source.number_hidden_layers`` selects
            the output layer.
        copy_moment: When True the momentum buffers are copied as well.

    Every copied tensor is taken with ``Tensor.detach()``, i.e. detached from
    the autograd graph of ``source``.
    """
    if layer_numbers is None:
        layer_numbers = [1]
    # A bare network (anything that is not a list/tuple) is a single target.
    if not isinstance(targets, (list, tuple)):
        targets = [targets]

    for layer_number in layer_numbers:
        layer_index = layer_number - 1  # layer numbers are 1-based
        for target in targets:
            if layer_index >= source.number_hidden_layers:
                # Past the hidden layers: copy the output layer.
                target.output_weight = source.output_weight.detach()
                target.output_bias = source.output_bias.detach()
                if copy_moment:
                    target.output_momentum = source.output_momentum.detach()
                    target.output_bias_momentum = source.output_bias_momentum.detach()
            else:
                target.weight[layer_index] = source.weight[layer_index].detach()
                target.bias[layer_index] = source.bias[layer_index].detach()
                if copy_moment:
                    target.momentum[layer_index] = source.momentum[layer_index].detach()
                    target.bias_momentum[layer_index] = source.bias_momentum[layer_index].detach()


def __grow_nodes(*networks):
    """Grow one node in every network if the first network asks for it.

    The decision is read from ``networks[0].growable`` at index
    ``number_hidden_layers`` (presumably the last hidden layer -- confirm
    against ``NeuralNetwork``); the same layer index is then grown in all
    networks so that they stay the same width.

    Returns:
        True when a node was grown, False otherwise.
    """
    origin = networks[0]
    if not origin.growable[origin.number_hidden_layers]:
        return False

    for network in networks:
        network.grow_node(origin.number_hidden_layers)
    return True


def __prune_nodes(*networks):
    """Prune, in every network, the nodes the first network marks as prunable.

    ``networks[0].prunable[number_hidden_layers]`` holds the node indices; a
    negative first entry is treated as "nothing to prune".

    Returns:
        True when pruning was performed, False otherwise.
    """
    origin = networks[0]
    if origin.prunable[origin.number_hidden_layers][0] < 0:
        return False

    nodes_to_prune = origin.prunable[origin.number_hidden_layers].tolist()
    for network in networks:
        # Reverse order: presumably so that removing a node does not shift the
        # indices still to be removed (assumes ascending indices -- confirm).
        for node_to_prune in reversed(nodes_to_prune):
            network.prune_node(origin.number_hidden_layers, node_to_prune)
    return True


def __width_evolution(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None):
    """Run a forward pass and one stepwise width-adaptation on ``network``.

    When ``y`` is omitted the input itself is used as the target.
    """
    if y is None:
        y = x

    network.feedforward(x, y)
    network.width_adaptation_stepwise(y)


def __discriminative(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None, is_neg_grad: bool = False):
    """Train ``network`` on ``(x, y)``; ``y`` defaults to a detached ``x``.

    ``is_neg_grad`` is forwarded unchanged to ``network.train``.
    """
    y = x.detach() if y is None else y
    network.train(x=x, y=y, is_neg_grad=is_neg_grad)


def __generative(network: DenoisingAutoEncoder, x: torch.Tensor, y: torch.Tensor = None,
                 is_tied_weight=True, noise_ratio=0.1, glw_epochs: int = 1):
    """Greedy layer-wise pre-train, then train, the denoising auto-encoder.

    Args:
        network: Auto-encoder to update.
        x: Input batch.
        y: Reconstruction target; defaults to a detached ``x``.
        is_tied_weight: Forwarded to ``network.train``.
        noise_ratio: Forwarded to both the pre-training and training calls.
        glw_epochs: Number of epochs of greedy layer-wise pre-training.
    """
    y = x.detach() if y is None else y
    network.greedy_layer_wise_pretrain(x=x, number_epochs=glw_epochs, noise_ratio=noise_ratio)
    network.train(x=x, y=y, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight)


def __test(network: NeuralNetwork, x: torch.Tensor, y: torch.Tensor = None,
           is_source: bool = False, is_discriminative: bool = False, metrics=None):
    """Evaluate ``network`` without gradients and record the outcome.

    Args:
        network: Network to evaluate through ``network.test``.
        x: Input batch.
        y: Target; defaults to a detached ``x`` (reconstruction).
        is_source: Selects the ``*source*`` metric keys instead of ``*target*``.
        is_discriminative: When True the classification rate, loss and
            misclassified count are recorded; otherwise only the
            reconstruction loss.
        metrics: Dict of lists that the results are appended to. When None
            nothing is recorded (previously this raised ``TypeError``).
    """
    with torch.no_grad():
        y = x.detach() if y is None else y
        network.test(x=x, y=y)

    if metrics is None:
        return

    domain = 'source' if is_source else 'target'
    if is_discriminative:
        metrics['classification_rate_%s' % domain].append(network.classification_rate)
        metrics['classification_%s_loss' % domain].append(float(network.loss_value))
        metrics['classification_%s_misclassified' % domain].append(float(network.misclassified))
    else:
        metrics['reconstruction_%s_loss' % domain].append(float(network.loss_value))


def __force_same_size(a_tensor, b_tensor, shuffle=True, strategy='max'):
    """Return ``a_tensor`` and ``b_tensor`` resized to the same first dimension.

    Args:
        a_tensor: First tensor; rows are indexed along dimension 0.
        b_tensor: Second tensor; rows are indexed along dimension 0.
        shuffle: When True the rows of each tensor are randomly permuted
            (independently) before and after resizing. When False the row
            order is left untouched; previously the ``'max'`` strategy
            permuted the rows regardless of this flag.
        strategy: ``'max'`` repeats the rows of the shorter tensor cyclically
            until it is as long as the longer one; ``'min'`` truncates both
            to the shorter length. Any other value leaves the sizes as they
            are.

    Returns:
        The tuple ``(a_tensor, b_tensor)``.

    Raises:
        ValueError: With ``strategy='max'`` when exactly one tensor is empty,
            since an empty tensor has no rows to repeat.
    """
    a_len = a_tensor.shape[0]
    b_len = b_tensor.shape[0]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_len)]
        b_tensor = b_tensor[torch.randperm(b_len)]

    if strategy == 'max':
        longest = max(a_len, b_len)
        if longest > 0 and min(a_len, b_len) == 0:
            raise ValueError('Cannot repeat an empty tensor to match %d rows' % longest)
        # Cyclic tiling through modular row indices, done in a single
        # indexing operation instead of stacking row by row.
        if a_len < longest:
            a_tensor = a_tensor[torch.arange(longest, device=a_tensor.device) % a_len]
        elif b_len < longest:
            b_tensor = b_tensor[torch.arange(longest, device=b_tensor.device) % b_len]
    elif strategy == 'min':
        common = min(a_len, b_len)
        a_tensor = a_tensor[:common]
        b_tensor = b_tensor[:common]

    if shuffle:
        a_tensor = a_tensor[torch.randperm(a_tensor.shape[0])]
        b_tensor = b_tensor[torch.randperm(b_tensor.shape[0])]

    return a_tensor, b_tensor


def __print_annotation(lst):
    """Annotate roughly every quarter of ``lst``, plus its last value, on the current plot.

    Each entry may be a scalar (Python or NumPy) or a sequence whose first
    element is the value to show. An empty ``lst`` is ignored.
    """
    if len(lst) == 0:
        return

    def first_value(entry):
        # Zero-dimensional entries are the value itself; otherwise use item 0.
        return entry if np.ndim(entry) == 0 else entry[0]

    # Never let the step fall below 1: short series are annotated point by point.
    step = max(1, int(len(lst) * 0.25) - 1)
    for idx in range(0, len(lst), step):
        pos = first_value(lst[idx])
        plt.annotate(format(pos, '.2f'), (idx, pos))

    # The closing annotation is placed at x = len(lst), as in the original code.
    pos = first_value(lst[-1])
    plt.annotate(format(pos, '.2f'), (len(lst), pos))


def __plot_time(train_time: np.ndarray,
                test_time: np.ndarray,
                annotation=True):
    """Plot per-minibatch train and test times and show the figure.

    The legend reports the NaN-ignoring mean and accumulated time of each
    series.
    """
    plt.title('Processing time')
    plt.ylabel('Seconds')
    plt.xlabel('Minibatches')

    plt.plot(train_time, linewidth=1,
             label=('Train time: %f (Mean) %f (Accumulated)' %
                    (np.nanmean(train_time), np.nansum(train_time))))
    plt.plot(test_time, linewidth=1,
             label=('Test time: %f (Mean) %f (Accumulated)' %
                    (np.nanmean(test_time), np.nansum(test_time))))
    plt.legend()

    if annotation:
        __print_annotation(train_time)
        __print_annotation(test_time)

    plt.tight_layout()
    plt.show()


def __plot_node_evolution(nodes_discriminator: np.ndarray,
                          nodes_domain_classifier: np.ndarray,
                          nodes_feature_extraction: np.ndarray,
                          annotation=True):
    """Plot the hidden-node count of the three networks per minibatch.

    The legend reports the mean and the final node count of each series, so
    every series must contain at least one entry.
    """
    plt.title('Node evolution')
    plt.ylabel('Nodes')
    plt.xlabel('Minibatches')

    plt.plot(nodes_discriminator, linewidth=1,
             label=('Discriminator HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_discriminator), nodes_discriminator[-1])))
    plt.plot(nodes_domain_classifier, linewidth=1,
             label=('Domain Classifier HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_domain_classifier), nodes_domain_classifier[-1])))
    plt.plot(nodes_feature_extraction, linewidth=1,
             label=('Feature Extraction HL nodes: %f (Mean) %d (Final)' %
                    (np.nanmean(nodes_feature_extraction), nodes_feature_extraction[-1])))
    plt.legend()

    if annotation:
        __print_annotation(nodes_discriminator)
        __print_annotation(nodes_domain_classifier)
        __print_annotation(nodes_feature_extraction)

    plt.tight_layout()
    plt.show()


def __plot_losses(classification_source_loss: np.ndarray,
                  classification_target_loss: np.ndarray,
                  reconstruction_source_loss: np.ndarray,
                  reconstruction_target_loss: np.ndarray,
                  domain_classifier_loss: np.ndarray,
                  annotation=True):
    """Plot the five recorded loss series per minibatch and show the figure.

    The legend reports the NaN-ignoring mean of each series.
    """
    plt.title('Losses evolution')
    plt.ylabel('Loss value')
    plt.xlabel('Minibatches')

    plt.plot(classification_source_loss, linewidth=1,
             label=('Classification Source Loss mean: %f' %
                    (np.nanmean(classification_source_loss))))
    plt.plot(classification_target_loss, linewidth=1,
             label=('Classification Target Loss mean: %f' %
                    (np.nanmean(classification_target_loss))))
    plt.plot(reconstruction_source_loss, linewidth=1,
             label=('Reconstruction Source Loss mean: %f' %
                    (np.nanmean(reconstruction_source_loss))))
    plt.plot(reconstruction_target_loss, linewidth=1,
             label=('Reconstruction Target Loss mean: %f' %
                    (np.nanmean(reconstruction_target_loss))))
    plt.plot(domain_classifier_loss, linewidth=1,
             label=('Domain Classifier Loss mean: %f' %
                    (np.nanmean(domain_classifier_loss))))
    plt.legend()

    if annotation:
        __print_annotation(classification_source_loss)
        __print_annotation(classification_target_loss)
        __print_annotation(reconstruction_source_loss)
        __print_annotation(reconstruction_target_loss)
        __print_annotation(domain_classifier_loss)

    plt.tight_layout()
    plt.show()


def __plot_classification_rates(source_rate: np.ndarray,
                                target_rate: np.ndarray,
                                domain_rate: np.ndarray,
                                total_source_rate: float,
                                total_target_rate: float,
                                total_domain_classification_rate: float,
                                annotation=True,
                                class_number=None):
    """Plot source, target and domain classification rates per minibatch.

    Args:
        source_rate: Per-minibatch source classification rate.
        target_rate: Per-minibatch target classification rate.
        domain_rate: Per-minibatch domain classification rate.
        total_source_rate: Dataset-level source rate shown in the legend.
        total_target_rate: Dataset-level target rate shown in the legend.
        total_domain_classification_rate: Dataset-level domain rate shown in
            the legend.
        annotation: When True the series are annotated with their values.
        class_number: Number of classes; when given, a horizontal line at
            ``1 / class_number`` (random guessing) is drawn.
    """
    plt.title('Source and Target Classification Rates')
    plt.ylabel('Classification Rate')
    plt.xlabel('Minibatches')

    plt.plot(source_rate, linewidth=1, label=('Source CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(source_rate), total_source_rate)))
    plt.plot(target_rate, linewidth=1, label=('Target CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(target_rate), total_target_rate)))
    plt.plot(domain_rate, linewidth=1, label=('Domain CR: %f (batch) | %f (dataset)' %
                                              (np.nanmean(domain_rate), total_domain_classification_rate)))

    if annotation:
        __print_annotation(source_rate)
        __print_annotation(target_rate)
        __print_annotation(domain_rate)

    if class_number is not None:
        plt.plot(np.ones(len(source_rate)) * 1 / class_number,
                 linewidth=1, label='Random Classification Threshold: %f' % (1 / class_number))

    # The domain task is binary (source vs. target), hence the 1/2 baseline.
    plt.plot(np.ones(len(source_rate)) * 1 / 2,
             linewidth=1, label='Random Domain Classification Threshold: %f' % (1 / 2))

    plt.legend()

    plt.tight_layout()
    plt.show()


def __plot_ns(bias, var, ns, annotation=True):
    """Plot bias, variance and their sum (network significance) and show the figure.

    Title and axis labels are expected to have been set by the caller.
    """
    plt.plot(bias, linewidth=1, label=('Bias mean: %f' % (np.nanmean(bias))))
    plt.plot(var, linewidth=1, label=('Variance mean: %f' % (np.nanmean(var))))
    plt.plot(ns, linewidth=1, label=('NS (Bias + Variance) mean: %f' % (np.nanmean(ns))))
    plt.legend()

    if annotation:
        __print_annotation(bias)
        __print_annotation(var)
        __print_annotation(ns)

    plt.tight_layout()
    plt.show()


+def __plot_discriminative_network_significance(bias, var, annotation=True): + plt.title('Discriminative Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_domain_classifier_network_significance(bias, var, annotation=True): + plt.title('Domain Classifier Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __plot_feature_extractor_network_significance(bias, var, annotation=True): + plt.title('Feature Extractor Network Significance') + plt.ylabel('Value') + plt.xlabel('Sample') + + __plot_ns(bias, var, (np.array(bias) + np.array(var)).tolist(), annotation) + + +def __load_source_target(source: str, target: str, n_source_concept_drift: int = 1, n_target_concept_drift: int = 1): + dm_s = DataManipulator() + dm_t = DataManipulator() + + source = source.replace('_', '-').replace(' ', '-').lower() + target = target.replace('_', '-').replace(' ', '-').lower() + + if source == 'mnist-28': + dm_s.load_mnist(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-26': + dm_s.load_mnist(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-24': + dm_s.load_mnist(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-22': + dm_s.load_mnist(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-20': + dm_s.load_mnist(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-18': + dm_s.load_mnist(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'mnist-16': + dm_s.load_mnist(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-28': + dm_s.load_usps(resize=28, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-26': + dm_s.load_usps(resize=26, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-24': + 
dm_s.load_usps(resize=24, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-22': + dm_s.load_usps(resize=22, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-20': + dm_s.load_usps(resize=20, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-18': + dm_s.load_usps(resize=18, n_concept_drifts=n_source_concept_drift) + elif source == 'usps-16': + dm_s.load_usps(resize=16, n_concept_drifts=n_source_concept_drift) + elif source == 'cifar10': + dm_s.load_cifar10(n_concept_drifts=n_source_concept_drift) + elif source == 'stl10': + dm_s.load_stl10(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-fashion': + dm_s.load_amazon_review_fashion(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-all-beauty': + dm_s.load_amazon_review_all_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-appliances': + dm_s.load_amazon_review_appliances(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-arts-crafts-sewing': + dm_s.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-automotive': + dm_s.load_amazon_review_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-books': + dm_s.load_amazon_review_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cds-vinyl': + dm_s.load_amazon_review_cds_vinyl(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-cellphones_accessories': + dm_s.load_amazon_review_cellphones_accessories(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-clothing-shoes-jewelry': + dm_s.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-digital-music': + dm_s.load_amazon_review_digital_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-electronics': + 
dm_s.load_amazon_review_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-gift-card': + dm_s.load_amazon_review_gift_card(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-grocery-gourmet-food': + dm_s.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-home-kitchen': + dm_s.load_amazon_review_home_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-industrial-scientific': + dm_s.load_amazon_review_industrial_scientific(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-kindle-store': + dm_s.load_amazon_review_kindle_store(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-luxury-beauty': + dm_s.load_amazon_review_luxury_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-magazine-subscription': + dm_s.load_amazon_review_magazine_subscription(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-movies-tv': + dm_s.load_amazon_review_movies_tv(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-musical-instruments': + dm_s.load_amazon_review_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-office-products': + dm_s.load_amazon_review_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-patio-lawn-garden': + dm_s.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-pet-supplies': + dm_s.load_amazon_review_pet_supplies(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-prime-pantry': + dm_s.load_amazon_review_prime_pantry(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-software': + dm_s.load_amazon_review_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-sports-outdoors': + 
dm_s.load_amazon_review_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-tools-home-improvements': + dm_s.load_amazon_review_tools_home_improvements(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-toys-games': + dm_s.load_amazon_review_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-video-games': + dm_s.load_amazon_review_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-books': + dm_s.load_amazon_review_nips_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-dvd': + dm_s.load_amazon_review_nips_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-electronics': + dm_s.load_amazon_review_nips_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-nips-kitchen': + dm_s.load_amazon_review_nips_kitchen(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-apparel': + dm_s.load_amazon_review_acl_apparel(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-automotive': + dm_s.load_amazon_review_acl_automotive(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-baby': + dm_s.load_amazon_review_acl_baby(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-beauty': + dm_s.load_amazon_review_acl_beauty(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-books': + dm_s.load_amazon_review_acl_books(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-camera_photo': + dm_s.load_amazon_review_acl_camera_photo(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-cell_phones_service': + dm_s.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-computer_video_games': + 
dm_s.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-dvd': + dm_s.load_amazon_review_acl_dvd(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-electronics': + dm_s.load_amazon_review_acl_electronics(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-gourmet_food': + dm_s.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-grocery': + dm_s.load_amazon_review_acl_grocery(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-health_personal_care': + dm_s.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-jewelry_watches': + dm_s.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-kitchen_housewares': + dm_s.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-magazines': + dm_s.load_amazon_review_acl_magazines(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-music': + dm_s.load_amazon_review_acl_music(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-musical_instruments': + dm_s.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-office_products': + dm_s.load_amazon_review_acl_office_products(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-outdoor_living': + dm_s.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-software': + dm_s.load_amazon_review_acl_software(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-sports_outdoors': + dm_s.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_source_concept_drift) + elif source 
== 'amazon-review-acl-tools_hardware': + dm_s.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-toys_games': + dm_s.load_amazon_review_acl_toys_games(n_concept_drifts=n_source_concept_drift) + elif source == 'amazon-review-acl-video': + dm_s.load_amazon_review_acl_video(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-all': + dm_s.load_news_popularity_obama_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-all': + dm_s.load_news_popularity_economy_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-all': + dm_s.load_news_popularity_microsoft_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-all': + dm_s.load_news_popularity_palestine_all(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-facebook': + dm_s.load_news_popularity_obama_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-facebook': + dm_s.load_news_popularity_economy_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-facebook': + dm_s.load_news_popularity_microsoft_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-facebook': + dm_s.load_news_popularity_palestine_facebook(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-googleplus': + dm_s.load_news_popularity_obama_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-googleplus': + dm_s.load_news_popularity_economy_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-googleplus': + dm_s.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-googleplus': + dm_s.load_news_popularity_palestine_googleplus(n_concept_drifts=n_source_concept_drift) + elif source == 'news-obama-linkedin': + 
dm_s.load_news_popularity_obama_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-economy-linkedin': + dm_s.load_news_popularity_economy_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-microsoft-linkedin': + dm_s.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_source_concept_drift) + elif source == 'news-palestine-linkedin': + dm_s.load_news_popularity_palestine_linkedin(n_concept_drifts=n_source_concept_drift) + + if target == 'mnist-28': + dm_t.load_mnist(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-26': + dm_t.load_mnist(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-24': + dm_t.load_mnist(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-22': + dm_t.load_mnist(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-20': + dm_t.load_mnist(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-18': + dm_t.load_mnist(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'mnist-16': + dm_t.load_mnist(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-28': + dm_t.load_usps(resize=28, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-26': + dm_t.load_usps(resize=26, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-24': + dm_t.load_usps(resize=24, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-22': + dm_t.load_usps(resize=22, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-20': + dm_t.load_usps(resize=20, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-18': + dm_t.load_usps(resize=18, n_concept_drifts=n_target_concept_drift) + elif target == 'usps-16': + dm_t.load_usps(resize=16, n_concept_drifts=n_target_concept_drift) + elif target == 'cifar10': + dm_t.load_cifar10(n_concept_drifts=n_target_concept_drift) + elif target == 'stl10': + 
dm_t.load_stl10(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-fashion': + dm_t.load_amazon_review_fashion(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-all-beauty': + dm_t.load_amazon_review_all_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-appliances': + dm_t.load_amazon_review_appliances(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-arts-crafts-sewing': + dm_t.load_amazon_review_arts_crafts_sewing(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-automotive': + dm_t.load_amazon_review_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-books': + dm_t.load_amazon_review_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cds-vinyl': + dm_t.load_amazon_review_cds_vinyl(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-cellphones_accessories': + dm_t.load_amazon_review_cellphones_accessories(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-clothing-shoes-jewelry': + dm_t.load_amazon_review_clothing_shoes_jewelry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-digital-music': + dm_t.load_amazon_review_digital_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-electronics': + dm_t.load_amazon_review_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-gift-card': + dm_t.load_amazon_review_gift_card(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-grocery-gourmet-food': + dm_t.load_amazon_review_grocery_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-home-kitchen': + dm_t.load_amazon_review_home_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-industrial-scientific': + dm_t.load_amazon_review_industrial_scientific(n_concept_drifts=n_target_concept_drift) 
+ elif target == 'amazon-review-kindle-store': + dm_t.load_amazon_review_kindle_store(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-luxury-beauty': + dm_t.load_amazon_review_luxury_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-magazine-subscription': + dm_t.load_amazon_review_magazine_subscription(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-movies-tv': + dm_t.load_amazon_review_movies_tv(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-musical-instruments': + dm_t.load_amazon_review_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-office-products': + dm_t.load_amazon_review_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-patio-lawn-garden': + dm_t.load_amazon_review_patio_lawn_garden(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-pet-supplies': + dm_t.load_amazon_review_pet_supplies(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-prime-pantry': + dm_t.load_amazon_review_prime_pantry(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-software': + dm_t.load_amazon_review_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-sports-outdoors': + dm_t.load_amazon_review_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-tools-home-improvements': + dm_t.load_amazon_review_tools_home_improvements(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-toys-games': + dm_t.load_amazon_review_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-video-games': + dm_t.load_amazon_review_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-books': + dm_t.load_amazon_review_nips_books(n_concept_drifts=n_target_concept_drift) + elif target == 
'amazon-review-nips-dvd': + dm_t.load_amazon_review_nips_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-electronics': + dm_t.load_amazon_review_nips_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-nips-kitchen': + dm_t.load_amazon_review_nips_kitchen(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-apparel': + dm_t.load_amazon_review_acl_apparel(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-automotive': + dm_t.load_amazon_review_acl_automotive(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-baby': + dm_t.load_amazon_review_acl_baby(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-beauty': + dm_t.load_amazon_review_acl_beauty(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-books': + dm_t.load_amazon_review_acl_books(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-camera_photo': + dm_t.load_amazon_review_acl_camera_photo(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-cell_phones_service': + dm_t.load_amazon_review_acl_cell_phones_service(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-computer_video_games': + dm_t.load_amazon_review_acl_computer_video_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-dvd': + dm_t.load_amazon_review_acl_dvd(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-electronics': + dm_t.load_amazon_review_acl_electronics(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-gourmet_food': + dm_t.load_amazon_review_acl_gourmet_food(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-grocery': + dm_t.load_amazon_review_acl_grocery(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-health_personal_care': + 
dm_t.load_amazon_review_acl_health_personal_care(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-jewelry_watches': + dm_t.load_amazon_review_acl_jewelry_watches(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-kitchen_housewares': + dm_t.load_amazon_review_acl_kitchen_housewares(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-magazines': + dm_t.load_amazon_review_acl_magazines(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-music': + dm_t.load_amazon_review_acl_music(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-musical_instruments': + dm_t.load_amazon_review_acl_musical_instruments(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-office_products': + dm_t.load_amazon_review_acl_office_products(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-outdoor_living': + dm_t.load_amazon_review_acl_outdoor_living(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-software': + dm_t.load_amazon_review_acl_software(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-sports_outdoors': + dm_t.load_amazon_review_acl_sports_outdoors(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-tools_hardware': + dm_t.load_amazon_review_acl_tools_hardware(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-toys_games': + dm_t.load_amazon_review_acl_toys_games(n_concept_drifts=n_target_concept_drift) + elif target == 'amazon-review-acl-video': + dm_t.load_amazon_review_acl_video(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-all': + dm_t.load_news_popularity_obama_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-all': + dm_t.load_news_popularity_economy_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-all': + 
dm_t.load_news_popularity_microsoft_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-all': + dm_t.load_news_popularity_palestine_all(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-facebook': + dm_t.load_news_popularity_obama_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-facebook': + dm_t.load_news_popularity_economy_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-facebook': + dm_t.load_news_popularity_microsoft_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-facebook': + dm_t.load_news_popularity_palestine_facebook(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-googleplus': + dm_t.load_news_popularity_obama_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-googleplus': + dm_t.load_news_popularity_economy_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-googleplus': + dm_t.load_news_popularity_microsoft_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-googleplus': + dm_t.load_news_popularity_palestine_googleplus(n_concept_drifts=n_target_concept_drift) + elif target == 'news-obama-linkedin': + dm_t.load_news_popularity_obama_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-economy-linkedin': + dm_t.load_news_popularity_economy_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-microsoft-linkedin': + dm_t.load_news_popularity_microsoft_linkedin(n_concept_drifts=n_target_concept_drift) + elif target == 'news-palestine-linkedin': + dm_t.load_news_popularity_palestine_linkedin(n_concept_drifts=n_target_concept_drift) + + return dm_s, dm_t + + +def acdc(source, target, + n_source_concept_drift: int = 5, + n_target_concept_drift: int = 7, + internal_epochs: int = 1, is_gpu=False): + def print_metrics(minibatch, metrics, DMs, DMt, NN, DAEt, DA): + 
print('Minibatch: %d | Execution time (dataset load/pre-processing + model run): %f' % ( + minibatch, time.time() - metrics['start_execution_time'])) + if minibatch > 1: + print(( + 'Total of samples:' + Fore.BLUE + ' %d + %d = %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d + %d = %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % ( + metrics['number_evaluated_samples_source'][-2], + metrics['number_evaluated_samples_source'][-1] - metrics['number_evaluated_samples_source'][-2], + metrics['number_evaluated_samples_source'][-1], + DMs.number_samples(), + float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100, + metrics['number_evaluated_samples_target'][-2], + metrics['number_evaluated_samples_target'][-1] - metrics['number_evaluated_samples_target'][-2], + metrics['number_evaluated_samples_target'][-1], + DMt.number_samples(), + float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100, + metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1], + DMs.number_samples() + DMt.number_samples(), + float((metrics['number_evaluated_samples_source'][-1] + + metrics['number_evaluated_samples_target'][-1]) / ( + DMs.number_samples() + DMt.number_samples())) * 100)) + else: + print(( + 'Total of samples:' + Fore.BLUE + ' %d/%d (%.2f%%) Source' + Style.RESET_ALL + ' |' + Fore.RED + ' %d/%d (%.2f%%) Target' + Style.RESET_ALL + ' | %d/%d (%.2f%%) Samples in total') % ( + metrics['number_evaluated_samples_source'][-1], + DMs.number_samples(), + float(metrics['number_evaluated_samples_source'][-1] / DMs.number_samples()) * 100, + metrics['number_evaluated_samples_target'][-1], + DMt.number_samples(), + float(metrics['number_evaluated_samples_target'][-1] / DMt.number_samples()) * 100, + metrics['number_evaluated_samples_source'][-1] + metrics['number_evaluated_samples_target'][-1], + DMs.number_samples() + DMt.number_samples(), + 
float((metrics['number_evaluated_samples_source'][-1] + + metrics['number_evaluated_samples_target'][-1]) / ( + DMs.number_samples() + DMt.number_samples())) * 100)) + + if minibatch > 1: + string_max = '' + Fore.GREEN + 'Max' + Style.RESET_ALL + string_mean = '' + Fore.YELLOW + 'Mean' + Style.RESET_ALL + string_min = '' + Fore.RED + 'Min' + Style.RESET_ALL + string_now = '' + Fore.BLUE + 'Now' + Style.RESET_ALL + string_accu = '' + Fore.MAGENTA + 'Accu' + Style.RESET_ALL + + print(( + '%s %s %s %s %s Training time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, string_accu, + np.max(metrics['train_time']), + np.nanmean(metrics['train_time']), + np.min(metrics['train_time']), + metrics['train_time'][-1], + np.sum(metrics['train_time']))) + print(( + '%s %s %s %s %s Testing time:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Fore.MAGENTA + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, string_accu, + np.max(metrics['test_time']), + np.nanmean(metrics['test_time']), + np.min(metrics['test_time']), + metrics['test_time'][-1], + np.sum(metrics['test_time']))) + print(( + '%s %s %s %s CR Source:' + Fore.GREEN + ' %f%% ' + Back.BLUE + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_rate_source']) * 100, + np.nanmean(metrics['classification_rate_source']) * 100, + np.min(metrics['classification_rate_source']) * 100, + metrics['classification_rate_source'][-1] * 100)) + print(( + '%s %s %s %s CR Target:' + Fore.GREEN + ' %f%% ' + Back.RED + Fore.YELLOW + Style.BRIGHT + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + 
np.max(metrics['classification_rate_target']) * 100, + np.nanmean(metrics['classification_rate_target']) * 100, + np.min(metrics['classification_rate_target']) * 100, + metrics['classification_rate_target'][-1] * 100)) + print(( + '%s %s %s %s CR Domain Discriminator:' + Fore.GREEN + ' %f%% ' + Fore.YELLOW + '%f%%' + Style.RESET_ALL + Fore.RED + ' %f%%' + Fore.BLUE + ' %f%%' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_rate_domain']) * 100, + np.nanmean(metrics['classification_rate_domain']) * 100, + np.min(metrics['classification_rate_domain']) * 100, + metrics['classification_rate_domain'][-1] * 100)) + print(( + '%s %s %s %s Classification Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_source_loss']), + np.nanmean(metrics['classification_source_loss']), + np.min(metrics['classification_source_loss']), + metrics['classification_source_loss'][-1])) + print(( + '%s %s %s %s Classification Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['classification_target_loss']), + np.nanmean(metrics['classification_target_loss']), + np.min(metrics['classification_target_loss']), + metrics['classification_target_loss'][-1])) + print(( + '%s %s %s %s Domain Discriminator Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['domain_regression_loss']), + np.nanmean(metrics['domain_regression_loss']), + np.min(metrics['domain_regression_loss']), + metrics['domain_regression_loss'][-1])) + print(( + '%s %s %s %s Reconstruction Source Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + 
Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['reconstruction_source_loss']), + np.nanmean(metrics['reconstruction_source_loss']), + np.min(metrics['reconstruction_source_loss']), + metrics['reconstruction_source_loss'][-1])) + print(( + '%s %s %s %s Reconstruction Target Loss:' + Fore.GREEN + ' %f' + Fore.YELLOW + ' %f' + Fore.RED + ' %f' + Fore.BLUE + ' %f' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['reconstruction_target_loss']), + np.nanmean(metrics['reconstruction_target_loss']), + np.min(metrics['reconstruction_target_loss']), + metrics['reconstruction_target_loss'][-1])) + print(( + '%s %s %s %s Discriminator Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_discriminator']), + np.nanmean(metrics['node_evolution_discriminator']), + np.min(metrics['node_evolution_discriminator']), + metrics['node_evolution_discriminator'][-1])) + print(( + '%s %s %s %s Denoising Autoencoder Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_feature_extraction']), + np.nanmean(metrics['node_evolution_feature_extraction']), + np.min(metrics['node_evolution_feature_extraction']), + metrics['node_evolution_feature_extraction'][-1])) + print(( + '%s %s %s %s Domain Classifier Nodes:' + Fore.GREEN + ' %d' + Fore.YELLOW + ' %f' + Fore.RED + ' %d' + Fore.BLUE + ' %d' + Style.RESET_ALL) % ( + string_max, string_mean, string_min, string_now, + np.max(metrics['node_evolution_domain_classifier']), + np.nanmean(metrics['node_evolution_domain_classifier']), + np.min(metrics['node_evolution_domain_classifier']), + metrics['node_evolution_domain_classifier'][-1])) + print(('Network structure:' + 
Fore.BLUE + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, NN.layers)))) + print(('Domain Discriminator structure:' + Fore.GREEN + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, DA.layers)))) + print(('Denoising Auto Encoder:' + Fore.RED + ' %s' + Style.RESET_ALL) % ( + " ".join(map(str, DAEt.layers)))) + print(Style.RESET_ALL) + + metrics = {'classification_rate_source': [], + 'classification_rate_target': [], + 'classification_rate_domain': [], + 'number_evaluated_samples_source': [], + 'number_evaluated_samples_target': [], + 'train_time': [], + 'test_time': [], + 'node_evolution_discriminator': [], + 'node_evolution_domain_classifier': [], + 'node_evolution_feature_extraction': [], + 'classification_target_loss': [], + 'classification_source_loss': [], + 'reconstruction_source_loss': [], + 'reconstruction_target_loss': [], + 'domain_regression_loss': [], + 'classification_source_misclassified': [], + 'classification_target_misclassified': [], + 'domain_classification_misclassified': [], + 'start_execution_time': time.time()} + MyDevice().set(is_gpu=is_gpu) + internal_epochs = internal_epochs if internal_epochs >= 1 else 1 + + SOURCE_DOMAIN_LABEL = torch.tensor([[1, 0]], dtype=torch.float, device=MyDevice().get()) + TARGET_DOMAIN_LABEL = torch.tensor([[0, 1]], dtype=torch.float, device=MyDevice().get()) + + dm_s, dm_t = __load_source_target(source, target, n_source_concept_drift, n_target_concept_drift) + + dae = DenoisingAutoEncoder([dm_s.number_features(), + int(dm_s.number_features() * 0.5), + dm_s.number_features()]) + nn = NeuralNetwork([dm_s.number_features(), + dae.layers[1], + 1, + dm_s.number_classes()]) + da = NeuralNetwork([dm_s.number_features(), + dae.layers[1], + 1, + 2]) + + count_source = 0 + count_target = 0 + count_window = 0 + window_size = 1000 + batch_counter = 0 + + x_source = [] + y_source = [] + x_target = [] + y_target = [] + + while count_source < dm_s.number_samples() \ + or count_target < dm_t.number_samples(): + if count_window < 
window_size \ + and (count_source < dm_s.number_samples() + or count_target < dm_t.number_samples()): + + source_prob = (dm_s.number_samples() - count_source) / ( + dm_s.number_samples() - count_source + dm_t.number_samples() - count_target + 0.) + + if (np.random.rand() <= source_prob and count_source < dm_s.number_samples()) or ( + count_target >= dm_t.number_samples() and count_source < dm_s.number_samples()): + x, y = dm_s.get_x_y(count_source) + x_source.append(x) + y_source.append(y) + count_source += 1 + count_window += 1 + elif count_target < dm_t.number_samples(): + x, y = dm_t.get_x_y(count_target) + x_target.append(x) + y_target.append(y) + count_target += 1 + count_window += 1 + else: + batch_counter += 1 + metrics['number_evaluated_samples_source'].append(count_source) + metrics['number_evaluated_samples_target'].append(count_target) + + # Workaround to avoid empty stream + if batch_counter > 1: + if (count_source - metrics['number_evaluated_samples_source'][-2] == 0): + x, y = dm_s.get_x_y(np.random.randint(0, count_source)) + x_source.append(x) + y_source.append(y) + if (count_target - metrics['number_evaluated_samples_target'][-2] == 0): + x, y = dm_t.get_x_y(np.random.randint(0, count_target)) + x_target.append(x) + y_target.append(y) + # Workaround to avoid empty stream + + x_source = torch.tensor(x_source, dtype=torch.float, device=MyDevice().get()) + y_source = torch.tensor(y_source, dtype=torch.float, device=MyDevice().get()) + x_target = torch.tensor(x_target, dtype=torch.float, device=MyDevice().get()) + y_target = torch.tensor(y_target, dtype=torch.float, device=MyDevice().get()) + + # TEST + if batch_counter > 1: + metrics['test_time'].append(time.time()) + __test(network=nn, x=x_source, y=y_source, + is_source=True, is_discriminative=True, metrics=metrics) + __test(network=nn, x=x_target, y=y_target, + is_source=False, is_discriminative=True, metrics=metrics) + __test(network=dae, x=x_source, + is_source=True, is_discriminative=False, 
metrics=metrics) + __test(network=dae, x=x_target, + is_source=False, is_discriminative=False, metrics=metrics) + + da.test(x=torch.cat([x_source, x_target]), + y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1), + TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)])) + metrics['domain_regression_loss'].append(float(da.loss_value)) + metrics['classification_rate_domain'].append(da.classification_rate) + metrics['domain_classification_misclassified'].append(da.misclassified) + metrics['test_time'][-1] = time.time() - metrics['test_time'][-1] + + # TRAIN + metrics['train_time'].append(time.time()) + + common_source, x_target = __force_same_size(torch.cat((x_source.T, y_source.T)).T, x_target) + x_source, y_source = common_source.T.split(x_source.shape[1]) + x_source, y_source = x_source.T, y_source.T + + epoch = 1 + while epoch <= internal_epochs: + for xs, xt, ys in [(xs.view(1, xs.shape[0]), xt.view(1, xt.shape[0]), ys.view(1, ys.shape[0])) + for xs, xt, ys in zip(x_source, x_target, cycle(y_source))]: + # Evolving + if epoch == 1: + # Evolving Feature Extraction + for j in range(0, 2): + if j == 0: + __width_evolution(network=dae, x=xs, y=xt) + elif j == 1: + __width_evolution(network=dae, x=xt, y=xs) + if __grow_nodes(dae, da, nn): + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + elif __prune_nodes(dae, da, nn): + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Evolving Source + __width_evolution(network=nn, x=xs, y=ys) + if not __grow_nodes(nn): + __prune_nodes(nn) + __width_evolution(network=da, x=xs, y=SOURCE_DOMAIN_LABEL) + if not __grow_nodes(da): + __prune_nodes(da) + + # Evolving Target + __width_evolution(network=da, x=xt, y=TARGET_DOMAIN_LABEL) + if not __grow_nodes(da): + __prune_nodes(da) + + # Denoising AutoEncoder + __generative(network=dae, x=xs, y=xt) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + 
__generative(network=dae, x=xt, y=xs) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Domain Discriminator + da.feedforward(x=xs, y=SOURCE_DOMAIN_LABEL, train=True).backpropagate() + dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg() + dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg() + for weight_no in range(da.number_hidden_layers, 0, -1): + da.update_weight(weight_no=weight_no) + + da.feedforward(x=xt, y=TARGET_DOMAIN_LABEL, train=True).backpropagate() + dae.weight[0] = dae.weight[0] - da.learning_rate * da.weight[0].grad.neg() + dae.bias[0] = dae.bias[0] - da.learning_rate * da.bias[0].grad.neg() + for weight_no in range(da.number_hidden_layers, 0, -1): + da.update_weight(weight_no=weight_no) + __copy_weights(source=dae, targets=[da, nn], layer_numbers=[1], copy_moment=False) + + # Discriminator + __discriminative(network=nn, x=xs, y=ys) + __copy_weights(source=nn, targets=[da, dae], layer_numbers=[1], copy_moment=True) + + epoch += 1 + da.test(x=torch.cat([x_source, x_target]), + y=torch.cat([SOURCE_DOMAIN_LABEL.repeat(x_source.shape[0], 1), + TARGET_DOMAIN_LABEL.repeat(x_target.shape[0], 1)])) + + # Metrics + metrics['train_time'][-1] = time.time() - metrics['train_time'][-1] + metrics['node_evolution_discriminator'].append(nn.layers[-2]) + metrics['node_evolution_domain_classifier'].append(da.layers[-2]) + metrics['node_evolution_feature_extraction'].append(dae.layers[-2]) + print_metrics(batch_counter, metrics, dm_s, dm_t, nn, dae, da) + + # Reset variables for the next batch + x_source = [] + y_source = [] + x_target = [] + y_target = [] + count_window = 0 + + result_string = '%f (T) | %f (S) \t ' \ + '%f | %d \t ' \ + '%f | %d \t ' \ + '%f | %d \t ' \ + '%f | %f' % ( + np.mean(metrics['classification_rate_target']), + np.mean(metrics['classification_rate_source']), + + np.mean(metrics['node_evolution_feature_extraction']), + 
metrics['node_evolution_feature_extraction'][-1], + + np.mean(metrics['node_evolution_discriminator']), + metrics['node_evolution_discriminator'][-1], + + np.mean(metrics['node_evolution_domain_classifier']), + metrics['node_evolution_domain_classifier'][-1], + + np.mean(metrics['train_time']), + np.sum(metrics['train_time'])) + + print('CR Rate (Target) | CR Rate (Source) | \t ' \ + 'Feature Extractor Node Evolution (mean | final) \t ' \ + 'Discriminator Node Evolution (mean | final) \t ' \ + 'Domain Classifier Node Evolution (mean | final) \t ' \ + 'Train Time (mean | total)') + print(result_string) + + result = {} + result['string'] = result_string + result['classification_rate_source_batch'] = np.nanmean(metrics['classification_rate_source']) + result['classification_rate_target_batch'] = np.nanmean(metrics['classification_rate_target']) + result['classification_rate_domain_batch'] = np.nanmean(metrics['classification_rate_domain']) + result['classification_rate_source_total'] = 1 - np.sum( + metrics['classification_source_misclassified']) / dm_s.number_samples() + result['classification_rate_target_total'] = 1 - np.sum( + metrics['classification_target_misclassified']) / dm_t.number_samples() + result['classification_rate_domain_total'] = 1 - np.sum(metrics['domain_classification_misclassified']) / ( + dm_s.number_samples() + dm_t.number_samples()) + result['source_node_mean'] = np.nanmean(metrics['node_evolution_discriminator']) + result['target_node_mean'] = np.nanmean(metrics['node_evolution_feature_extraction']) + result['domain_node_mean'] = np.nanmean(metrics['node_evolution_domain_classifier']) + result['source_node_final'] = metrics['node_evolution_discriminator'][-1] + result['target_node_final'] = metrics['node_evolution_feature_extraction'][-1] + result['domain_node_final'] = metrics['node_evolution_domain_classifier'][-1] + result['train_time_mean'] = np.nanmean(metrics['train_time']) + result['train_time_final'] = np.nansum(metrics['train_time']) 
def generate_csv_from_dataset(dataset_name: str,
                              n_concept_drift: int = 1,
                              is_source: bool = True,
                              is_one_hot_encoding: bool = True,
                              label_starts_at: int = 0):
    """Export one benchmark to ``source.csv`` or ``target.csv``.

    Each sample is written as one CSV row: the feature vector followed by
    its label, either one-hot encoded or as a single integer column.

    Args:
        dataset_name: Benchmark identifier, forwarded to
            ``__load_source_target`` as the ``source`` argument.
        n_concept_drift: Forwarded as ``n_source_concept_drift``.
        is_source: ``True`` writes ``source.csv``; ``False`` writes
            ``target.csv``. Any existing file of that name is overwritten.
        is_one_hot_encoding: ``True`` appends
            ``number_classes() + label_starts_at`` columns with a single 1;
            ``False`` appends one column holding the label index.
        label_starts_at: Offset added to the zero-based label index.
    """
    import csv
    from tqdm import tqdm

    filename = 'source.csv' if is_source else 'target.csv'

    dm, _ = __load_source_target(source=dataset_name,
                                 target='',
                                 n_source_concept_drift=n_concept_drift)

    print('Exporting dataset "%s" as file "%s"' % (dataset_name, filename))
    # Mode 'w' creates the file or truncates an existing one, so no separate
    # delete/create step (and no blanket ``except``) is required. The
    # ``with`` statement also closes the progress bar if a row fails.
    with open(filename, 'w', newline='') as csv_file, \
            tqdm(total=dm.number_samples()) as pbar:
        writer = csv.writer(csv_file, delimiter=',')
        one_hot_width = dm.number_classes() + label_starts_at
        for i in range(dm.number_samples()):
            x, y = dm.get_x_y(i)
            # ``y`` is reduced with argmax, i.e. treated as a score/one-hot
            # vector whose largest entry marks the class.
            label = int(y.argmax()) + label_starts_at
            if is_one_hot_encoding:
                y = np.zeros(one_hot_width)
                y[label] = 1
            else:
                y = np.asarray([label])

            writer.writerow(np.concatenate((x, y)).tolist())
            pbar.update(1)
    print('Done!')


def pre_download_benchmarks():
    """Load every enabled benchmark once and print its dimensions.

    A fresh ``DataManipulator`` is used for each benchmark. The loaders
    themselves live in ``DataManipulator``; presumably they download and
    cache the data on first use (not visible from this function).
    """
    def print_info(dm):
        print('Number of samples: %d' % dm.number_samples())
        print('Number of features: %d' % dm.number_features())
        print('Number of classes: %d' % dm.number_classes())

    # Names of the ``DataManipulator`` methods to call, in order.
    # Commented entries are benchmarks that are currently disabled.
    loader_names = (
        'load_mnist',
        'load_usps',
        'load_cifar10',
        'load_stl10',
        # 'load_news_popularity_obama_all',
        # 'load_news_popularity_economy_all',
        # 'load_news_popularity_microsoft_all',
        # 'load_news_popularity_palestine_all',
        # 'load_amazon_review_fashion',
        'load_amazon_review_all_beauty',
        # 'load_amazon_review_appliances',
        # 'load_amazon_review_arts_crafts_sewing',
        # 'load_amazon_review_automotive',
        # 'load_amazon_review_cds_vinyl',
        # 'load_amazon_review_cellphones_accessories',
        # 'load_amazon_review_clothing_shoes_jewelry',
        # 'load_amazon_review_digital_music',
        # 'load_amazon_review_electronics',
        # 'load_amazon_review_gift_card',
        # 'load_amazon_review_grocery_gourmet_food',
        # 'load_amazon_review_home_kitchen',
        'load_amazon_review_industrial_scientific',
        # 'load_amazon_review_kindle_store',
        'load_amazon_review_luxury_beauty',
        'load_amazon_review_magazine_subscription',
        # 'load_amazon_review_movies_tv',
        # 'load_amazon_review_musical_instruments',
        # 'load_amazon_review_office_products',
        # 'load_amazon_review_patio_lawn_garden',
        # 'load_amazon_review_pet_supplies',
        # 'load_amazon_review_prime_pantry',
        # 'load_amazon_review_software',
        # 'load_amazon_review_sports_outdoors',
        # 'load_amazon_review_tools_home_improvements',
        # 'load_amazon_review_toys_games',
        # 'load_amazon_review_video_games',
        'load_amazon_review_books',
    )

    for loader_name in loader_names:
        dm = DataManipulator()
        getattr(dm, loader_name)()
        print_info(dm)


# Usage banner, printed whenever this module is imported or executed.
print('ACDC: Autonomous Cross Domain Conversion')
print('')
print('Available methods:')
print('************************************************************')
print('def acdc(%s,%s,%s,%s,%s,%s\n\t)' % (
    '\n\tsource: str',
    '\n\ttarget: str',
    '\n\tn_source_concept_drift: int = 5',
    '\n\tn_target_concept_drift: int = 7',
    '\n\tinternal_epochs: int = 1',
    '\n\tis_gpu: bool = False'))
print(' ')
print('source: String representing the source benchmark')
print('target: String representing the target benchmark')
print('n_source_concept_drift: Number of concept drifts at the source stream')
print('n_target_concept_drift: Number of concept drifts at the target stream')
print('internal_epochs: Number of internal epochs per minibatch')
print('is_gpu: False to run on CPU. True to run on GPU. The paper were generated on CPU. The code is not optimized for GPU. Only runs if you have a huge ammount of GRAM. Also, the adaptation procedure is slower on GPU.')
print(' ')
print('Returns a dictionary with all results for the run')
print('************************************************************')
print(' ')
print('************************************************************')
print('pre_download_benchmarks()')
print('************************************************************')
print(' ')
print('************************************************************')
# The keyword below must match the real parameter name
# ``is_one_hot_encoding`` so that users can copy it verbatim.
print('generate_csv_from_dataset(%s,%s,%s,%s,%s\n\t)' % (
    '\n\tdataset_name: str',
    '\n\tn_concept_drift: int = 1',
    '\n\tis_source: bool = True',
    '\n\tis_one_hot_encoding: bool = True',
    '\n\tlabel_starts_at: int = 0'))
print(' ')
print('dataset_name: String representing which benchmark should be converted to CSV')
print('n_concept_drift: Number of concept drifts applied into the CSV dataset')
print('is_source: True to generate a file "source.csv", False to generate a file "target.csv"')
print('is_one_hot_encoding: If True, label will be the n last columns in an one-hot-encoding format, if False, label will be the last column as a number')
print('label_starts_at: The smallest label. Usually it is 0, but some source_code, specially made in Matlab, can start from 1')
print('************************************************************')
print(' ')
print('List of possible strings for datasets:')
print(' ')
print('mnist-28: MNIST resized to 28x28, which is original size ~ 784 features')
print('mnist-16: MNIST resized to 16x16 ~ 256 features')
print('usps-28: USPS resized to 28x28 ~ 784 features')
print('usps-16: USPS resized to 16x16, which is original size ~ 256 features')
print('cifar10: CIFAR10 extracted from Resnet ~ 512 features')
print('stl10: STL10 extracted from Resnet ~512 features')
print('amazon-review-all-beauty: Amazon Review | All Beauty | Word2Vec applied ~ 300 features')
print('amazon-review-books: Amazon Review | Books | Word2Vec applied ~ 300 features')
print('amazon-review-industrial-scientific: Amazon Review | Industrial and Scientific | Word2Vec applied ~ 300 features')
print('amazon-review-luxury-beauty: Amazon Review | Luxury Beauty | Word2Vec applied ~ 300 features')
print('amazon-review-magazine-subscription: Amazon Review | Magazine Subscription | Word2Vec applied ~ 300 features')
any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from NeuralNetwork import NeuralNetwork
from MySingletons import MyDevice

import numpy as np
import torch


class AutoEncoder(NeuralNetwork):
    """Sigmoid/MSE autoencoder built on top of ``NeuralNetwork``.

    Besides the inherited weights, the class keeps the decoder-side biases
    used by the one-hidden-layer networks of the greedy layer-wise
    pre-training, so they survive between calls and can be grown or pruned
    together with the main network.
    """

    # One decoder bias per hidden layer, created lazily during pre-training.
    _greedy_layer_bias = None
    # Decoder bias of the greedy network built for the output layer.
    _greedy_layer_output_bias = None

    @property
    def latent_space(self):
        """Activation stored for the middle (latent) layer."""
        return self.layer_value[self.latent_space_position]

    @property
    def latent_space_size(self):
        """Entry of ``self.layers`` at the latent position."""
        return self.layers[self.latent_space_position]

    @property
    def latent_space_position(self):
        """Index of the middle layer in ``self.layers``."""
        return int((len(self.layers) - 1) / 2)

    def __init__(self, layers=None):
        """Build the network and force sigmoid activations with MSE loss.

        Args:
            layers: Layer sizes handed to ``NeuralNetwork``. ``None`` is
                treated as an empty list, which avoids sharing one mutable
                default list between instances.
        """
        NeuralNetwork.__init__(self, [] if layers is None else layers)
        for i in range(self.number_hidden_layers):
            self.activation_function[i] = self.ACTIVATION_FUNCTION_SIGMOID
        self.output_activation_function = self.ACTIVATION_FUNCTION_SIGMOID
        self.loss_function = self.LOSS_FUNCTION_MSE

    def train(self, x: torch.Tensor, is_tied_weight: bool = False, noise_ratio: float = 0.0,
              weight_number: int = None, y: torch.Tensor = None):
        """Run one training call on a noisy copy of ``x``.

        Args:
            x: Input batch; ``masking_noise`` is applied to it first.
            is_tied_weight: When true, decoder weights are overwritten with
                transposes of encoder weights before training.
            noise_ratio: Forwarded to ``masking_noise``.
            weight_number: Forwarded to ``NeuralNetwork.train`` as
                ``weight_no``.
            y: Reconstruction target; defaults to the clean ``x``.
        """
        if is_tied_weight:
            for i in range(int(self.number_hidden_layers / 2)):
                if i == 0:
                    self.output_weight = self.weight[i].T
                else:
                    self.weight[-i] = self.weight[i].T

        if y is None:
            y = x
        NeuralNetwork.train(self, x=self.masking_noise(x=x, noise_ratio=noise_ratio), y=y, weight_no=weight_number)

    def test(self, x: torch.Tensor, is_beta_updatable: bool = False, y: torch.Tensor = None):
        """Evaluate the network; the target defaults to ``x`` itself."""
        if y is None:
            y = x
        return NeuralNetwork.test(self, x=x, y=y, is_beta_updatable=is_beta_updatable)

    def grow_node(self, layer_number):
        """Grow a node in the network and extend the matching greedy bias."""
        NeuralNetwork.grow_node(self, layer_number)
        self.grow_greedy_layer_bias(layer_number)

    def prune_node(self, layer_number, node_number):
        """Prune a node from the network and from the matching greedy bias."""
        NeuralNetwork.prune_node(self, layer_number, node_number)
        self.prune_greedy_layer_bias(layer_number, node_number)

    def grow_greedy_layer_bias(self, layer_number):
        """Append one freshly initialised column to a greedy decoder bias.

        ``layer_number == number_hidden_layers`` selects the output-layer
        bias; any other value selects ``_greedy_layer_bias[layer_number]``.
        """
        b = layer_number  # readability
        # Value comparison: ``is`` on integers only works by accident of
        # CPython's small-integer cache.
        if b == self.number_hidden_layers:
            self._greedy_layer_output_bias = torch.cat(
                (self._greedy_layer_output_bias, self.xavier_weight_initialization(1, 1)), dim=1)
        else:
            [n_out, n_in] = self._greedy_layer_bias[b].shape
            n_in = n_in + 1
            # Stay in torch: ``np.append`` would flatten the bias into a
            # NumPy array, and the later ``.detach()`` / ``.clone()`` /
            # ``torch.cat`` calls on this entry would then fail.
            # assumes xavier_weight_initialization returns a tensor on the
            # same device as the stored bias, as the output branch above
            # already relies on -- TODO confirm
            self._greedy_layer_bias[b] = torch.cat(
                (self._greedy_layer_bias[b],
                 self.xavier_weight_initialization(n_out, n_in, shape=(n_out, 1))), dim=1)

    def grow_layer(self, option, number_of_nodes):
        """Layer growth is not supported for autoencoders."""
        raise TypeError('Not implemented')

    def prune_greedy_layer_bias(self, layer_number, node_number):
        """Remove column ``node_number`` from the selected greedy bias."""
        def remove_nth_element(greedy_bias_tensor, n):
            # Biases are stored as a single row; drop entry ``n`` of that
            # row and restore the (1, width) layout.
            bias_tensor = torch.cat([greedy_bias_tensor[0][:n], greedy_bias_tensor[0][n + 1:]])
            return bias_tensor.view(1, bias_tensor.shape[0])

        b = layer_number  # readability
        n = node_number  # readability

        if b == self.number_hidden_layers:
            self._greedy_layer_output_bias = remove_nth_element(self._greedy_layer_output_bias, n)
        else:
            self._greedy_layer_bias[b] = remove_nth_element(self._greedy_layer_bias[b], n)

    def greedy_layer_wise_pretrain(self, x: torch.Tensor, number_epochs: int = 1, is_tied_weight: bool = False,
                                   noise_ratio: float = 0.0):
        """Pre-train each layer as its own one-hidden-layer autoencoder.

        For every layer a temporary ``NeuralNetwork`` is built whose encoder
        is a copy of this network's layer and whose decoder is its
        transpose. After ``number_epochs`` training calls the encoder
        weights and bias are copied back. Missing greedy decoder biases are
        created on first use, so calling this with ``number_epochs=0`` only
        initialises them.

        Args:
            x: Input batch fed through ``forward_pass`` to obtain each
                layer's training data.
            number_epochs: Training calls per layer.
            is_tied_weight: Accepted for signature compatibility; not read
                by this method.
            noise_ratio: Forwarded to ``masking_noise``.
        """
        for i in range(len(self.layers) - 1):
            if i > self.number_hidden_layers:
                nn = NeuralNetwork([self.layers[i], self.layers[-1], self.layers[i]], init_weights=False)
            else:
                nn = NeuralNetwork([self.layers[i], self.layers[i + 1], self.layers[i]], init_weights=False)

            nn.activation_function[0] = nn.ACTIVATION_FUNCTION_SIGMOID
            nn.output_activation_function = nn.ACTIVATION_FUNCTION_SIGMOID
            nn.loss_function = nn.LOSS_FUNCTION_MSE
            nn.momentum_rate = 0

            if i >= self.number_hidden_layers:
                # Output layer of the main network.
                nn.weight[0] = self.output_weight.clone()
                nn.bias[0] = self.output_bias.clone()
                nn.output_weight = self.output_weight.T.clone()
                if self._greedy_layer_output_bias is None:
                    nodes_after = nn.layers[-1]

                    self._greedy_layer_output_bias = self.xavier_weight_initialization(1, nodes_after)
                nn.output_bias = self._greedy_layer_output_bias.clone()
            else:
                nn.weight[0] = self.weight[i].clone()
                nn.bias[0] = self.bias[i].clone()
                nn.output_weight = self.weight[i].T.clone()
                try:
                    nn.output_bias = self._greedy_layer_bias[i].detach()
                except (TypeError, IndexError):
                    # TypeError: the list is still None.
                    # IndexError: no bias has been created for layer i yet.
                    nodes_after = nn.layers[-1]

                    if self._greedy_layer_bias is None:
                        self._greedy_layer_bias = []

                    self._greedy_layer_bias.append(self.xavier_weight_initialization(1, nodes_after))
                    nn.output_bias = self._greedy_layer_bias[i].clone()

            for j in range(0, number_epochs):
                training_x = self.forward_pass(x=x).layer_value[i].detach()
                nn.train(x=self.masking_noise(x=training_x, noise_ratio=noise_ratio), y=training_x)

            if i >= self.number_hidden_layers:
                self.output_weight = nn.weight[0].clone()
                self.output_bias = nn.bias[0].clone()
            else:
                self.weight[i] = nn.weight[0].clone()
                self.bias[i] = nn.bias[0].clone()

    def update_weights_kullback_leibler(self, Xs, Xt, gamma=0.0001):
        """Call the parent KL update with each input doubling as its target."""
        loss = NeuralNetwork.update_weights_kullback_leibler(self, Xs, Xs, Xt, Xt, gamma)
        return loss

    def compute_evaluation_window(self, x):
        """Not supported for autoencoders."""
        raise TypeError('Not implemented')

    def compute_bias(self, y):
        """Mean squared difference between ``self.Ey.T`` and ``y``."""
        return torch.mean((self.Ey.T - y) ** 2)

    @property
    def network_variance(self):
        """Mean of ``Ey2 - Ey ** 2``."""
        return torch.mean(self.Ey2 - self.Ey ** 2)


class DenoisingAutoEncoder(AutoEncoder):
    """``AutoEncoder`` whose greedy decoder biases exist from construction."""

    def __init__(self, layers=None):
        """Build the autoencoder and initialise the greedy-layer biases.

        Args:
            layers: Layer sizes; ``layers[0]`` is used as the input width,
                so an empty or missing list raises ``IndexError``.
        """
        layers = [] if layers is None else layers
        AutoEncoder.__init__(self, layers)
        # FIXME: The lines below are just to build the greedy_layer_bias. Find a more intuitive way to perform it
        random_x = np.random.rand(layers[0])
        random_x = torch.tensor(np.atleast_2d(random_x), dtype=torch.float, device=MyDevice().get())
        # Zero epochs: nothing is trained, only the biases are created.
        self.greedy_layer_wise_pretrain(x=random_x, number_epochs=0)

    def train(self, x: torch.Tensor, noise_ratio: float = 0.0, is_tied_weight: bool = False,
              weight_number: int = None, y: torch.Tensor = None):
        """Same as ``AutoEncoder.train`` with ``noise_ratio`` listed first."""
        AutoEncoder.train(self, x=x, noise_ratio=noise_ratio, is_tied_weight=is_tied_weight,
                          weight_number=weight_number, y=y)

    def greedy_layer_wise_pretrain(self, x: torch.Tensor, number_epochs: int = 1, is_tied_weight: bool = False,
                                   noise_ratio: float = 0.0, y: torch.Tensor = None):
        """Delegate to ``AutoEncoder.greedy_layer_wise_pretrain``.

        ``y`` is accepted for signature compatibility and is not forwarded.
        """
        AutoEncoder.greedy_layer_wise_pretrain(self, x=x, number_epochs=number_epochs,
                                               is_tied_weight=is_tied_weight, noise_ratio=noise_ratio)
The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. 
+# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. 
+# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
import numpy as np


class ElasticNodes:
    """Per-layer bookkeeping for growing and pruning hidden nodes.

    The class-level names below are placeholders; ``__init__`` replaces most
    of them with arrays or lists sized by the number of hidden layers.
    """

    growable = None
    prunable = None

    data_mean = 0
    data_standard_deviation = 0
    data_variance = 0

    number_samples_feed = 0
    number_samples_layer = None

    bias_mean = None
    bias_variance = None
    bias_standard_deviation = None
    minimum_bias_mean = None
    minimum_bias_standard_deviation = None
    bias = None

    var_mean = None
    var_variance = None
    var_standard_deviation = None
    minimum_var_mean = None
    minimum_var_standard_deviation = None
    var = None

    node_evolution = None

    bias_gradient = None
    bias_mean_net = None
    var_mean_net = None

    def __init__(self, number_hidden_layers=1):
        """Create the per-layer statistics for ``number_hidden_layers`` layers.

        Running statistics start at zero, the tracked minima start at
        infinity, no layer is flagged as growable, and every layer's prune
        list starts as ``[-1]``.
        """
        layer_count = number_hidden_layers

        def zeros():
            # A new float array per attribute so that none share storage.
            return np.zeros(layer_count)

        def infinities():
            return np.full(layer_count, np.inf)

        self.number_samples_layer = zeros()

        # Bias statistics.
        self.bias_mean = zeros()
        self.bias_variance = zeros()
        self.bias_standard_deviation = zeros()
        self.minimum_bias_mean = infinities()
        self.minimum_bias_standard_deviation = infinities()
        self.BIAS = []

        # Variance statistics.
        self.var_mean = zeros()
        self.var_variance = zeros()
        self.var_standard_deviation = zeros()
        self.minimum_var_mean = infinities()
        self.minimum_var_standard_deviation = infinities()
        self.VAR = []

        # Float zeros, i.e. "not growable" for every layer.
        self.growable = zeros()
        # One independent list per layer.
        self.prunable = [[-1] for _ in range(layer_count)]
notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. 
Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. 
That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. 
That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
\ No newline at end of file diff --git a/MySingletons.py b/MySingletons.py new file mode 100644 index 0000000..f80946f --- /dev/null +++ b/MySingletons.py @@ -0,0 +1,120 @@ +# Marcus Vinicius Sousa Leite de Carvalho +# marcus.decarvalho@ntu.edu.sg +# ivsucram@gmail.com +# +# NANYANG TECHNOLOGICAL UNIVERSITY - NTUITIVE PTE LTD Dual License Agreement +# Non-Commercial Use Only +# This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. 
Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. 
That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. 
class MyWord2Vec:
    """Thin accessor around the process-wide ``Word2Vec`` singleton."""

    def get(self):
        """Return the model held by the singleton.

        The first call creates the singleton without a model, which triggers
        the gensim download/load performed by the holder.
        """
        return Word2Vec.instance().word2vec

    def set(self, word2vec):
        """Store ``word2vec`` on the singleton so later ``get`` calls return it."""
        Word2Vec.instance().word2vec = word2vec


class Word2Vec:
    """Singleton wrapper that owns one word-embedding model per process.

    Obtain it through ``Word2Vec.instance()``; direct construction raises.
    """

    class __Word2Vec:
        """Holder object that actually stores the model."""

        def __init__(self, word2vec=None):
            # ``is not None`` (rather than truthiness) so that a supplied but
            # empty/falsy model is kept instead of silently triggering the
            # download; this matches the ``is None`` test used by instance().
            if word2vec is not None:
                self.word2vec = word2vec
            else:
                print('Downloading (if needed) and setting Word2Vec Google-News-300 from gensim')
                print('This might take a while. Be patient...')
                self.word2vec = gensim_downloader.load('word2vec-google-news-300')
                print('Done!')

        def __str__(self):
            # The model is not a str, so it must be converted before
            # concatenation (the bare ``+`` raised TypeError).
            return repr(self) + str(self.word2vec)

    _instance = None
    __instance = None

    def __init__(self):
        raise RuntimeError('Call instance() instead')

    @classmethod
    def instance(cls, word2vec=None):
        """Return the singleton, creating it on the first call.

        Args:
            word2vec: optional pre-loaded model. Only consulted when the
                singleton does not exist yet; ignored on later calls.
        """
        if cls._instance is None:
            # __new__ bypasses __init__, which deliberately raises.
            cls._instance = cls.__new__(cls)
            cls.__instance = Word2Vec.__Word2Vec(word2vec)
        return cls._instance

    def __getattr__(self, name):
        # Only reached when normal lookup fails: delegate to the holder.
        return getattr(self.__instance, name)


class MyDevice:
    """Thin accessor around the process-wide ``TorchDevice`` singleton."""

    def get(self):
        """Return the ``torch.device`` currently stored on the singleton."""
        return TorchDevice.instance().device

    def set(self, is_gpu=True):
        """Select the device.

        Args:
            is_gpu: when true, use ``cuda:0`` if CUDA is available and fall
                back to the CPU otherwise; when false, always use the CPU.
        """
        use_cuda = is_gpu and torch.cuda.is_available()
        TorchDevice.instance().device = torch.device("cuda:0" if use_cuda else "cpu")


class TorchDevice:
    """Singleton wrapper that owns the ``torch.device`` used by the project.

    Obtain it through ``TorchDevice.instance()``; direct construction raises.
    """

    class __TorchDevice:
        """Holder object that actually stores the device."""

        def __init__(self, device: torch.device = None):
            if device is not None:
                self.device = device
            else:
                # Default: first CUDA device when available, else CPU.
                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        def __str__(self):
            # ``torch.device`` is not a str, so it must be converted before
            # concatenation (the bare ``+`` raised TypeError).
            return repr(self) + str(self.device)

    _instance = None
    __instance = None

    def __init__(self):
        raise RuntimeError('Call instance() instead')

    @classmethod
    def instance(cls, device: torch.device = None):
        """Return the singleton, creating it on the first call.

        Args:
            device: optional device to start with. Only consulted when the
                singleton does not exist yet; ignored on later calls.
        """
        if cls._instance is None:
            # __new__ bypasses __init__, which deliberately raises.
            cls._instance = cls.__new__(cls)
            cls.__instance = TorchDevice.__TorchDevice(device)
        return cls._instance

    def __getattr__(self, name):
        # Only reached when normal lookup fails: delegate to the holder.
        return getattr(self.__instance, name)
This NTUITIVE License Agreement, including all exhibits ("NTUITIVE-LA") is a legal agreement between you and NTUITIVE (or “we”) located at 71 Nanyang Drive, NTU Innovation Centre, #01-109, Singapore 637722, a wholly owned subsidiary of Nanyang Technological University (“NTU”) for the software or data identified above, which may include source code, and any associated materials, text or speech files, associated media and "online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. 
+# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. +# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. 
THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. 
class MyUtil:
    """Namespace of small, stateless numeric helpers built on ``torch``."""

    def __init__(self):
        pass

    @staticmethod
    def recursive_mean_standard_deviation(x, old_mean, old_variance, number_samples):
        """Fold one new observation into running statistics.

        Args:
            x: the new observation.
            old_mean: running mean before ``x``.
            old_variance: running accumulator returned by the previous call
                (second element of the returned tuple).
            number_samples: divisor used for the mean update and for the
                standard deviation.

        Returns:
            ``(mean, accumulator, standard_deviation)`` where the accumulator
            is ``old_variance + (x - old_mean) * (x - mean)`` (it is not
            divided by ``number_samples``) and the standard deviation is
            ``sqrt(accumulator / number_samples)``.
        """
        deviation_from_old = x - old_mean
        updated_mean = old_mean + deviation_from_old / number_samples
        accumulator = old_variance + deviation_from_old * (x - updated_mean)
        spread = torch.sqrt(accumulator / number_samples)
        return updated_mean, accumulator, spread

    @staticmethod
    def probit(mean, standard_deviation):
        """Return ``mean / sqrt(1 + pi * standard_deviation**2 / 8)``.

        ``standard_deviation`` must make the scale a tensor, because the
        square root is taken with ``torch.sqrt``.
        """
        scale = (1 + math.pi * (standard_deviation ** 2) / 8)
        return mean / torch.sqrt(scale)

    @staticmethod
    def norm_1(x):
        """Return ``torch.norm`` of ``x`` with ``p=1``."""
        return torch.norm(x, p=1)

    @staticmethod
    def norm_2(x):
        """Return ``torch.norm`` of ``x`` with ``p=2``."""
        return torch.norm(x, p=2)

    @staticmethod
    def frobenius_norm(x):
        """Return ``torch.norm`` of ``x`` with ``p='fro'``."""
        return torch.norm(x, p='fro')
"online" or electronic documentation and any updates we provide in our discretion (together, the "Software"). +# +# By installing, copying, or otherwise using this Software, found at https://github.com/Ivsucram/ATL_Matlab, you agree to be bound by the terms of this NTUITIVE-LA. If you do not agree, do not install copy or use the Software. The Software is protected by copyright and other intellectual property laws and is licensed, not sold. If you wish to obtain a commercial royalty bearing license to this software please contact us at marcus.decarvalho@ntu.edu.sg. +# +# SCOPE OF RIGHTS: +# You may use, copy, reproduce, and distribute this Software for any non-commercial purpose, subject to the restrictions in this NTUITIVE-LA. Some purposes which can be non-commercial are teaching, academic research, public demonstrations and personal experimentation. You may also distribute this Software with books or other teaching materials, or publish the Software on websites, that are intended to teach the use of the Software for academic or other non-commercial purposes. +# You may not use or distribute this Software or any derivative works in any form for commercial purposes. Examples of commercial purposes would be running business operations, licensing, leasing, or selling the Software, distributing the Software for use with commercial products, using the Software in the creation or use of commercial products or any other activity which purpose is to procure a commercial gain to you or others. +# If the Software includes source code or data, you may create derivative works of such portions of the Software and distribute the modified Software for non-commercial purposes, as provided herein. +# If you distribute the Software or any derivative works of the Software, you will distribute them under the same terms and conditions as in this license, and you will not grant other rights to the Software or derivative works that are different from those provided by this NTUITIVE-LA. 
+# If you have created derivative works of the Software, and distribute such derivative works, you will cause the modified files to carry prominent notices so that recipients know that they are not receiving the original Software. Such notices must state: (i) that you have changed the Software; and (ii) the date of any changes. +# +# You may not distribute this Software or any derivative works. +# In return, we simply require that you agree: +# 1. That you will not remove any copyright or other notices from the Software. +# 2. That if any of the Software is in binary format, you will not attempt to modify such portions of the Software, or to reverse engineer or decompile them, except and only to the extent authorized by applicable law. +# 3. That NTUITIVE is granted back, without any restrictions or limitations, a non-exclusive, perpetual, irrevocable, royalty-free, assignable and sub-licensable license, to reproduce, publicly perform or display, install, use, modify, post, distribute, make and have made, sell and transfer your modifications to and/or derivative works of the Software source code or data, for any purpose. +# 4. That any feedback about the Software provided by you to us is voluntarily given, and NTUITIVE shall be free to use the feedback as it sees fit without obligation or restriction of any kind, even if the feedback is designated by you as confidential. +# 5. THAT THE SOFTWARE COMES "AS IS", WITH NO WARRANTIES. THIS MEANS NO EXPRESS, IMPLIED OR STATUTORY WARRANTY, INCLUDING WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, ANY WARRANTY AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE SOFTWARE OR ANY WARRANTY OF TITLE OR NON-INFRINGEMENT. THERE IS NO WARRANTY THAT THIS SOFTWARE WILL FULFILL ANY OF YOUR PARTICULAR PURPOSES OR NEEDS. ALSO, YOU MUST PASS THIS DISCLAIMER ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 6. 
THAT NEITHER NTUITIVE NOR NTU NOR ANY CONTRIBUTOR TO THE SOFTWARE WILL BE LIABLE FOR ANY DAMAGES RELATED TO THE SOFTWARE OR THIS NTUITIVE-LA, INCLUDING DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL OR INCIDENTAL DAMAGES, TO THE MAXIMUM EXTENT THE LAW PERMITS, NO MATTER WHAT LEGAL THEORY IT IS BASED ON. ALSO, YOU MUST PASS THIS LIMITATION OF LIABILITY ON WHENEVER YOU DISTRIBUTE THE SOFTWARE OR DERIVATIVE WORKS. +# 7. That we have no duty of reasonable care or lack of negligence, and we are not obligated to (and will not) provide technical support for the Software. +# 8. That if you breach this NTUITIVE-LA or if you sue anyone over patents that you think may apply to or read on the Software or anyone's use of the Software, this NTUITIVE-LA (and your license and rights obtained herein) terminate automatically. Upon any such termination, you shall destroy all of your copies of the Software immediately. Sections 3, 4, 5, 6, 7, 8, 11 and 12 of this NTUITIVE-LA shall survive any termination of this NTUITIVE-LA. +# 9. That the patent rights, if any, granted to you in this NTUITIVE-LA only apply to the Software, not to any derivative works you make. +# 10. That the Software may be subject to U.S. export jurisdiction at the time it is licensed to you, and it may be subject to additional export or import laws in other places. You agree to comply with all such laws and regulations that may apply to the Software after delivery of the software to you. +# 11. That all rights not expressly granted to you in this NTUITIVE-LA are reserved. +# 12. That this NTUITIVE-LA shall be construed and controlled by the laws of the Republic of Singapore without regard to conflicts of law. If any provision of this NTUITIVE-LA shall be deemed unenforceable or contrary to law, the rest of this NTUITIVE-LA shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. +# +# Copyright (c) NTUITIVE. All rights reserved. 
from MyUtil import MyUtil as MyUtil
from ElasticNodes import ElasticNodes
from MySingletons import MyDevice

import numpy as np
import torch


# NOTE(review): the gradient-reversal function below is disabled, yet
# ``NeuralNetwork.forward_pass`` still dispatches
# ACTIVATION_FUNCTION_REVERSE_LAYER to ``self.reverse_layer``, which is not
# defined in this class -- confirm whether ElasticNodes provides it.
# class ReverseLayerFunction(torch.autograd.Function):
#     @staticmethod
#     def forward(self, x, alpha=1.0):
#         self.alpha = alpha
#
#         return x.view_as(x)
#
#     @staticmethod
#     def backward(self, grad_output):
#         output = grad_output.neg() * self.alpha
#
#         return output, None


class NeuralNetwork(ElasticNodes):
    """Fully connected network with hand-rolled SGD and elastic width.

    ``layers`` lists the node count of every layer (input, hidden..., output).
    Hidden-layer parameters live in the ``weight`` / ``bias`` lists, the output
    layer in ``output_weight`` / ``output_bias``; each has a matching momentum
    buffer.  Weights are stored as ``(nodes_after, nodes_before)`` and biases
    as ``(1, nodes_after)``, as created in ``__init__``.

    Growing/pruning bookkeeping (``growable``, ``prunable``, ``bias_mean``,
    ``number_samples_feed``, ...) is read and written here but is not declared
    in this class -- presumably it comes from ``ElasticNodes``.
    """

    layers = None
    layer_value = None
    output_layer_value = None

    weight = None
    bias = None
    momentum = None
    bias_momentum = None

    output_weight = None
    output_bias = None
    output_momentum = None
    output_bias_momentum = None

    activation_function = None
    output_activation_function = None
    loss_function = None

    learning_rate = 0.01
    momentum_rate = 0.95

    error_value = None
    loss_value = None
    classification_rate = None
    misclassified = None

    output_beta = None
    output_beta_decreasing_factor = None

    __Eh = None
    __Eh2 = None

    @property
    def number_hidden_layers(self):
        """Number of hidden layers (``layers`` minus input and output)."""
        return len(self.layers) - 2

    @property
    def input_size(self):
        """Node count of the input layer."""
        return self.layers[0]

    @property
    def output_size(self):
        """Node count of the output layer."""
        return self.layers[-1]

    @property
    def output(self):
        """Output-layer activations of the latest forward pass."""
        return self.output_layer_value

    @property
    def raw_output(self):
        """``torch.max`` of the output along axis 1 (values and indices)."""
        return torch.max(self.output, axis=1)

    @property
    def outputed_classes(self):
        """Index of the largest output per sample (argmax along axis 1)."""
        return torch.argmax(self.output, axis=1)

    @property
    def residual_error(self):
        """One minus the largest output value of each sample."""
        return 1 - self.raw_output.values

    # Integer codes; every family continues the numbering of the previous one.
    ACTIVATION_FUNCTION_AFFINE = 1
    ACTIVATION_FUNCTION_SIGMOID = ACTIVATION_FUNCTION_AFFINE + 1
    ACTIVATION_FUNCTION_TANH = ACTIVATION_FUNCTION_SIGMOID + 1
    ACTIVATION_FUNCTION_RELU = ACTIVATION_FUNCTION_TANH + 1
    ACTIVATION_FUNCTION_LINEAR = ACTIVATION_FUNCTION_RELU + 1
    ACTIVATION_FUNCTION_SOFTMAX = ACTIVATION_FUNCTION_LINEAR + 1
    ACTIVATION_FUNCTION_REVERSE_LAYER = ACTIVATION_FUNCTION_SOFTMAX + 1

    LOSS_FUNCTION_MSE = ACTIVATION_FUNCTION_REVERSE_LAYER + 1
    LOSS_FUNCTION_CROSS_ENTROPY = LOSS_FUNCTION_MSE + 1

    PRUNE_NODE_STRATEGY_SINGLE = LOSS_FUNCTION_CROSS_ENTROPY + 1
    PRUNE_NODE_STRATEGY_MULTIPLE = PRUNE_NODE_STRATEGY_SINGLE + 1

    def __init__(self, layers: list, init_weights: bool = True):
        """Build the network described by ``layers``.

        Args:
            layers: node count per layer; the list is stored by reference and
                later mutated by ``grow_node`` / ``prune_node``.
            init_weights: when True, Xavier-initialise every parameter and
                zero the momentum buffers; when False, leave all of them as
                ``None`` and set ``momentum_rate`` to 0.

        Hidden layers default to sigmoid, the output layer to softmax and the
        loss to cross-entropy.
        """
        self.layers = layers

        self.weight = []
        self.bias = []
        self.momentum = []
        self.bias_momentum = []
        self.activation_function = []

        for i in range(self.number_hidden_layers):
            nodes_before = layers[i]
            nodes_after = layers[i + 1]

            if init_weights:
                self.weight.append(self.xavier_weight_initialization(nodes_after, nodes_before))
                self.bias.append(self.xavier_weight_initialization(1, nodes_after))
                self.momentum.append(
                    torch.zeros(self.weight[i].shape, dtype=torch.float, device=MyDevice().get()))
                self.bias_momentum.append(
                    torch.zeros(self.bias[i].shape, dtype=torch.float, device=MyDevice().get()))
            else:
                self.weight.append(None)
                self.bias.append(None)
                self.momentum.append(None)
                self.bias_momentum.append(None)
                self.momentum_rate = 0

            self.activation_function.append(self.ACTIVATION_FUNCTION_SIGMOID)

        if init_weights:
            nodes_before = layers[-2]
            nodes_after = layers[-1]

            self.output_weight = self.xavier_weight_initialization(nodes_after, nodes_before)
            self.output_bias = self.xavier_weight_initialization(1, nodes_after)
            self.output_momentum = torch.zeros(
                self.output_weight.shape, dtype=torch.float, device=MyDevice().get())
            self.output_bias_momentum = torch.zeros(
                self.output_bias.shape, dtype=torch.float, device=MyDevice().get())
        else:
            self.output_weight = None
            self.output_bias = None
            self.output_momentum = None
            self.output_bias_momentum = None
            self.momentum_rate = 0

        self.output_activation_function = self.ACTIVATION_FUNCTION_SOFTMAX
        self.loss_function = self.LOSS_FUNCTION_CROSS_ENTROPY

        ElasticNodes.__init__(self, len(self.layers))

    ##### Weight initializations #####

    def xavier_weight_initialization(self, n_out: int, n_in: int, uniform: bool = False):
        """Return a new ``(n_out, n_in)`` float tensor with Xavier init.

        The tensor requires grad and is placed on ``MyDevice().get()``.
        ``uniform`` selects the uniform variant instead of the normal one.
        """
        blank = torch.zeros(int(n_out), int(n_in), dtype=torch.float,
                            requires_grad=True, device=MyDevice().get())
        if uniform:
            # In-place ``xavier_uniform_`` replaces the deprecated
            # ``xavier_uniform`` alias.
            return torch.nn.init.xavier_uniform_(tensor=blank)
        return torch.nn.init.xavier_normal_(tensor=blank)

    def he_weight_initialization(self, n_out, n_in, shape=None):
        """Sample He-normal weights (std ``sqrt(2 / n_in)``) as a NumPy array.

        TODO: unlike ``xavier_weight_initialization`` this returns a NumPy
        array, not a torch tensor.
        """
        mean = 0.0
        sigma = np.sqrt(2 / n_in)
        if shape is None:
            shape = (n_out, n_in)
        return np.random.normal(mean, sigma, shape)

    ##### Noise #####

    def masking_noise(self, x: torch.Tensor, noise_ratio: float = 0.0):
        """Return a detached copy of ``x`` with random entries set to 0.

        An entry is zeroed when a uniform [0, 1) draw is <= ``noise_ratio``.
        """
        drop_mask = torch.rand(x.shape, device=MyDevice().get()) <= noise_ratio
        return x.detach().masked_fill(drop_mask, 0)

    ##### Activation functions #####

    @staticmethod
    def sigmoid(z: torch.Tensor):
        return torch.sigmoid(z)

    @staticmethod
    def tanh(z):
        return torch.tanh(z)

    @staticmethod
    def relu(z):
        return torch.nn.functional.relu(z)

    @staticmethod
    def linear(layer_value: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor):
        """Affine map ``layer_value @ weight.T + bias``."""
        return torch.nn.functional.linear(layer_value, weight, bias)

    @staticmethod
    def softmax(z, axis: int = 1):
        return torch.nn.functional.softmax(z, dim=axis)

    def reset_grad(self):
        """Detach every parameter from its graph and re-enable gradients.

        This turns the parameters back into leaf tensors so the next
        ``backward()`` fills their ``.grad``.
        """
        for i in range(self.number_hidden_layers):
            self.weight[i] = self.weight[i].detach()
            self.bias[i] = self.bias[i].detach()
            self.weight[i].requires_grad = True
            self.bias[i].requires_grad = True

        self.output_weight = self.output_weight.detach()
        self.output_bias = self.output_bias.detach()
        self.output_weight.requires_grad = True
        self.output_bias.requires_grad = True

    def feedforward(self, x: torch.Tensor, y: torch.Tensor, train: bool = False):
        """Run a forward pass on ``x`` and compute error/loss against ``y``.

        Returns ``self`` so calls can be chained.
        """
        return self.forward_pass(x, train=train).calculate_error(y)

    def backpropagate(self):
        """Back-propagate ``loss_value``; returns ``self``."""
        self.loss_value.backward()

        return self

    def test(self, x: torch.Tensor, y: torch.Tensor, is_beta_updatable: bool = False):
        """Evaluate on ``(x, y)`` and store the classification statistics.

        ``y`` is reduced with argmax along axis 1 to obtain the true classes.
        Sets ``misclassified`` and ``classification_rate``.  When
        ``is_beta_updatable`` is True, ``output_beta`` and
        ``output_beta_decreasing_factor`` are adjusted once per sample
        depending on whether that sample was classified correctly.

        Returns ``self``.
        """
        self.feedforward(x=x, y=y)

        m = y.shape[0]

        true_classes = torch.argmax(y, axis=1)
        self.misclassified = torch.sum(torch.ne(self.outputed_classes, true_classes)).item()
        self.classification_rate = 1 - self.misclassified / m

        if is_beta_updatable:
            # Fixes: compare against the per-sample argmax (the output is 2-D,
            # so ``max(axis=2)`` was invalid), use the local ``true_classes``,
            # and use the builtin ``max`` (``np.max(x, 0)`` treats 0 as an
            # axis).
            predicted_classes = self.outputed_classes
            for i in range(m):
                if true_classes[i] == predicted_classes[i]:
                    self.output_beta = max(self.output_beta * self.output_beta_decreasing_factor, 0)
                    self.output_beta_decreasing_factor = max(self.output_beta_decreasing_factor - 0.01, 0)
                else:
                    # NOTE(review): ``max(..., 1)`` is a lower bound of 1;
                    # confirm this was not meant to be an upper bound (``min``).
                    self.output_beta = max(self.output_beta * (1 + self.output_beta_decreasing_factor), 1)
                    self.output_beta_decreasing_factor = max(self.output_beta_decreasing_factor + 0.01, 1)

        return self

    def train(self, x: torch.Tensor, y: torch.Tensor, weight_no: int = None, is_neg_grad: bool = False):
        """Run one forward/backward pass and update the parameters.

        Args:
            x, y: input batch and its targets.
            weight_no: index of the single layer to update; ``None`` updates
                every layer, from the output layer down to the first hidden one.
            is_neg_grad: forwarded to ``update_weight``.
        """
        self.feedforward(x=x, y=y, train=True).backpropagate()

        if weight_no is None:
            for weight_no in range(self.number_hidden_layers, -1, -1):
                self.update_weight(weight_no=weight_no, is_neg_grad=is_neg_grad)
        else:
            self.update_weight(weight_no=weight_no, is_neg_grad=is_neg_grad)

    def update_weight(self, weight_no: int, is_neg_grad: bool = False):
        """Apply one SGD-with-momentum step to a single layer.

        Args:
            weight_no: hidden-layer index; any value
                ``>= number_hidden_layers`` selects the output layer.
            is_neg_grad: when True the step is added instead of subtracted
                (gradient ascent).
        """
        if weight_no >= self.number_hidden_layers:
            dW = self.learning_rate * self.output_weight.grad
            db = self.learning_rate * self.output_bias.grad
            if self.momentum_rate > 0:
                self.output_momentum = self.momentum_rate * self.output_momentum + dW
                self.output_bias_momentum = self.momentum_rate * self.output_bias_momentum + db
                dW = self.output_momentum
                db = self.output_bias_momentum
            if is_neg_grad:
                dW, db = dW.neg(), db.neg()
            self.output_weight = self.output_weight - dW
            self.output_bias = self.output_bias - db
        else:
            dW = self.learning_rate * self.weight[weight_no].grad
            db = self.learning_rate * self.bias[weight_no].grad
            if self.momentum_rate > 0:
                self.momentum[weight_no] = self.momentum_rate * self.momentum[weight_no] + dW
                self.bias_momentum[weight_no] = self.momentum_rate * self.bias_momentum[weight_no] + db
                dW = self.momentum[weight_no]
                db = self.bias_momentum[weight_no]
            if is_neg_grad:
                dW, db = dW.neg(), db.neg()
            self.weight[weight_no] = self.weight[weight_no] - dW
            self.bias[weight_no] = self.bias[weight_no] - db

    def forward_pass(self, x: torch.Tensor, train: bool = False):
        """Propagate ``x`` through the network and cache every activation.

        ``layer_value[0]`` is ``x`` and ``layer_value[i + 1]`` the output of
        hidden layer ``i``; the output layer goes to ``output_layer_value``.
        With ``train=True`` the parameters are first reset via ``reset_grad``.

        Raises:
            TypeError: if a hidden layer uses ACTIVATION_FUNCTION_LINEAR.

        Returns ``self``.
        """
        if train:
            self.reset_grad()
        self.layer_value = []
        self.layer_value.append(x)

        # Activations applied on top of the affine map of the layer.
        on_affine = {
            self.ACTIVATION_FUNCTION_AFFINE: lambda z: z,
            self.ACTIVATION_FUNCTION_SIGMOID: self.sigmoid,
            self.ACTIVATION_FUNCTION_TANH: self.tanh,
            self.ACTIVATION_FUNCTION_RELU: self.relu,
            self.ACTIVATION_FUNCTION_SOFTMAX: self.softmax,
        }

        for i in range(self.number_hidden_layers):
            kind = self.activation_function[i]
            if kind in on_affine:
                pre_activation = self.linear(self.layer_value[i], self.weight[i], self.bias[i])
                self.layer_value.append(on_affine[kind](pre_activation))
            elif kind == self.ACTIVATION_FUNCTION_LINEAR:
                raise TypeError('Not implemented')
            elif kind == self.ACTIVATION_FUNCTION_REVERSE_LAYER:
                self.layer_value.append(self.reverse_layer(self.layer_value[i]))

        # An output activation outside the handled codes (e.g. LINEAR) leaves
        # ``output_layer_value`` untouched, as before.
        out_kind = self.output_activation_function
        if out_kind in on_affine:
            pre_activation = self.linear(self.layer_value[-1], self.output_weight, self.output_bias)
            self.output_layer_value = on_affine[out_kind](pre_activation)
        elif out_kind == self.ACTIVATION_FUNCTION_REVERSE_LAYER:
            self.output_layer_value = self.reverse_layer(self.layer_value[-1])

        return self

    def calculate_error(self, y: torch.Tensor):
        """Store ``error_value`` (``y - output``) and ``loss_value``.

        Returns ``self``.
        """
        self.error_value = y - self.output_layer_value

        if self.loss_function == self.LOSS_FUNCTION_MSE:
            self.loss_value = torch.nn.functional.mse_loss(self.output_layer_value, y)
        elif self.loss_function == self.LOSS_FUNCTION_CROSS_ENTROPY:
            # NOTE(review): ``cross_entropy`` applies log-softmax itself; with
            # the default softmax output this normalises twice.  Left as is so
            # training results do not change.
            self.loss_value = torch.nn.functional.cross_entropy(self.output_layer_value, torch.argmax(y, 1))

        return self

    def compute_expected_values(self, in_place: bool = False):
        """Update the running input statistics and refresh ``Eh`` / ``Eh2``.

        ``in_place`` is accepted but not used.
        """
        self.data_mean, self.data_variance, self.data_standard_deviation = \
            MyUtil.recursive_mean_standard_deviation(self.layer_value[0],
                                                     self.data_mean,
                                                     self.data_variance,
                                                     self.number_samples_feed)

        self.Eh, self.Eh2 = self.compute_inbound_expected_values()

    def compute_inbound_expected_values(self, number_hidden_layer: int = None):
        """Recursively compute the expected activation of a layer.

        Args:
            number_hidden_layer: layer index to evaluate; defaults to the last
                hidden layer.  An index equal to ``number_hidden_layers`` uses
                the output-layer parameters.

        Returns:
            ``(Eh, Eh ** 2)``.
        """
        nhl = number_hidden_layer  # readability
        if nhl is None:
            nhl = self.number_hidden_layers - 1

        if nhl == 0:
            inference, center, std = (1, self.data_mean, self.data_standard_deviation)
            py = MyUtil.probit(center, std)
            Eh = inference * self.sigmoid(self.linear(self.weight[0], py, self.bias[0].T))
        else:
            Eh, _ = self.compute_inbound_expected_values(number_hidden_layer=nhl - 1)
            # Fix: the guard used ``number_hidden_layers + 1``, which indexed
            # ``self.weight`` out of range and made the output branch dead.
            if nhl < self.number_hidden_layers:
                weight, bias = self.weight[nhl], self.bias[nhl]
            else:
                weight, bias = self.output_weight, self.output_bias
            Eh = self.sigmoid(self.linear(weight, Eh.T, bias.T))

        return Eh, Eh ** 2

    @property
    def Eh(self):
        return self.__Eh

    @Eh.setter
    def Eh(self, value: torch.Tensor):
        self.__Eh = value

    @property
    def Eh2(self):
        return self.__Eh2

    @Eh2.setter
    def Eh2(self, value: torch.Tensor):
        self.__Eh2 = value

    @property
    def Ey(self):
        """Softmax (axis 0) of the output layer applied to ``Eh``."""
        return self.softmax(self.linear(self.output_weight, self.Eh.T, self.output_bias.T), axis=0)

    @property
    def Ey2(self):
        """Softmax (axis 0) of the output layer applied to ``Eh2``."""
        return self.softmax(self.linear(self.output_weight, self.Eh2.T, self.output_bias.T), axis=0)

    @property
    def network_variance(self):
        """Frobenius norm of ``Ey2 - Ey ** 2``."""
        return MyUtil.frobenius_norm(self.Ey2 - self.Ey ** 2)

    def compute_bias(self, y):
        """Frobenius norm of ``(Ey.T - y) ** 2``."""
        return MyUtil.frobenius_norm((self.Ey.T - y) ** 2)

    def width_adaptation_stepwise(self, y, prune_strategy: int = None):
        """Update bias/variance statistics and the grow/prune decisions.

        Increments the sample counters, refreshes the expected values and the
        running mean/std of network bias and variance for the last hidden
        layer, tracks their minima, appends to ``BIAS`` / ``VAR`` and finally
        sets ``growable[nhl]`` and ``prunable[nhl]``.

        Args:
            y: target(s) of the current sample.
            prune_strategy: one of the ``PRUNE_NODE_STRATEGY_*`` codes;
                defaults to ``PRUNE_NODE_STRATEGY_MULTIPLE``.
        """
        if prune_strategy is None:
            prune_strategy = self.PRUNE_NODE_STRATEGY_MULTIPLE

        nhl: int = self.number_hidden_layers

        self.number_samples_feed = self.number_samples_feed + 1
        self.number_samples_layer[nhl] = self.number_samples_layer[nhl] + 1
        self.compute_expected_values()

        self.bias_mean[nhl], self.bias_variance[nhl], self.bias_standard_deviation[nhl] = \
            MyUtil.recursive_mean_standard_deviation(self.compute_bias(y),
                                                     self.bias_mean[nhl],
                                                     self.bias_variance[nhl],
                                                     self.number_samples_feed)

        self.var_mean[nhl], self.var_variance[nhl], self.var_standard_deviation[nhl] = \
            MyUtil.recursive_mean_standard_deviation(self.network_variance,
                                                     self.var_mean[nhl],
                                                     self.var_variance[nhl],
                                                     self.number_samples_feed)

        # The bias minima restart on the first sample or right after a grow signal.
        if self.number_samples_layer[nhl] <= 1 or self.growable[nhl]:
            self.minimum_bias_mean[nhl] = self.bias_mean[nhl]
            self.minimum_bias_standard_deviation[nhl] = self.bias_standard_deviation[nhl]
        else:
            self.minimum_bias_mean[nhl] = np.min([self.minimum_bias_mean[nhl], self.bias_mean[nhl]])
            self.minimum_bias_standard_deviation[nhl] = np.min(
                [self.minimum_bias_standard_deviation[nhl], self.bias_standard_deviation[nhl]])

        # The variance minima restart during warm-up or right after a prune signal.
        if self.number_samples_layer[nhl] <= self.input_size + 1 or self.prunable[nhl][0] != -1:
            self.minimum_var_mean[nhl] = self.var_mean[nhl]
            self.minimum_var_standard_deviation[nhl] = self.var_standard_deviation[nhl]
        else:
            self.minimum_var_mean[nhl] = np.min([self.minimum_var_mean[nhl], self.var_mean[nhl]])
            self.minimum_var_standard_deviation[nhl] = np.min(
                [self.minimum_var_standard_deviation[nhl], self.var_standard_deviation[nhl]])

        self.BIAS.append(self.bias_mean[nhl])
        self.VAR.append(self.var_mean[nhl])

        if self.output_size == 512:  # STL or CIFAR
            alpha_1 = 1.45
            alpha_2 = 0.95
        else:
            alpha_1 = 1.25
            alpha_2 = 0.75

        self.growable[nhl] = self.is_growable(self.compute_bias(y), alpha_1, alpha_2)
        self.prunable[nhl] = self.is_prunable(prune_strategy, 2 * alpha_1, 2 * alpha_2)

    def is_growable(self, bias: torch.Tensor, alpha_1: float = 1.25, alpha_2: float = 0.75):
        """Return True when the bias statistics signal that a node is needed.

        The condition is ``mean + std >= min_mean + (alpha_1 * exp(-bias) +
        alpha_2) * min_std`` once more than one sample has been seen.
        """
        nhl = self.number_hidden_layers  # readability

        current = self.bias_mean[nhl] + self.bias_standard_deviation[nhl]
        biased_min = self.minimum_bias_mean[nhl] \
                     + (alpha_1 * torch.exp(-bias) + alpha_2) * self.minimum_bias_standard_deviation[nhl]

        if self.number_samples_layer[nhl] > 1 and current >= biased_min:
            return True
        return False

    def is_prunable(self, prune_strategy: int = None, alpha_1: float = 2.5, alpha_2: float = 1.5):
        """Return the indices of the nodes to prune, or ``[-1]`` for none.

        Pruning is only considered when the layer is not growable, has more
        than one node, has passed the warm-up of ``input_size + 1`` samples
        and the variance condition holds.  With the SINGLE strategy the node
        with the smallest ``Eh`` is returned; with MULTIPLE every node whose
        ``Eh`` is below ``|mean(Eh) - var(Eh)|`` (falling back to the single
        smallest one).  Index results are always 1-D tensors.
        """
        if prune_strategy is None:
            prune_strategy = self.PRUNE_NODE_STRATEGY_MULTIPLE
        nhl = self.number_hidden_layers  # readability

        current = self.var_mean[nhl] + self.var_standard_deviation[nhl]
        biased_min = self.minimum_var_mean[nhl] \
                     + (alpha_1 * torch.exp(-self.network_variance) + alpha_2) * self.minimum_var_standard_deviation[nhl]

        if not self.growable[nhl] \
                and self.layers[nhl] > 1 \
                and self.number_samples_layer[nhl] > self.input_size + 1 \
                and current >= biased_min:

            # Fix: ``argmin`` yields a 0-dim tensor, which cannot be indexed
            # with ``[0]`` the way ``width_adaptation_stepwise`` does; reshape
            # it to one element.
            if prune_strategy == self.PRUNE_NODE_STRATEGY_SINGLE:
                return torch.argmin(self.Eh).reshape(1)
            elif prune_strategy == self.PRUNE_NODE_STRATEGY_MULTIPLE:
                nodes_to_prune = torch.where(self.Eh < torch.abs(torch.mean(self.Eh) - torch.var(self.Eh)))
                if len(nodes_to_prune[0]):
                    return nodes_to_prune[0]
                else:
                    return torch.argmin(self.Eh).reshape(1)

        return [-1]

    def grow_node(self, layer_number: int):
        """Add one node to layer ``layer_number`` (index into ``layers``).

        The inbound weight matrix gains a row and the bias an element; the
        outbound weight matrix gains a column.
        """
        self.layers[layer_number] += 1
        # Fix: the input layer (index 0) has no inbound parameters; the old
        # ``>= 0`` guard passed -1 and altered the last hidden layer.
        if layer_number > 0:
            self.grow_weight_row(layer_number - 1)
            self.grow_bias(layer_number - 1)
        if layer_number <= self.number_hidden_layers:
            self.grow_weight_column(layer_number)

    def grow_weight_row(self, layer_number: int):
        """Append one Xavier-initialised row (and a zero momentum row).

        ``layer_number >= len(self.weight)`` addresses the output layer.
        """
        def add_element(tensor_data: torch.Tensor, momentum_tensor_data: torch.Tensor, n_out: int):
            tensor_data = torch.cat((tensor_data, self.xavier_weight_initialization(1, n_out)), dim=0)
            momentum_tensor_data = torch.cat(
                (momentum_tensor_data, torch.zeros(1, n_out, dtype=torch.float, device=MyDevice().get())), dim=0)
            return tensor_data, momentum_tensor_data

        if layer_number >= len(self.weight):
            [_, n_out] = self.output_weight.shape
            self.output_weight, self.output_momentum = add_element(self.output_weight, self.output_momentum, n_out)
        else:
            [_, n_out] = self.weight[layer_number].shape
            self.weight[layer_number], self.momentum[layer_number] = add_element(
                self.weight[layer_number], self.momentum[layer_number], n_out)

    def grow_weight_column(self, layer_number: int):
        """Append one Xavier-initialised column (and a zero momentum column).

        ``layer_number >= len(self.weight)`` addresses the output layer.
        """
        def add_element(tensor_data: torch.Tensor, momentum_tensor_data: torch.Tensor, n_out: int):
            tensor_data = torch.cat((tensor_data, self.xavier_weight_initialization(n_out, 1)), dim=1)
            momentum_tensor_data = torch.cat(
                (momentum_tensor_data, torch.zeros(n_out, 1, dtype=torch.float, device=MyDevice().get())), dim=1)
            return tensor_data, momentum_tensor_data

        if layer_number >= len(self.weight):
            [n_out, _] = self.output_weight.shape
            self.output_weight, self.output_momentum = add_element(self.output_weight, self.output_momentum, n_out)
        else:
            [n_out, _] = self.weight[layer_number].shape
            self.weight[layer_number], self.momentum[layer_number] = add_element(
                self.weight[layer_number], self.momentum[layer_number], n_out)

    def grow_bias(self, layer_number):
        """Append one Xavier-initialised bias element (and a zero momentum).

        ``layer_number >= len(self.bias)`` addresses the output layer.
        """
        def add_element(tensor_data: torch.Tensor, momentum_tensor_data: torch.Tensor, n_out: int):
            # ``n_out`` is the first dimension of the bias, i.e. 1 for the
            # ``(1, nodes)`` biases built in ``__init__``.
            tensor_data = torch.cat((tensor_data, self.xavier_weight_initialization(1, n_out)), dim=1)
            momentum_tensor_data = torch.cat(
                (momentum_tensor_data, torch.zeros(1, n_out, dtype=torch.float, device=MyDevice().get())), dim=1)
            return tensor_data, momentum_tensor_data

        if layer_number >= len(self.bias):
            [n_out, _] = self.output_bias.shape
            self.output_bias, self.output_bias_momentum = add_element(
                self.output_bias, self.output_bias_momentum, n_out)
        else:
            [n_out, _] = self.bias[layer_number].shape
            self.bias[layer_number], self.bias_momentum[layer_number] = add_element(
                self.bias[layer_number], self.bias_momentum[layer_number], n_out)

    def prune_node(self, layer_number: int, node_number: int):
        """Remove node ``node_number`` from layer ``layer_number``.

        Drops the matching row/bias element of the inbound parameters and the
        matching column of the outbound weight matrix.
        """
        self.layers[layer_number] -= 1
        # Fix: same guard as ``grow_node`` -- the input layer has no inbound
        # parameters to shrink.
        if layer_number > 0:
            self.prune_weight_row(layer_number - 1, node_number)
            self.prune_bias(layer_number - 1, node_number)
        if layer_number <= self.number_hidden_layers:
            self.prune_weight_column(layer_number, node_number)

    def prune_weight_row(self, layer_number: int, node_number: int):
        """Delete row ``node_number`` of a weight matrix and its momentum."""
        def remove_nth_row(tensor_data: torch.Tensor, n: int):
            return torch.cat([tensor_data[:n], tensor_data[n+1:]])

        if layer_number >= len(self.weight):
            self.output_weight = remove_nth_row(self.output_weight, node_number)
            self.output_momentum = remove_nth_row(self.output_momentum, node_number)
        else:
            self.weight[layer_number] = remove_nth_row(self.weight[layer_number], node_number)
            self.momentum[layer_number] = remove_nth_row(self.momentum[layer_number], node_number)

    def prune_weight_column(self, layer_number: int, node_number: int):
        """Delete column ``node_number`` of a weight matrix and its momentum."""
        def remove_nth_column(weight_tensor: torch.Tensor, n: int):
            return torch.cat([weight_tensor.T[:n], weight_tensor.T[n+1:]]).T

        if layer_number >= len(self.weight):
            self.output_weight = remove_nth_column(self.output_weight, node_number)
            self.output_momentum = remove_nth_column(self.output_momentum, node_number)
        else:
            self.weight[layer_number] = remove_nth_column(self.weight[layer_number], node_number)
            self.momentum[layer_number] = remove_nth_column(self.momentum[layer_number], node_number)

    def prune_bias(self, layer_number: int, node_number: int):
        """Delete element ``node_number`` of a ``(1, nodes)`` bias and its momentum."""
        def remove_nth_element(bias_tensor: torch.Tensor, n: int):
            bias_tensor = torch.cat([bias_tensor[0][:n], bias_tensor[0][n+1:]])
            return bias_tensor.view(1, bias_tensor.shape[0])

        if layer_number >= len(self.bias):
            self.output_bias = remove_nth_element(self.output_bias, node_number)
            self.output_bias_momentum = remove_nth_element(self.output_bias_momentum, node_number)
        else:
            self.bias[layer_number] = remove_nth_element(self.bias[layer_number], node_number)
            self.bias_momentum[layer_number] = remove_nth_element(self.bias_momentum[layer_number], node_number)