FeatureTools TypeError: unhashable type: 'set'

Question

我正在使用 featuretools 尝试运行以下代码：

features, feature_names = ft.dfs(entityset = es, target_entity = 'demo', 
          agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
                            'sum', 'mean'],
          trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])

但是我遇到了如下错误：

TypeError                                 Traceback (most recent call last)
<ipython-input-17-89e925ff895d> in <module>
      3           agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
      4                             'sum', 'mean'],
----> 5           trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])

~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     44                     ep.on_error(error=e,
     45                                 runtime=runtime)
---> 46                 raise e
     47 
     48             # send return value

~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     36                 # call function
     37                 start = time.time()
---> 38                 return_value = func(*args, **kwargs)
     39                 runtime = time.time() - start
     40             except Exception as e:

~/.local/lib/python3.6/site-packages/featuretools/synthesis/dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)
    226                                                   n_jobs=n_jobs,
    227                                                   dask_kwargs=dask_kwargs,
--> 228                                                   verbose=verbose)
    229     return feature_matrix, features

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_feature_matrix(features, entityset, cutoff_time, instance_ids, entities, relationships, cutoff_time_in_index, training_window, approximate, save_progress, verbose, chunk_size, n_jobs, dask_kwargs)
    265                                                  cutoff_df_time_var=cutoff_df_time_var,
    266                                                  target_time=target_time,
--> 267                                                  pass_columns=pass_columns)
    268 
    269     feature_matrix = pd.concat(feature_matrix)

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in linear_calculate_chunks(chunks, feature_set, approximate, training_window, verbose, save_progress, entityset, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
    496                                           no_unapproximated_aggs,
    497                                           cutoff_df_time_var,
--> 498                                           target_time, pass_columns)
    499         feature_matrix.append(_feature_matrix)
    500         # Do a manual garbage collection in case objects from calculate_chunk

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_chunk(chunk, feature_set, entityset, approximate, training_window, verbose, save_progress, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
    341                                            ids,
    342                                            precalculated_features=precalculated_features_trie,
--> 343                                            training_window=window)
    344 
    345             id_name = _feature_matrix.index.name

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/utils.py in wrapped(*args, **kwargs)
     35         def wrapped(*args, **kwargs):
     36             if save_progress is None:
---> 37                 r = method(*args, **kwargs)
     38             else:
     39                 time = args[0].to_pydatetime()

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calc_results(time_last, ids, precalculated_features, training_window)
    316                                               ignored=all_approx_feature_set)
    317 
--> 318             matrix = calculator.run(ids)
    319             return matrix
    320 

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in run(self, instance_ids)
    100                                             precalculated_trie=self.precalculated_features,
    101                                             filter_variable=target_entity.index,
--> 102                                             filter_values=instance_ids)
    103 
    104         # The dataframe for the target entity should be stored at the root of

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in _calculate_features_for_entity(self, entity_id, feature_trie, df_trie, full_entity_df_trie, precalculated_trie, filter_variable, filter_values, parent_data)
    187                                     columns=columns,
    188                                     time_last=self.time_last,
--> 189                                     training_window=self.training_window)
    190 
    191         # Step 2: Add variables to the dataframe linking it to all ancestors.

~/.local/lib/python3.6/site-packages/featuretools/entityset/entity.py in query_by_values(self, instance_vals, variable_id, columns, time_last, training_window)
    271 
    272         if columns is not None:
--> 273             df = df[columns]
    274 
    275         return df

~/.local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):

~/.local/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality

~/.local/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2485         """Return the cached item, item represents a label indexer."""
   2486         cache = self._item_cache
-> 2487         res = cache.get(item)
   2488         if res is None:
   2489             values = self._data.get(item)

TypeError: unhashable type: 'set'

我也试过最简单的深度特征合成（dfs）代码（如下所示），但仍然遇到同样的错误：

features, feature_names = ft.dfs(entityset = es, target_entity = 'demo')

我不太确定为什么会遇到此错误。如果能就下一步该如何排查提供任何帮助或建议，我将不胜感激。在此先感谢您的帮助！

Answer 1

我找到了解决方案：我当前使用的版本中存在一个 bug，Featuretools 团队已经将其修复。只需直接从 master 分支通过 pip 安装即可：

pip install --upgrade https://github.com/featuretools/featuretools/zipball/master

Answer 2

该问题已修复，修复内容已随 Featuretools 0.9.1 发布。升级到最新版本的 Featuretools 后，此错误就不会再出现。

FeatureTools TypeError: unhashable type: 'set'

FeatureTools TypeError: unhashable type: 'set'

feature-engineering

featuretools