Solucionado (ver solução)
Solucionado
(ver solução)
1
resposta

Erro Matriz de Correlação

Olá. Ao inserir o código dados.corr().round(4) me deparo com o seguinte erro:

ValueError Traceback (most recent call last) Cell In[20], line 1 ----> 1 dados.corr().round(4)

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:10054, in DataFrame.corr(self, method, min_periods, numeric_only) 10052 cols = data.columns 10053 idx = cols.copy()

10054 mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) 10056 if method == "pearson": 10057 correl = libalgos.nancorr(mat, minp=min_periods)

File ~\anaconda3\Lib\site-packages\pandas\core\frame.py:1838, in DataFrame.to_numpy(self, dtype, copy, na_value) 1836 if dtype is not None: 1837 dtype = np.dtype(dtype) -> 1838 result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value) 1839 if result.dtype is not dtype: 1840 result = np.array(result, dtype=dtype, copy=False)

File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1732, in BlockManager.as_array(self, dtype, copy, na_value) 1730 arr.flags.writeable = False 1731 else: -> 1732 arr = self._interleave(dtype=dtype, na_value=na_value) 1733 # The underlying data was copied within _interleave, so no need 1734 # to further copy if copy=True or setting na_value 1736 if na_value is not lib.no_default:

File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:1794, in BlockManager._interleave(self, dtype, na_value) 1792 else: 1793 arr = blk.get_values(dtype) -> 1794 result[rl.indexer] = arr 1795 itemmask[rl.indexer] = 1 1797 if not itemmask.all():

ValueError: could not convert string to float: '01/01/2015'


Tentei a solução apresentada no fórum:

dados['data'] = pd.to_datetime(dados['data'])
dados['data'] = (dados['data'] - dados['data'].min()).dt.days
dados.corr().round(4)

O que resultou em outro erro: ValueError Traceback (most recent call last) Cell In[21], line 1 ----> 1 dados['data'] = pd.to_datetime(dados['data']) 2 dados['data'] = (dados['data'] - dados['data'].min()).dt.days 3 dados.corr().round(4)

File ~\anaconda3\Lib\site-packages\pandas\core\tools\datetimes.py:1050, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache) 1048 result = arg.map(cache_array) 1049 else: -> 1050 values = convert_listlike(arg._values, format) 1051 result = arg._constructor(values, index=arg.index, name=arg.name) 1052 elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):

File ~\anaconda3\Lib\site-packages\pandas\core\tools\datetimes.py:453, in _convert_listlike_datetimes(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact) 451 # format could be inferred, or user didn't ask for mixed-format parsing. 452 if format is not None and format != "mixed": --> 453 return _array_strptime_with_fallback(arg, name, utc, format, exact, errors) 455 result, tz_parsed = objects_to_datetime64ns( 456 arg, 457 dayfirst=dayfirst, (...) 461 allow_object=True, 462 ) 464 if tz_parsed is not None: 465 # We can take a shortcut since the datetime64 numpy array 466 # is in UTC

File ~\anaconda3\Lib\site-packages\pandas\core\tools\datetimes.py:484, in _array_strptime_with_fallback(arg, name, utc, fmt, exact, errors) 473 def _array_strptime_with_fallback( 474 arg, 475 name, (...) 479 errors: str, 480 ) -> Index: 481 """ 482 Call array_strptime, with fallback behavior depending on 'errors'. 483 """ --> 484 result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc) 485 if any(tz is not None for tz in timezones): 486 return _return_parsed_timezone_results(result, timezones, utc, name)

File ~\anaconda3\Lib\site-packages\pandas\_libs\tslibs\strptime.pyx:530, in pandas._libs.tslibs.strptime.array_strptime()

File ~\anaconda3\Lib\site-packages\pandas\_libs\tslibs\strptime.pyx:351, in pandas._libs.tslibs.strptime.array_strptime()

ValueError: time data "13/01/2015" doesn't match format "%m/%d/%Y", at position 12. You might want to try: - passing format if your strings have a consistent format; - passing format='ISO8601' if your strings are all ISO8601 but not necessarily in exactly the same format; - passing format='mixed', and the format will be inferred for each element individually. You might want to use dayfirst alongside this.

1 resposta
solução!

Olá,

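Você precisa converter a coluna de data informando o formato explicitamente: as datas estão em dia/mês/ano, mas o pandas inferiu mês/dia/ano (%m/%d/%Y) e por isso falhou ao chegar em "13/01/2015".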

Crie uma linha de código e adicione:

df['data'] = pd.to_datetime(df['data'], format='%d/%m/%Y')

corr_df = df.corr().round(4)

No seu caso, troque df por dados (o nome do seu DataFrame).