1-
2-
3-
41from numpy import log
52import pandas as pd
63import numpy as np
74
85
9- # copied from scikit-bio
6+ # copied from scikit-bio
107# because I cannot install it
118def closure (mat ):
129 """
@@ -93,24 +90,25 @@ def multiplicative_replacement(mat, delta=None):
9390 [ 0.0625, 0.4375, 0.4375, 0.0625]])
9491 """
9592 mat = closure (mat )
96- z_mat = ( mat == 0 )
93+ z_mat = mat == 0
9794
9895 num_feats = mat .shape [- 1 ]
9996 tot = z_mat .sum (axis = - 1 , keepdims = True )
10097
10198 if delta is None :
102- delta = (1. / num_feats )** 2
99+ delta = (1.0 / num_feats ) ** 2
103100
104101 zcnts = 1 - tot * delta
105102 if np .any (zcnts ) < 0 :
106- raise ValueError ('The multiplicative replacement created negative '
107- 'proportions. Consider using a smaller `delta`.' )
103+ raise ValueError (
104+ "The multiplicative replacement created negative "
105+ "proportions. Consider using a smaller `delta`."
106+ )
108107 mat = np .where (z_mat , delta , zcnts * mat )
109108 return mat .squeeze ()
110109
111110
112-
113- def clr (data : pd .DataFrame , log = log ,features = "all" ):
111+ def clr (data : pd .DataFrame , log = log , features = "all" ):
114112 """
115113 Centered log ratio (CLR) with multiplicative replacement implemented in scikit-bio
116114 """
@@ -128,6 +126,8 @@ def clr(data: pd.DataFrame, log=log,features="all"):
128126 # Fill in zeros with multiplicative replacement
129127 matrix = multiplicative_replacement (matrix )
130128
129+ matrix = pd .DataFrame (matrix , index = d .index , columns = d .columns )
130+
131131 # CLR
132132 matrix = log (matrix )
133133
@@ -137,7 +137,7 @@ def clr(data: pd.DataFrame, log=log,features="all"):
137137 mean = matrix .mean (1 )
138138
139139 elif features .lower () == "nz" :
140-
140+
141141 mean = matrix [matrix != 0 ].mean (1 )
142142 elif features .lower () == "iql" :
143143 # use mean of features in interquartile range
@@ -147,9 +147,6 @@ def clr(data: pd.DataFrame, log=log,features="all"):
147147 else :
148148 raise Exception ("features must be 'all', 'nz', or 'iql'" )
149149
150-
151150 matrix = (matrix .T - mean ).T
152151
153- if type (data ) == pd .DataFrame :
154-
155- return pd .DataFrame (matrix , index = d .index , columns = d .columns )
152+ return matrix
0 commit comments