PointPatternQuest/pointPatternFNNAnalysis.m at master · sebherbert/PointPatternQuest · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374


function fullResults = pointPatternFNNAnalysis(fullPath, NNExp, pops, PARAMS)
% Measure and compare the nearest neighbour distance in experimental data
% and simulations
%{
Inputs:
  fullPath = absolute path plus new file name root
  NNExp = table of : cellType, NN distance, 3D positions of every cell
  cCDists = all cell to cell distances (square array of NxN cells)
  pops.popSource = celltype of the pop source of a possible effect
  pops.popTarget = celltype of the pop affected by the pop source
  pops.popPermut = celltypes among which the random permutations are done
  PARAMS = set of parameters (such as binSize or number of simulations)
Output:
  fullResults = structure containing the NN distribution for the simulations
  as well as the experimental data

%}

% % For simpler reading
% cellType = NNExp.cellType;
% r = PARAMS.binSize;

% Find the average cell size by averaging the minimum cell to cell distance
PARAMS.cellDiameter = mean(NNExp.nearestNeighbour);

%% Extract the 3D positions of the 2 cell populations of interest
popSource3Dpos = table2array(NNExp(NNExp.cellType == pops.popSource, {'pos3D'}));

popTarget3Dpos = table2array(NNExp(NNExp.cellType == pops.popTarget, {'pos3D'}));
nTarget = length(popTarget3Dpos);

rowPermut = logical(sum(NNExp.cellType == pops.popPermut,2));

if pops.popTarget == pops.popSource
    PARAMS.samePop = true;
else
    PARAMS.samePop = false;
end

% Find nearest neighbour
dnExp = findNN(popSource3Dpos, popTarget3Dpos, PARAMS.samePop, pops)';

% If the range of the model has to be fitted to the cdf50 of the NN
% distribution
if PARAMS.useRangeCDF50 == 1 % Boolean to exchange the cdf
    PARAMS.optiR0 = double(median(dnExp));
end

% Interpolate the CDF on a fixed scale and estimate the envelopes
expCDFs = formatCdfsExp(dnExp, PARAMS);

%% Use an optimisation function to find the most adapted strength and range parameters
if strcmp(PARAMS.model,'fittedModel')
    bestParams = optimizeParamsCall(NNExp, pops, rowPermut, nTarget, expCDFs,...
        PARAMS);
    fprintf('bestParams: Range = %0.1fµm ; Strength = %0.2f\n',bestParams(1),bestParams(2));

    % Use the optimized values to recalculate the simulation
    effect.Range(1) = bestParams(1);
    effect.Strength = bestParams(2);
    [dnSimu, ~] = simulateSpatialDisp(effect, NNExp, pops, rowPermut, nTarget, PARAMS);

    simuCDFs = formatCdfsSimu(dnSimu, PARAMS);
    % Display simulation + experimental
    figure
    displayNNCDFs(expCDFs, simuCDFs, PARAMS, PARAMS.useRMSMaxDist);

    % recalculate RMS and GOF
    diffCdf = expCDFs.fFix' - simuCDFs.f50pc;
    diffCdf(isnan(diffCdf)) = 0;
    medRMS = median(rms(diffCdf));
    text(60,0.5,sprintf('Fitted parameters:\nRange = %0.1fµm\nStrength = %0.2f\nRMS = %0.04f',...
        bestParams(1),bestParams(2),medRMS));
    saveas(gcf,sprintf('%s_fitted model', fullPath));
    saveas(gcf,sprintf('%s_fitted model.png', fullPath));

else % Use preset values
    %% Effectuate the simulations of the random permutations of the popTarget
    effect.Strength = PARAMS.effectStrength;
    effect.Range = PARAMS.effectRange;
    [dnSimu, ~] = simulateSpatialDisp(effect, NNExp, pops, rowPermut, nTarget, PARAMS);

    %% Calculate exp and simulations cdf and their dispersions individualy
    simuCDFs = formatCdfsSimu(dnSimu, PARAMS);

    %% Display all the CDFs
    if PARAMS.displayIndivCDF
        figure
        displayNNCDFs(expCDFs, simuCDFs, PARAMS)
    end

    medRMS = evaluateRMS(expCDFs,simuCDFs,PARAMS);

end

% Structure output
fullResults = {};
fullResults.dnExp = dnExp;
fullResults.expCDFs = expCDFs;
fullResults.dnSimu = dnSimu;
fullResults.simuCDFs = simuCDFs;
if strcmp(PARAMS.model,'fittedModel')
    fullResults.fit.Range = bestParams(1);
    fullResults.fit.Strength = bestParams(2);
    fullResults.fit.medRMS = medRMS;
else
    fullResults.medRMS = medRMS;
end
fullResults.PARAMS = PARAMS;

end


function medRMS = evaluateRMS(expCDFs,simuCDFs,PARAMS)
% Calculate the RMS between experimental and simulated curve f50pc

diffCdf = expCDFs.fFix' - simuCDFs.f50pc;
diffCdf(isnan(diffCdf)) = 0;

if PARAMS.useRMSMaxDist ==1
    % only take into account the distance ditribution up to a maximum distance
    % (as a function of the cell diameter)
    medRMS = rms(diffCdf(simuCDFs.x<PARAMS.maxDistFactor*PARAMS.cellDiameter));
else
    medRMS = rms(diffCdf);
end

end


function [bestParams, finalRMS] = optimizeParamsCall(NNExp, pops, rowPermut, nTarget, expCDFs, PARAMS)
% Launch the optimization function for the fit of the strength of range of
% the distribution effect
x0 = [PARAMS.optiR0,PARAMS.optiS0];

effect.Range = x0(1);
effect.Strength = x0(2);
[dnSimu, ~] = simulateSpatialDisp(effect, NNExp, pops, rowPermut, nTarget, PARAMS);
simuCDFs = formatCdfsSimu(dnSimu, PARAMS);

if PARAMS.doDisplayLiveFit
    % Display the background image (exp value)
    colors = lines(2);
    figure
    h(1) = plot(expCDFs.x,expCDFs.f,'linewidth',2,'Color',colors(2,:));
    hold on

    % And original fit (could be added to the )
    h(2) = plot(simuCDFs.x,simuCDFs.f50pc,'linewidth',2,'Color',colors(1,:));

    ylabel('Cumulative cell frequency');
    xlabel('Distance to nearest neighbor (µm)');

    pause(0.1)
end

% options = optimset('PlotFcns',@optimplotfval);
options = optimset('MaxIter',PARAMS.fitMaxIter);

if PARAMS.doDisplayLiveFit
    [bestParams, finalRMS]= fminsearch(@two_varModel, x0, options, NNExp, pops, rowPermut, nTarget, expCDFs, h, PARAMS);
else
    [bestParams, finalRMS]= fminsearch(@two_varModel, x0, options, NNExp, pops, rowPermut, nTarget, expCDFs, [], PARAMS);
end


end


function [medRMS, h] = two_varModel(x0, NNExp, pops, rowPermut, nTarget, expCDFs, h, PARAMS)
% provide the ks test comparing the experimental and simulated
% distributions

effect.Range = x0(1);
effect.Strength = x0(2);

if (effect.Range <= PARAMS.minFitRange || effect.Strength <= PARAMS.minFitStrength)
    medRMS = 100;
    return
end

[dnSimu, ~] = simulateSpatialDisp(effect, NNExp, pops, rowPermut, nTarget, PARAMS);

simuCDFs = formatCdfsSimu(dnSimu, PARAMS);

medRMS = evaluateRMS(expCDFs,simuCDFs,PARAMS);

if PARAMS.doDisplayLiveFit
    % update image
    set(h(2),'YData',simuCDFs.f50pc,'Color',lines(1));
    pause(0.1)
end

fprintf('bestParams: Range = %0.1fµm ; Strength = %0.2f ; RMS = %0.5f\n',effect.Range,effect.Strength,medRMS);

end

function displayProbMap(NNExp, probMap, PARAMS)
% Display probaMap as a colormap applied on the permutable population

all3Dsource = table2array(NNExp(NNExp.cellType == pops.popSource, {'pos3D'}));
all3Dpermut = table2array(NNExp(rowPermut, {'pos3D'}));

colors = colormap(parula(log(max(probMap))/log(PARAMS.effectStrength)+1));
for bioCell = 1:size(all3Dpermut,1)
    colorScat(bioCell,:) = colors(log(probMap(bioCell))/log(PARAMS.effectStrength)+1,:);
end

figure
% scatter3(all3Dpermut(:,1),all3Dpermut(:,2),all3Dpermut(:,3),[],colors(probMap(:),:));
scatter3(all3Dpermut(:,1),all3Dpermut(:,2),all3Dpermut(:,3),[],colorScat,'.');
hold on
scatter3(all3Dsource(:,1),all3Dsource(:,2),all3Dsource(:,3),'.','MarkerFaceColor',[0.098,0.325,0.851]);

end

function dn = findNN(popSource3Dpos, popTarget3Dpos, samePop, pops)
% Recreate the distance table of the 2 cell populations (all neighbours)
% sorted by size and only takes into account the firsts 2 distances

allN = pdist2(popSource3Dpos,popTarget3Dpos,'euclidean','Smallest', 2);
% extract the nearest neighbour distance
if samePop
    % When the target and source are the same then the minimum distance is always 0
    dn = allN(2,:);
else
    % When the target and source are the different
    dn = allN(1,:);
end
if min(dn) == 0
    fprintf('WARNING: nearest neighbour for exp t%d vs t%d is null\n',pops.popSource,pops.popTarget)
end

end

function [dnSimu, Grand] = simulateSpatialDisp(effect, NNExp, pops, rowPermut, nTarget, PARAMS)
% Simulates random draws of the popTarget population among the popPermut
% population, considering the effect of the popSource population on the
% next draw.

dnSimu = zeros(nTarget,PARAMS.numPermut);

% Calculate all the cell-cell distances to use in the spatial effect
% simulation effect
cell2CellDist = pdist2(table2array(NNExp(:,{'pos3D'})),table2array(NNExp(:,{'pos3D'})));

Grand = zeros(PARAMS.numPermut, size(PARAMS.binSize,2));

parfor perm = 1:PARAMS.numPermut % for each permutation run
    %     if mod(perm,1000) == 0
    %         fprintf('Running permutation %d\n',perm);
    %     end
    % Initiate a probability map => prob = 1 for the permutable population and
    % 0 for the rest of the population
    probMap = double(rowPermut);
    popTargetSimu{perm} = table;

    if PARAMS.samePop
        % Requires adaptation of the popSource at every step + an adaptation at
        % every step of the draw
        for bioCell = 1:nTarget
            tempCell = datasample(NNExp,1,'Weights',probMap);
            % Take cell out of the permutable population
            % be careful at not putting it back at more than 0 by an attractive or repulsive effect !
            probMap(tempCell.cellID) = 0;

            % Adapt the probability map of the other cells based on the draw.
            probMap = adaptProbMap(tempCell.cellID, probMap, cell2CellDist, effect);


            % Add the drawn cell to the complete simulated draw
            popTargetSimu{perm} = [popTargetSimu{perm};tempCell];
        end
        % In this case target and source populations are the same
        popSourceSimu{perm} = popTargetSimu{perm};

    else
        % Adapt the probability map once and for all (no adaptation if there is no
        % spatial effect) => Could also be done once and for all at the beginning
        % of the simulations

        % extract source cells IDs
        sourceCells = table2array(NNExp(NNExp.cellType == pops.popSource, {'cellID'}));
        probMap = adaptProbMap(sourceCells, probMap, cell2CellDist, effect);

        % Then make a single draw without replacing the cells
        popTargetSimu{perm} = datasample(NNExp,nTarget,'Replace',false,'Weights',probMap);
        popSourceSimu{perm} = NNExp(NNExp.cellType == pops.popSource, :);
    end

    % Find the Nearest neighbours in the population
    dnSimu(:,perm) = findNN(popSourceSimu{perm}.pos3D, popTargetSimu{perm}.pos3D, PARAMS.samePop, pops);

    %     % Calculate the cdf histogram
    %     for i=1:size(PARAMS.binSize,2)
    %         Grand(perm,i)= sum(dnSimu(:,perm)<PARAMS.binSize(i)) / numel(dnSimu(:,perm));
    %     end
end

end

function newProbMap = adaptProbMap(sourceCells, oriProbMap, cell2CellDist, effect)
% Adapt the draw probability map based on the desired effect, the drawned
% cell(s)
% SourceCells = cellID of the source population to integrate into the
% probMap
% -> 1 in the case of on the fly adaptation (same source and target pops)
% -> N in the case of static probMap (different source and target pops)

tempProbMap = oriProbMap;

for bioCell = 1:length(sourceCells)
    distsN = cell2CellDist(:,sourceCells(bioCell));
    affectedCells = distsN<effect.Range;
    tempProbMap(affectedCells) = tempProbMap(affectedCells)*effect.Strength;
end

% if later choose to change the multiplication factor by an additive term,
% multiply tempProMap by a vector containing the 0 positions of the
% oriProbMap (1 otherwise) to ensure conservation of the forbidden
% positions
newProbMap = tempProbMap;

end

function expCDFs = formatCdfsExp(dnExp, PARAMS)
% Interpolate the CDF on a fixed scale and estimate the envelopes

% Calculate the CDFs of the experimental population
[expCDFs.f,expCDFs.x,expCDFs.f5,expCDFs.f95] = ecdf(dnExp,'alpha',0.05);
[~,~,expCDFs.f1,expCDFs.f99] = ecdf(dnExp,'alpha',0.01);

% Interpolate the CDF on a fixed scale
expCDFs.fFix = interp1([0;unique(expCDFs.x)],...
    [0;expCDFs.f(2:end)],PARAMS.binSize);
expCDFs.xFix = PARAMS.binSize;
end

function simuCDFs = formatCdfsSimu(dnSimu, PARAMS)

%% New version using a "simpler" and more manual percentile approach (still kept
% Greenwood for the experimental cdf. (same process as original WS's)

% Calculate the CDFs of the simulated populations
for simu = 1:PARAMS.numPermut % each simulation is treated individually
    [simuCDFs.indiv{simu}.f,simuCDFs.indiv{simu}.x] = ecdf(dnSimu(:,simu));

    % In order to avoid dimension mismatch, cdfs are interpolated on fixed
    % abscissa with fixed periodicity before being merged together
    %     simuCDFs.xs(:,simu) = PARAMS.binSize;
    simuCDFs.fs(:,simu) = interp1([0;unique(simuCDFs.indiv{simu}.x)],...
        [0;simuCDFs.indiv{simu}.f(2:end)],PARAMS.binSize);
end

% Calculate the median simulation
simuCDFs.fs(isnan(simuCDFs.fs)) = 1;
simuCDFs.x = PARAMS.binSize;
% Use percentiles of the individually simulated cdfs to define an envelope
tempOutput = prctile(simuCDFs.fs,[1 5 25 50 75 95 99],2);
simuCDFs.f1pc = tempOutput(:,1);
simuCDFs.f5pc = tempOutput(:,2);
simuCDFs.f25pc = tempOutput(:,3);
simuCDFs.f50pc = tempOutput(:,4);
simuCDFs.f75pc = tempOutput(:,5);
simuCDFs.f95pc = tempOutput(:,6);
simuCDFs.f99pc = tempOutput(:,7);
% Add additionnal measurements
simuCDFs.fmean = mean(simuCDFs.fs,2);
simuCDFs.fstd = std(simuCDFs.fs,[],2);

end