Browse Source

Added correct file

Ioannis Agtzidis 4 years ago
parent
commit
73140c24ba
1 changed files with 50 additions and 143 deletions
  1. 50 143
      Studyforrest2ArffRegex.m

+ 50 - 143
Studyforrest2ArffRegex.m

@@ -1,148 +1,55 @@
-% Studyforrest2Arff.m
-%
-% This function converts gaze data from studyforrest file format to ARFF. The
-% input comprises from a gaze file and a frames timing file
+% function Studyforrest2ArffRegex:
+% gets as inputs regular expressions to gaze and events files. The execution of this function is around 3 times
+% faster in matlab
 %
 % input:
-%   eventsFile  - file containing events (onset, duration, frameidx, videotime, audiotime, lasttrigger)
-%   gazeFile    - file containing gaze (x, y, pupil dilation(units?), frameId reference)
-%   metadata    - metadata in the form accepted from SaveArff.m
-%   outputFile  - (optional) name of ARFF. If it is not used the data is stored in the same directory as events with .arff extension
-
-function Studyforrest2Arff(eventsFile, gazeFile, metadata, outputFile)
-    if (nargin < 4)
-        [dir, file, ext] = fileparts(eventsFile);
-        if (length(dir) > 0)
-            dir = [dir '/'];
+%   eventsFiles - regex to events files
+%   gazeFiles   - regex to gaze files
+%   outputDir   - directory to save results
+%
+% ex. fMRI results
+% Studyforrest2ArffRegex('/path/to/studyforrest/*/*/*/*movie*events.tsv', '/path/to/studyforrest/*/*/*/*eyegaze_physio.tsv', '/path/to/results');
+
+function Studyforrest2ArffRegex(eventFiles, gazeFiles, outputDir)
+    % set metadata for fMRI and lab experiments
+    metadataFmri.width_px = 1280;
+    metadataFmri.height_px = 546;
+    metadataFmri.width_mm = 265;
+    metadataFmri.height_mm = 113;
+    metadataFmri.distance_mm = 630;
+    metadataFmri.extra = {};
+
+    metadataLab.width_px = 1280;
+    metadataLab.height_px = 546;
+    metadataLab.width_mm = 522;
+    metadataLab.height_mm = 223;
+    metadataLab.distance_mm = 850;
+    metadataLab.extra = {};
+
+    eventFilelist = glob(eventFiles);
+    gazeFilelist = glob(gazeFiles);
+
+    assert(size(eventFilelist,1)==size(gazeFilelist,1), 'Provided regular expressions returned different number of files');
+
+    % Subjects 1 to 20 were recorded in the scanner. Subjects 21 to 36 were recorded in the lab
+    for fileId=1:size(eventFilelist,1)
+        disp(sprintf('%s\n%s\n\n', eventFilelist{fileId,1}, gazeFilelist{fileId,1}));
+        % get subject id
+        ind = findstr(eventFilelist{fileId,1}, 'sub-');
+        subId = str2num(eventFilelist{fileId,1}(ind+4:ind+5));
+
+        [dir, name, ext] = fileparts(eventFilelist{fileId,1});
+        outputFile = fullfile (outputDir, [name '.arff']);
+
+        if (subId <= 20)
+            disp('fmri')
+            Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri, outputFile);
+        else
+            disp('lab')
+            Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataLab, outputFile);
+
+            % put in-scanner metadata in order to be able to cluster everything together
+            %Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri);
         end
-
-        ind = findstr(file, '_events');
-
-        outputFile = [dir file(1:ind(end)-1) '.arff'];
-    end
-
-    frames = importdata(eventsFile, '\t');
-    frames = frames.data;
-    % check for missing frames
-    %{
-    frame_step = frames(2:end,3) - frames(1:end-1,3);
-    if (size(find(frame_step>1),1) > 0)
-        warning(['Missing frames in ' eventsFile '. No ARFF file is written']);
-        return;
-    end
-    %}
-
-    gaze = importdata(gazeFile, '\t');
-    if (IsOctave())
-        % at points where confidence is 0 we have 2 entries in the .tsv file.
-        % This translates to 2 columns from importdata with data. The last 2
-        % columns are zeros. Below we add the frame numbering to the last
-        % column. Also when the 3rd column is 0 the confidence is 0 too
-        gaze(gaze(:,3)==0,4) = gaze(gaze(:,3)==0,2);
-        gaze(gaze(:,3)==0,2) = 0; % remove frameId values
-    else
-        % in matlab the first two columns are NaN
-        gaze(isnan(gaze)) = 0;
-    end
-
-    % get gaze points only until the last frame
-    if (gaze(end,4) > frames(end,3))
-       warning(['Gaze entries point to more frames than they actually exist in ' eventsFile]);
-       gaze = gaze(gaze(:,4)<=frames(end,3),:);
-    end
-    
-
-    % allocate and assign arff values
-    arffData = zeros(size(gaze,1),5); % (time, x, y, confidence, frame_id)
-    arffData(:,2) = gaze(:,1); % x
-    arffData(:,3) = gaze(:,2); % y
-    arffData(:,4) = 1.0; % confidence
-    arffData(gaze(:,3)==0,4) = 0.0;
-    arffData(:,5) = gaze(:,4); % frame id
-
-    % calculate timestamps
-    % get gaze count for each frame
-    gazeCount = histc(gaze(:,4), frames(:,3));
-
-    prevFrameId = -1;
-    gazeCounter = 0;
-    gazeStep = 0;
-    for i=1:size(arffData,1)
-       frameId = gaze(i,4);
-       if (frameId ~= prevFrameId)
-           gazeCounter = 0;
-           gazeStep = frames(frameId,2)/gazeCount(frameId);
-       end
-       arffData(i,1) = frames(frameId,1) + gazeCounter*gazeStep;
-       % make sure timestamps are monotonous. i.e. we don't move past next frame's time
-       if (frameId<size(frames,1) && arffData(i,1) > frames(frameId+1,1))
-            arffData(i,1) = frames(frameId+1,1);
-       end
-
-       % convert to microseconds
-       arffData(i,1) = arffData(i,1)*1000000;
-
-       gazeCounter = gazeCounter + 1;
-       prevFrameId = frameId;
-    end
-    arffData(:,1) = floor(arffData(:,1)); % round to int
-
-    % if there is a difference in the amount of frames referenced in gazeFile
-    % and those in eventsFile then we get inf and nan values. The solution is to
-    % keep entries just before those observations appear. (The reason for the frame
-    % difference is not clear)
-    indInf = find(isinf(arffData(:,1)));
-    indNan = find(isnan(arffData(:,1)));
-    indTot = min([indInf; indNan]);
-    
-    if (size(indTot,1)>0)
-        arffData = arffData(1:indTot-1,:);
-        warning(['Missing frames in ' eventsFile '. Written timestamps might be erroneous']);
     end
-
-    % save data
-    relation = 'studyforrest_gaze';
-
-    attributes = {'time', 'INTEGER';
-                  'x', 'NUMERIC';
-                  'y', 'NUMERIC';
-                  'confidence', 'NUMERIC';
-                  'frame_id', 'INTEGER'};
-
-    %SaveArff(outputFile, arffData, metadata, attributes, relation);
-
-	% write to file directly to speed up the process. 7 times faster based on measurements
-	% start writing
-    fid = fopen(outputFile, 'w+');
-
-    % write relation
-    fprintf(fid, '@RELATION %s\n\n', relation);
-
-    % write metadata
-    fprintf(fid, '%%@METADATA width_px %d\n', metadata.width_px);
-    fprintf(fid, '%%@METADATA height_px %d\n', metadata.height_px);
-    fprintf(fid, '%%@METADATA width_mm %.2f\n', metadata.width_mm);
-    fprintf(fid, '%%@METADATA height_mm %.2f\n', metadata.height_mm);
-    fprintf(fid, '%%@METADATA distance_mm %.2f\n\n', metadata.distance_mm);
-
-    % write metadata extras. Those are data that vary between experiments
-    for i=1:size(metadata.extra,1)
-        fprintf(fid, '%%@METADATA %s %s\n', metadata.extra{i,1}, metadata.extra{i,2});
-    end
-    % print an empty line
-    fprintf(fid, '\n');
-	for i=1:size(attributes,1)
-        fprintf(fid, '@ATTRIBUTE %s %s\n', attributes{i,1}, upper(attributes{i,2}));
-    end
-
-    % write data keyword
-    fprintf(fid,'\n@DATA\n');
-    % write actual data
-    %for i=1:size(arffData,1)
-    %    fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData(i,:));
-    %end
-    fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData'); % simpler and faster
-
-    % close file
-    fclose(fid);
 end