4 tahun lalu · 73140c24ba
--- a/Studyforrest2ArffRegex.m
+++ b/Studyforrest2ArffRegex.m
@@ -1,148 +1,55 @@
 
				-% Studyforrest2Arff.m
			
 
				-%
			
 
				-% This function converts gaze data from studyforrest file format to ARFF. The
			
 
				-% input comprises from a gaze file and a frames timing file
			
 
				+% function Studyforrest2ArffRegex:
			
 
				+% takes as inputs glob (wildcard) patterns matching the gaze and events files. The execution of this function is around 3 times
			
 
				+% faster in matlab
			
 
				 %
			
 
				 % input:
			
 
				-%   eventsFile  - file containing events (onset, duration, frameidx, videotime, audiotime, lasttrigger)
			
 
				-%   gazeFile    - file containing gaze (x, y, pupil dilation(units?), frameId reference)
			
 
				-%   metadata    - metadata in the form accepted from SaveArff.m
			
 
				-%   outputFile  - (optional) name of ARFF. If it is not used the data is stored in the same directory as events with .arff extension
			
 
				-
			
 
				-function Studyforrest2Arff(eventsFile, gazeFile, metadata, outputFile)
			
 
				-    if (nargin < 4)
			
 
				-        [dir, file, ext] = fileparts(eventsFile);
			
 
				-        if (length(dir) > 0)
			
 
				-            dir = [dir '/'];
			
 
				+%   eventFiles  - glob (wildcard) pattern matching the events files
			
 
				+%   gazeFiles   - glob (wildcard) pattern matching the gaze files
			
 
				+%   outputDir   - directory to save results
			
 
				+%
			
 
				+% ex. fMRI results
			
 
				+% Studyforrest2ArffRegex('/path/to/studyforrest/*/*/*/*movie*events.tsv', '/path/to/studyforrest/*/*/*/*eyegaze_physio.tsv', '/path/to/results');
			
 
				+
			
 
				+function Studyforrest2ArffRegex(eventFiles, gazeFiles, outputDir)
			
 
				+    % set metadata for fMRI and lab experiments
			
 
				+    metadataFmri.width_px = 1280;
			
 
				+    metadataFmri.height_px = 546;
			
 
				+    metadataFmri.width_mm = 265;
			
 
				+    metadataFmri.height_mm = 113;
			
 
				+    metadataFmri.distance_mm = 630;
			
 
				+    metadataFmri.extra = {};
			
 
				+
			
 
				+    metadataLab.width_px = 1280;
			
 
				+    metadataLab.height_px = 546;
			
 
				+    metadataLab.width_mm = 522;
			
 
				+    metadataLab.height_mm = 223;
			
 
				+    metadataLab.distance_mm = 850;
			
 
				+    metadataLab.extra = {};
			
 
				+
			
 
				+    eventFilelist = glob(eventFiles);
			
 
				+    gazeFilelist = glob(gazeFiles);
			
 
				+
			
 
				+    assert(size(eventFilelist,1)==size(gazeFilelist,1), 'Provided regular expressions returned different number of files');
			
 
				+
			
 
				+    % Subjects 1 to 20 were recorded in the scanner. Subjects 21 to 36 were recorded in the lab
			
 
				+    for fileId=1:size(eventFilelist,1)
			
 
				+        disp(sprintf('%s\n%s\n\n', eventFilelist{fileId,1}, gazeFilelist{fileId,1}));
			
 
				+        % get subject id
			
 
				+        ind = findstr(eventFilelist{fileId,1}, 'sub-');
			
 
				+        subId = str2num(eventFilelist{fileId,1}(ind+4:ind+5));
			
 
				+
			
 
				+        [dir, name, ext] = fileparts(eventFilelist{fileId,1});
			
 
				+        outputFile = fullfile (outputDir, [name '.arff']);
			
 
				+
			
 
				+        if (subId <= 20)
			
 
				+            disp('fmri')
			
 
				+            Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri, outputFile);
			
 
				+        else
			
 
				+            disp('lab')
			
 
				+            Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataLab, outputFile);
			
 
				+
			
 
				+            % put in-scanner metadata in order to be able to cluster everything together
			
 
				+            %Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri);
			
 
				         end
			
 
				-
			
 
				-        ind = findstr(file, '_events');
			
 
				-
			
 
				-        outputFile = [dir file(1:ind(end)-1) '.arff'];
			
 
				-    end
			
 
				-
			
 
				-    frames = importdata(eventsFile, '\t');
			
 
				-    frames = frames.data;
			
 
				-    % check for missing frames
			
 
				-    %{
			
 
				-    frame_step = frames(2:end,3) - frames(1:end-1,3);
			
 
				-    if (size(find(frame_step>1),1) > 0)
			
 
				-        warning(['Missing frames in ' eventsFile '. No ARFF file is written']);
			
 
				-        return;
			
 
				-    end
			
 
				-    %}
			
 
				-
			
 
				-    gaze = importdata(gazeFile, '\t');
			
 
				-    if (IsOctave())
			
 
				-        % at points where confidence is 0 we have 2 entries in the .tsv file.
			
 
				-        % This translates to 2 columns from importdata with data. The last 2
			
 
				-        % columns are zeros. Below we add the frame numbering to the last
			
 
				-        % column. Also when the 3rd column is 0 the confidence is 0 too
			
 
				-        gaze(gaze(:,3)==0,4) = gaze(gaze(:,3)==0,2);
			
 
				-        gaze(gaze(:,3)==0,2) = 0; % remove frameId values
			
 
				-    else
			
 
				-        % in matlab the first two columns are NaN
			
 
				-        gaze(isnan(gaze)) = 0;
			
 
				-    end
			
 
				-
			
 
				-    % get gaze points only until the last frame
			
 
				-    if (gaze(end,4) > frames(end,3))
			
 
				-       warning(['Gaze entries point to more frames than they actually exist in ' eventsFile]);
			
 
				-       gaze = gaze(gaze(:,4)<=frames(end,3),:);
			
 
				-    end
			
 
				-    
			
 
				-
			
 
				-    % allocate and assign arff values
			
 
				-    arffData = zeros(size(gaze,1),5); % (time, x, y, confidence, frame_id)
			
 
				-    arffData(:,2) = gaze(:,1); % x
			
 
				-    arffData(:,3) = gaze(:,2); % y
			
 
				-    arffData(:,4) = 1.0; % confidence
			
 
				-    arffData(gaze(:,3)==0,4) = 0.0;
			
 
				-    arffData(:,5) = gaze(:,4); % frame id
			
 
				-
			
 
				-    % calculate timestamps
			
 
				-    % get gaze count for each frame
			
 
				-    gazeCount = histc(gaze(:,4), frames(:,3));
			
 
				-
			
 
				-    prevFrameId = -1;
			
 
				-    gazeCounter = 0;
			
 
				-    gazeStep = 0;
			
 
				-    for i=1:size(arffData,1)
			
 
				-       frameId = gaze(i,4);
			
 
				-       if (frameId ~= prevFrameId)
			
 
				-           gazeCounter = 0;
			
 
				-           gazeStep = frames(frameId,2)/gazeCount(frameId);
			
 
				-       end
			
 
				-       arffData(i,1) = frames(frameId,1) + gazeCounter*gazeStep;
			
 
				-       % make sure timestamps are monotonous. i.e. we don't move past next frame's time
			
 
				-       if (frameId<size(frames,1) && arffData(i,1) > frames(frameId+1,1))
			
 
				-            arffData(i,1) = frames(frameId+1,1);
			
 
				-       end
			
 
				-
			
 
				-       % convert to microseconds
			
 
				-       arffData(i,1) = arffData(i,1)*1000000;
			
 
				-
			
 
				-       gazeCounter = gazeCounter + 1;
			
 
				-       prevFrameId = frameId;
			
 
				-    end
			
 
				-    arffData(:,1) = floor(arffData(:,1)); % round to int
			
 
				-
			
 
				-    % if there is a difference in the amount of frames referenced in gazeFile
			
 
				-    % and those in eventsFile then we get inf and nan values. The solution is to
			
 
				-    % keep entries just before those observations appear. (The reason for the frame
			
 
				-    % difference is not clear)
			
 
				-    indInf = find(isinf(arffData(:,1)));
			
 
				-    indNan = find(isnan(arffData(:,1)));
			
 
				-    indTot = min([indInf; indNan]);
			
 
				-    
			
 
				-    if (size(indTot,1)>0)
			
 
				-        arffData = arffData(1:indTot-1,:);
			
 
				-        warning(['Missing frames in ' eventsFile '. Written timestamps might be erroneous']);
			
 
				     end
			
 
				-
			
 
				-    % save data
			
 
				-    relation = 'studyforrest_gaze';
			
 
				-
			
 
				-    attributes = {'time', 'INTEGER';
			
 
				-                  'x', 'NUMERIC';
			
 
				-                  'y', 'NUMERIC';
			
 
				-                  'confidence', 'NUMERIC';
			
 
				-                  'frame_id', 'INTEGER'};
			
 
				-
			
 
				-    %SaveArff(outputFile, arffData, metadata, attributes, relation);
			
 
				-
			
 
				-	% write to file directly to speed up the process. 7 times faster based on measurements
			
 
				-	% start writing
			
 
				-    fid = fopen(outputFile, 'w+');
			
 
				-
			
 
				-    % write relation
			
 
				-    fprintf(fid, '@RELATION %s\n\n', relation);
			
 
				-
			
 
				-    % write metadata
			
 
				-    fprintf(fid, '%%@METADATA width_px %d\n', metadata.width_px);
			
 
				-    fprintf(fid, '%%@METADATA height_px %d\n', metadata.height_px);
			
 
				-    fprintf(fid, '%%@METADATA width_mm %.2f\n', metadata.width_mm);
			
 
				-    fprintf(fid, '%%@METADATA height_mm %.2f\n', metadata.height_mm);
			
 
				-    fprintf(fid, '%%@METADATA distance_mm %.2f\n\n', metadata.distance_mm);
			
 
				-
			
 
				-    % write metadata extras. Those are data that vary between experiments
			
 
				-    for i=1:size(metadata.extra,1)
			
 
				-        fprintf(fid, '%%@METADATA %s %s\n', metadata.extra{i,1}, metadata.extra{i,2});
			
 
				-    end
			
 
				-    % print an empty line
			
 
				-    fprintf(fid, '\n');
			
 
				-	for i=1:size(attributes,1)
			
 
				-        fprintf(fid, '@ATTRIBUTE %s %s\n', attributes{i,1}, upper(attributes{i,2}));
			
 
				-    end
			
 
				-
			
 
				-    % write data keyword
			
 
				-    fprintf(fid,'\n@DATA\n');
			
 
				-    % write actual data
			
 
				-    %for i=1:size(arffData,1)
			
 
				-    %    fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData(i,:));
			
 
				-    %end
			
 
				-    fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData'); % simpler and faster
			
 
				-
			
 
				-    % close file
			
 
				-    fclose(fid);
			
 
				 end