|
@@ -1,148 +1,55 @@
|
|
|
-% Studyforrest2Arff.m
|
|
|
-%
|
|
|
-% This function converts gaze data from studyforrest file format to ARFF. The
|
|
|
-% input comprises from a gaze file and a frames timing file
|
|
|
+% function Studyforrest2ArffRegex:
|
|
|
+% gets as inputs regular expressions to gaze and events files. The execution of this function is around 3 times
|
|
|
+% faster in matlab
|
|
|
%
|
|
|
% input:
|
|
|
-% eventsFile - file containing events (onset, duration, frameidx, videotime, audiotime, lasttrigger)
|
|
|
-% gazeFile - file containing gaze (x, y, pupil dilation(units?), frameId reference)
|
|
|
-% metadata - metadata in the form accepted from SaveArff.m
|
|
|
-% outputFile - (optional) name of ARFF. If it is not used the data is stored in the same directory as events with .arff extension
|
|
|
-
|
|
|
-function Studyforrest2Arff(eventsFile, gazeFile, metadata, outputFile)
|
|
|
- if (nargin < 4)
|
|
|
- [dir, file, ext] = fileparts(eventsFile);
|
|
|
- if (length(dir) > 0)
|
|
|
- dir = [dir '/'];
|
|
|
+% eventsFiles - regex to events files
|
|
|
+% gazeFiles - regex to gaze files
|
|
|
+% outputDir - directory to save results
|
|
|
+%
|
|
|
+% ex. fMRI results
|
|
|
+% Studyforrest2ArffRegex('/path/to/studyforrest/*/*/*/*movie*events.tsv', '/path/to/studyforrest/*/*/*/*eyegaze_physio.tsv', '/path/to/results');
|
|
|
+
|
|
|
+function Studyforrest2ArffRegex(eventFiles, gazeFiles, outputDir)
|
|
|
+ % set metadata for fMRI and lab experiments
|
|
|
+ metadataFmri.width_px = 1280;
|
|
|
+ metadataFmri.height_px = 546;
|
|
|
+ metadataFmri.width_mm = 265;
|
|
|
+ metadataFmri.height_mm = 113;
|
|
|
+ metadataFmri.distance_mm = 630;
|
|
|
+ metadataFmri.extra = {};
|
|
|
+
|
|
|
+ metadataLab.width_px = 1280;
|
|
|
+ metadataLab.height_px = 546;
|
|
|
+ metadataLab.width_mm = 522;
|
|
|
+ metadataLab.height_mm = 223;
|
|
|
+ metadataLab.distance_mm = 850;
|
|
|
+ metadataLab.extra = {};
|
|
|
+
|
|
|
+ eventFilelist = glob(eventFiles);
|
|
|
+ gazeFilelist = glob(gazeFiles);
|
|
|
+
|
|
|
+ assert(size(eventFilelist,1)==size(gazeFilelist,1), 'Provided regular expressions returned different number of files');
|
|
|
+
|
|
|
+ % Subjects 1 to 20 were recorded in the scanner. Subjects 21 to 36 were recorded in the lab
|
|
|
+ for fileId=1:size(eventFilelist,1)
|
|
|
+ disp(sprintf('%s\n%s\n\n', eventFilelist{fileId,1}, gazeFilelist{fileId,1}));
|
|
|
+ % get subject id
|
|
|
+ ind = findstr(eventFilelist{fileId,1}, 'sub-');
|
|
|
+ subId = str2num(eventFilelist{fileId,1}(ind+4:ind+5));
|
|
|
+
|
|
|
+ [dir, name, ext] = fileparts(eventFilelist{fileId,1});
|
|
|
+ outputFile = fullfile (outputDir, [name '.arff']);
|
|
|
+
|
|
|
+ if (subId <= 20)
|
|
|
+ disp('fmri')
|
|
|
+ Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri, outputFile);
|
|
|
+ else
|
|
|
+ disp('lab')
|
|
|
+ Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataLab, outputFile);
|
|
|
+
|
|
|
+ % put in-scanner metadata in order to be able to cluster everything together
|
|
|
+ %Studyforrest2Arff(eventFilelist{fileId,1}, gazeFilelist{fileId,1}, metadataFmri);
|
|
|
end
|
|
|
-
|
|
|
- ind = findstr(file, '_events');
|
|
|
-
|
|
|
- outputFile = [dir file(1:ind(end)-1) '.arff'];
|
|
|
- end
|
|
|
-
|
|
|
- frames = importdata(eventsFile, '\t');
|
|
|
- frames = frames.data;
|
|
|
- % check for missing frames
|
|
|
- %{
|
|
|
- frame_step = frames(2:end,3) - frames(1:end-1,3);
|
|
|
- if (size(find(frame_step>1),1) > 0)
|
|
|
- warning(['Missing frames in ' eventsFile '. No ARFF file is written']);
|
|
|
- return;
|
|
|
- end
|
|
|
- %}
|
|
|
-
|
|
|
- gaze = importdata(gazeFile, '\t');
|
|
|
- if (IsOctave())
|
|
|
- % at points where confidence is 0 we have 2 entries in the .tsv file.
|
|
|
- % This translates to 2 columns from importdata with data. The last 2
|
|
|
- % columns are zeros. Below we add the frame numbering to the last
|
|
|
- % column. Also when the 3rd column is 0 the confidence is 0 too
|
|
|
- gaze(gaze(:,3)==0,4) = gaze(gaze(:,3)==0,2);
|
|
|
- gaze(gaze(:,3)==0,2) = 0; % remove frameId values
|
|
|
- else
|
|
|
- % in matlab the first two columns are NaN
|
|
|
- gaze(isnan(gaze)) = 0;
|
|
|
- end
|
|
|
-
|
|
|
- % get gaze points only until the last frame
|
|
|
- if (gaze(end,4) > frames(end,3))
|
|
|
- warning(['Gaze entries point to more frames than they actually exist in ' eventsFile]);
|
|
|
- gaze = gaze(gaze(:,4)<=frames(end,3),:);
|
|
|
- end
|
|
|
-
|
|
|
-
|
|
|
- % allocate and assign arff values
|
|
|
- arffData = zeros(size(gaze,1),5); % (time, x, y, confidence, frame_id)
|
|
|
- arffData(:,2) = gaze(:,1); % x
|
|
|
- arffData(:,3) = gaze(:,2); % y
|
|
|
- arffData(:,4) = 1.0; % confidence
|
|
|
- arffData(gaze(:,3)==0,4) = 0.0;
|
|
|
- arffData(:,5) = gaze(:,4); % frame id
|
|
|
-
|
|
|
- % calculate timestamps
|
|
|
- % get gaze count for each frame
|
|
|
- gazeCount = histc(gaze(:,4), frames(:,3));
|
|
|
-
|
|
|
- prevFrameId = -1;
|
|
|
- gazeCounter = 0;
|
|
|
- gazeStep = 0;
|
|
|
- for i=1:size(arffData,1)
|
|
|
- frameId = gaze(i,4);
|
|
|
- if (frameId ~= prevFrameId)
|
|
|
- gazeCounter = 0;
|
|
|
- gazeStep = frames(frameId,2)/gazeCount(frameId);
|
|
|
- end
|
|
|
- arffData(i,1) = frames(frameId,1) + gazeCounter*gazeStep;
|
|
|
- % make sure timestamps are monotonous. i.e. we don't move past next frame's time
|
|
|
- if (frameId<size(frames,1) && arffData(i,1) > frames(frameId+1,1))
|
|
|
- arffData(i,1) = frames(frameId+1,1);
|
|
|
- end
|
|
|
-
|
|
|
- % convert to microseconds
|
|
|
- arffData(i,1) = arffData(i,1)*1000000;
|
|
|
-
|
|
|
- gazeCounter = gazeCounter + 1;
|
|
|
- prevFrameId = frameId;
|
|
|
- end
|
|
|
- arffData(:,1) = floor(arffData(:,1)); % round to int
|
|
|
-
|
|
|
- % if there is a difference in the amount of frames referenced in gazeFile
|
|
|
- % and those in eventsFile then we get inf and nan values. The solution is to
|
|
|
- % keep entries just before those observations appear. (The reason for the frame
|
|
|
- % difference is not clear)
|
|
|
- indInf = find(isinf(arffData(:,1)));
|
|
|
- indNan = find(isnan(arffData(:,1)));
|
|
|
- indTot = min([indInf; indNan]);
|
|
|
-
|
|
|
- if (size(indTot,1)>0)
|
|
|
- arffData = arffData(1:indTot-1,:);
|
|
|
- warning(['Missing frames in ' eventsFile '. Written timestamps might be erroneous']);
|
|
|
end
|
|
|
-
|
|
|
- % save data
|
|
|
- relation = 'studyforrest_gaze';
|
|
|
-
|
|
|
- attributes = {'time', 'INTEGER';
|
|
|
- 'x', 'NUMERIC';
|
|
|
- 'y', 'NUMERIC';
|
|
|
- 'confidence', 'NUMERIC';
|
|
|
- 'frame_id', 'INTEGER'};
|
|
|
-
|
|
|
- %SaveArff(outputFile, arffData, metadata, attributes, relation);
|
|
|
-
|
|
|
- % write to file directly to speed up the process. 7 times faster based on measurements
|
|
|
- % start writing
|
|
|
- fid = fopen(outputFile, 'w+');
|
|
|
-
|
|
|
- % write relation
|
|
|
- fprintf(fid, '@RELATION %s\n\n', relation);
|
|
|
-
|
|
|
- % write metadata
|
|
|
- fprintf(fid, '%%@METADATA width_px %d\n', metadata.width_px);
|
|
|
- fprintf(fid, '%%@METADATA height_px %d\n', metadata.height_px);
|
|
|
- fprintf(fid, '%%@METADATA width_mm %.2f\n', metadata.width_mm);
|
|
|
- fprintf(fid, '%%@METADATA height_mm %.2f\n', metadata.height_mm);
|
|
|
- fprintf(fid, '%%@METADATA distance_mm %.2f\n\n', metadata.distance_mm);
|
|
|
-
|
|
|
- % write metadata extras. Those are data that vary between experiments
|
|
|
- for i=1:size(metadata.extra,1)
|
|
|
- fprintf(fid, '%%@METADATA %s %s\n', metadata.extra{i,1}, metadata.extra{i,2});
|
|
|
- end
|
|
|
- % print an empty line
|
|
|
- fprintf(fid, '\n');
|
|
|
- for i=1:size(attributes,1)
|
|
|
- fprintf(fid, '@ATTRIBUTE %s %s\n', attributes{i,1}, upper(attributes{i,2}));
|
|
|
- end
|
|
|
-
|
|
|
- % write data keyword
|
|
|
- fprintf(fid,'\n@DATA\n');
|
|
|
- % write actual data
|
|
|
- %for i=1:size(arffData,1)
|
|
|
- % fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData(i,:));
|
|
|
- %end
|
|
|
- fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData'); % simpler and faster
|
|
|
-
|
|
|
- % close file
|
|
|
- fclose(fid);
|
|
|
end
|