Studyforrest2Arff.m 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  % Studyforrest2Arff.m
  %
  % This function converts gaze data from the studyforrest file format to ARFF.
  % The input consists of a gaze file and a frame timing (events) file.
  %
  % input:
  %   eventsFile - file containing events (onset, duration, frameidx, videotime, audiotime, lasttrigger)
  %   gazeFile   - file containing gaze (x, y, pupil dilation(units?), frameId reference)
  %   metadata   - metadata in the form accepted by SaveArff.m. The fields
  %                width_px, height_px, width_mm, height_mm and distance_mm are
  %                written to the header. If the field 'extra' (n x 2 cell array
  %                of strings) is present, its name/value pairs are written too
  %   outputFile - (optional) name of the ARFF file. If it is not provided the
  %                data is stored in the same directory as eventsFile, using the
  %                events file name up to its last '_events' occurrence (or the
  %                whole name when '_events' is absent) with an .arff extension
  %
  % An error is raised when the output file cannot be opened for writing.
  function Studyforrest2Arff(eventsFile, gazeFile, metadata, outputFile)
      if (nargin < 4)
          [outDir, baseName] = fileparts(eventsFile);
          % strfind always searches for the pattern inside baseName (findstr
          % would swap the arguments when baseName is the shorter string)
          suffixPos = strfind(baseName, '_events');
          if (~isempty(suffixPos))
              baseName = baseName(1:suffixPos(end)-1);
          end
          outputFile = fullfile(outDir, [baseName '.arff']);
      end

      frames = importdata(eventsFile, '\t');
      frames = frames.data;

      % check for missing frames (disabled)
      % frame_step = frames(2:end,3) - frames(1:end-1,3);
      % if (size(find(frame_step>1),1) > 0)
      %     warning(['Missing frames in ' eventsFile '. No ARFF file is written']);
      %     return;
      % end

      gaze = importdata(gazeFile, '\t');
      if (IsOctave())
          % at points where confidence is 0 we have 2 entries in the .tsv file.
          % This translates to 2 columns from importdata with data. The last 2
          % columns are zeros. Below we add the frame numbering to the last
          % column. Also when the 3rd column is 0 the confidence is 0 too
          lowConfidence = (gaze(:,3) == 0);
          gaze(lowConfidence,4) = gaze(lowConfidence,2);
          gaze(lowConfidence,2) = 0; % remove frameId values
      else
          % in matlab the first two columns are NaN
          gaze(isnan(gaze)) = 0;
      end

      % get gaze points only until the last frame
      if (gaze(end,4) > frames(end,3))
          warning('Gaze entries point to more frames than they actually exist in %s', eventsFile);
          gaze = gaze(gaze(:,4) <= frames(end,3),:);
      end

      % allocate and assign arff values
      arffData = zeros(size(gaze,1),5); % (time, x, y, confidence, frame_id)
      arffData(:,2) = gaze(:,1); % x
      arffData(:,3) = gaze(:,2); % y
      arffData(:,4) = 1.0; % confidence
      arffData(gaze(:,3)==0,4) = 0.0;
      arffData(:,5) = gaze(:,4); % frame id

      % calculate timestamps
      % get gaze count for each frame
      gazeCount = histc(gaze(:,4), frames(:,3));

      % NOTE(review): frameId is used directly as a row index into frames and
      % gazeCount, i.e. frame ids are assumed to be 1-based and to match the
      % row order of the events file - TODO confirm
      prevFrameId = -1;
      gazeCounter = 0;
      gazeStep = 0;
      for i=1:size(arffData,1)
          frameId = gaze(i,4);
          if (frameId ~= prevFrameId)
              % new frame: spread its samples evenly over the frame duration
              gazeCounter = 0;
              gazeStep = frames(frameId,2)/gazeCount(frameId);
          end

          arffData(i,1) = frames(frameId,1) + gazeCounter*gazeStep;
          % make sure timestamps are monotonous. i.e. we don't move past next frame's time
          if (frameId < size(frames,1) && arffData(i,1) > frames(frameId+1,1))
              arffData(i,1) = frames(frameId+1,1);
          end

          % convert to microseconds
          arffData(i,1) = arffData(i,1)*1000000;

          gazeCounter = gazeCounter + 1;
          prevFrameId = frameId;
      end
      arffData(:,1) = floor(arffData(:,1)); % round to int

      % if there is a difference in the amount of frames referenced in gazeFile
      % and those in eventsFile then we get inf and nan values. The solution is to
      % keep entries just before those observations appear. (The reason for the frame
      % difference is not clear)
      firstInvalid = find(~isfinite(arffData(:,1)), 1);
      if (~isempty(firstInvalid))
          arffData = arffData(1:firstInvalid-1,:);
          warning('Missing frames in %s. Written timestamps might be erroneous', eventsFile);
      end

      % save data
      relation = 'studyforrest_gaze';
      attributes = {'time', 'INTEGER';
                    'x', 'NUMERIC';
                    'y', 'NUMERIC';
                    'confidence', 'NUMERIC';
                    'frame_id', 'INTEGER'};

      %SaveArff(outputFile, arffData, metadata, attributes, relation);
      % write to file directly to speed up the process. 7 times faster based on measurements

      % start writing
      [fid, openMsg] = fopen(outputFile, 'w+');
      if (fid == -1)
          error('Studyforrest2Arff:fileOpen', 'Could not open %s for writing: %s', outputFile, openMsg);
      end
      % close the file when the function exits, including on errors below
      closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

      % write relation
      fprintf(fid, '@RELATION %s\n\n', relation);

      % write metadata
      fprintf(fid, '%%@METADATA width_px %d\n', metadata.width_px);
      fprintf(fid, '%%@METADATA height_px %d\n', metadata.height_px);
      fprintf(fid, '%%@METADATA width_mm %.2f\n', metadata.width_mm);
      fprintf(fid, '%%@METADATA height_mm %.2f\n', metadata.height_mm);
      fprintf(fid, '%%@METADATA distance_mm %.2f\n\n', metadata.distance_mm);

      % write metadata extras. Those are data that vary between experiments
      if (isfield(metadata, 'extra'))
          for i=1:size(metadata.extra,1)
              fprintf(fid, '%%@METADATA %s %s\n', metadata.extra{i,1}, metadata.extra{i,2});
          end
      end
      % print an empty line
      fprintf(fid, '\n');

      for i=1:size(attributes,1)
          fprintf(fid, '@ATTRIBUTE %s %s\n', attributes{i,1}, upper(attributes{i,2}));
      end

      % write data keyword
      fprintf(fid,'\n@DATA\n');

      % write actual data. fprintf cycles through the format for every column
      % of the transposed matrix, i.e. one output line per gaze sample
      fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData');
  end