# Copyleft Gary Feng, 2008
# This script takes a TOBII *FXD.txt file as input
# and produces a data matrix with the subject number as a separate column.
# It also drops all other info in the header, but see the code for adding
# whatever info you want.
#
# This script can process multiple files at the same time. See Usage.
# Usage:
# First make sure you are in the same directory as the data file(s).
# 1. Open the DOS window by going to "Start->Run", and type "cmd", then "Enter".
# 2. To see the current directory, type in "DIR" and "Enter" (aka "Return").
# 3. To change directory, type "CD c:\whatever\directory\you\need\to".
# 4. Type something like "gawk -f tobii_fxd_addsubjnum.awk mw6FXD.txt",
#    and replace the last file name. You can also process all files with names
#    matching a particular pattern. For example,
#    "gawk -f tobii_fxd_addsubjnum.awk *.txt" will process all files in the current directory
#    that end with the ".txt" extension. Be careful not to include files you don't want.
#    Output will be displayed on the screen. Check to see if it looks right.
# 5. When you feel like it's correct, do the following:
#    Press the up-arrow key to bring back the last command you typed in; if this doesn't
#    work you can either try F3, or type it in again.
#    Add to the end " >outputfilename.txt" (change the filename to whatever you need, or else
#    the file will be overwritten if it already exists). The command now reads:
#    "gawk -f tobii_fxd_addsubjnum.awk mw6FXD.txt >outputfilename.txt"
#    Press "Enter", and you should see a new file created.
#    You can now open the new file in Excel or a text editor.
##################################################
# Script starts here
##################################################
#
# For every fixation line (a line starting with a digit) in the input
# file(s), print one output row:
#
#     <study name> TAB <subject> TAB <original fixation line>
#
# The study name and subject are taken from the "Study:" and "Subject:"
# header lines of the file currently being read.

####################
# header_value(line, label)
#   Return the text that follows `label` at the start of `line`, with
#   surrounding blanks/tabs and any trailing carriage return removed.
#     line  - the complete header line, e.g. "Study: mw6"
#     label - the prefix to drop, e.g. "Study:"
#   `value` is a local variable (AWK convention: extra parameter).
function header_value(line, label,    value) {
    # Skip exactly the label; whatever separator follows it is trimmed below,
    # so "Study: x", "Study:\tx" and "Study:x" all yield "x".
    value = substr(line, length(label) + 1)
    # Trim leading blanks/tabs and trailing blanks/tabs/CR. The CR matters
    # when a CRLF file is read without newline translation: otherwise it
    # would land in the middle of the output row.
    gsub(/^[ \t]+|[ \t\r]+$/, "", value)
    return value
}

####################
# BEGIN is a keyword; its commands are executed before anything is read
# from the input TXT file. We use it to set up some variables.
BEGIN {
    # FS is the input Field Separator: either space(s) or a tab.
    # (No individual field is referenced below; the fixation line is
    # printed whole, so this only affects how gawk splits $1..$NF.)
    FS = "[ ]+|\t"
    # OFS is the Output Field Separator; we use Tab.
    OFS = "\t"
    study = ""
    subj = ""
}

####################
# FNR restarts at 1 for every input file. Forget the previous file's
# header here, so that a file with a missing "Study:"/"Subject:" line
# shows empty columns instead of silently inheriting stale values.
# This rule must stay ABOVE the header rules: rules run in order, and
# the first line of a file may itself be a header line.
FNR == 1 {
    study = ""
    subj = ""
}

####################
# Now GAWK reads the input line by line and compares each line to all
# patterns below; when a pattern matches, its commands are executed.

# Study name: any line starting with "Study:" ("^" = beginning of line).
/^Study:/ {
    study = header_value($0, "Study:")
}

# Subject number: any line starting with "Subject:".
/^Subject:/ {
    subj = header_value($0, "Subject:")
}

####################
# Fixations are lines starting with a number; [0-9] matches any single
# digit. We assume the header lines come before the fixations in each
# file, so study and subj are already set when we get here.
/^[0-9]/ {
    # Print the study name, subject number, and the unmodified fixation line.
    print study, subj, $0
}

####################
# END is a keyword; its commands are executed after everything is read
# from the input. We don't do anything with it now.
END {
    # doing nothing special here
}