Browse Source

audio data

AgataKoziol 7 months ago
parent
commit
8414ceeddd

+ 1 - 0
annotations/eaf_2023/ak/converted/.DS_Store

@@ -0,0 +1 @@
+../../../../.git/annex/objects/5P/W9/MD5E-s6148--5dd6486a90acc6f6787dee9f770b7fbc/MD5E-s6148--5dd6486a90acc6f6787dee9f770b7fbc

+ 1 - 0
annotations/eaf_2023/ak/converted/77021_5/V20230127-070014_0_209500.csv

@@ -0,0 +1 @@
+../../../../../.git/annex/objects/5M/JP/MD5E-s5379--b4b29467aa0c2871f4cc7f9451dfe2bc.csv/MD5E-s5379--b4b29467aa0c2871f4cc7f9451dfe2bc.csv

+ 1 - 0
metadata/.DS_Store

@@ -0,0 +1 @@
+../.git/annex/objects/Fz/2x/MD5E-s6148--5b30cb69a96dfcf43019cf485b753ad8/MD5E-s6148--5b30cb69a96dfcf43019cf485b753ad8

+ 1 - 0
metadata/annotations.csv

@@ -10,3 +10,4 @@ eaf_2023/ak,77033_5/77033_5.WAV,0,3378792,3441447,77033_5/77033_5.eaf,eaf,,77033
 eaf_2023/ak,77033_5/77033_5.WAV,0,7034603,7098981,77033_5/77033_5.eaf,eaf,,77033_5/77033_5_7034603_7098981.csv,2023-10-22 15:45:26,0.1.1,,
 eaf_2023/ak,77033_5/77033_5.WAV,0,8343248,8406016,77033_5/77033_5.eaf,eaf,,77033_5/77033_5_8343248_8406016.csv,2023-10-22 15:45:26,0.1.1,,
 eaf_2023/ak,77033_5/77033_5.WAV,0,899827,962068,77033_5/77033_5.eaf,eaf,,77033_5/77033_5_899827_962068.csv,2023-10-22 15:45:26,0.1.1,,
+eaf_2023/ak,77021_5/V20230127-070014.WAV,0,0,209500,77021_5/V20230127-070014.eaf,eaf,,77021_5/V20230127-070014_0_209500.csv,2023-10-22 16:55:12,0.1.1,,

+ 3 - 2
metadata/children.csv

@@ -1,2 +1,3 @@
-child_id,experiment,child_dob,location_id,child_sex,language,monoling,languages,mat_ed,fat_ed,monoling_criterion,n_of_siblings,household_size,dob_criterion,dob_accuracy,discard
-77033_5,MOVIN,2021-02-08,warsaw,m,polish,Y,,17,17,“we asked families which languages they spoke in the home”,0,3,exact,day,0
+child_id,experiment,child_dob,location_id,child_sex,language,monoling,languages,mat_ed,fat_ed,monoling_criterion,n_of_siblings,household_size,dob_criterion,dob_accuracy,discard;
+77021_5,MOVIN,2020-10-15,warsaw,m,polish,N,polish 80%; english 20%,19,17,“we asked families which languages they spoke in the home”,0,3,exact,day,0
+77033_5,MOVIN,2021-02-08,warsaw,m,polish,Y,,17,17,“we asked families which languages they spoke in the home”,0,3,exact,day,0;

+ 3 - 2
metadata/recordings.csv

@@ -1,2 +1,3 @@
-experiment,child_id,date_iso,start_time,start_time_accuracy,recording_device_type,recording_filename,location_id,duration
-MOVIN,77033_5,2023-04-14,NA,hour,usb,77033_5/77033_5.WAV,warsaw,19100960
+experiment,child_id,date_iso,start_time,start_time_accuracy,recording_device_type,recording_filename,location_id,duration
+MOVIN,77021_5,2023-01-26,NA,hour,usb,77021_5/V20230127-070014.WAV,warsaw,584705
+MOVIN,77033_5,2023-04-14,NA,hour,usb,77033_5/77033_5.WAV,warsaw,19100960

+ 1 - 1
recordings/converted/.DS_Store

@@ -1 +1 @@
-../../.git/annex/objects/0V/Zp/MD5E-s6148--689f59669593f9c6162bbe3e14168f7e/MD5E-s6148--689f59669593f9c6162bbe3e14168f7e
+../../.git/annex/objects/FG/mj/MD5E-s6148--f124cffc33bb85c7c5a7015e517d90ff/MD5E-s6148--f124cffc33bb85c7c5a7015e517d90ff

+ 1 - 1
recordings/converted/standard/.DS_Store

@@ -1 +1 @@
-../../../.git/annex/objects/85/3v/MD5E-s6148--f04bd34aa70c217c85aedec2e23603a0/MD5E-s6148--f04bd34aa70c217c85aedec2e23603a0
+../../../.git/annex/objects/zz/KZ/MD5E-s6148--13ed22d93b7a9dc52ce17937dc5e3d64/MD5E-s6148--13ed22d93b7a9dc52ce17937dc5e3d64

+ 1 - 0
recordings/converted/standard/77021_5/V20230127-070014.wav

@@ -0,0 +1 @@
+../../../../.git/annex/objects/x2/k8/MD5E-s18698544--2e5e2e8c3c5fd9fcf3c9282c4496a11f.wav/MD5E-s18698544--2e5e2e8c3c5fd9fcf3c9282c4496a11f.wav

+ 0 - 1
recordings/converted/standard/parameters_20231022_154333.yml

@@ -1 +0,0 @@
-../../../.git/annex/objects/0k/q8/MD5E-s283--3f43e34b993dfc2955b4aec6317c980f.yml/MD5E-s283--3f43e34b993dfc2955b4aec6317c980f.yml

+ 1 - 0
recordings/converted/standard/parameters_20231022_161932.yml

@@ -0,0 +1 @@
+../../../.git/annex/objects/P3/Z5/MD5E-s283--3c724d9b59b102b3dab4acbd1023ce43.yml/MD5E-s283--3c724d9b59b102b3dab4acbd1023ce43.yml

+ 1 - 1
recordings/converted/standard/recordings.csv

@@ -1 +1 @@
-../../../.git/annex/objects/fG/W1/MD5E-s266--5b78a6f73f034e09ad02ba52f0b31fdf.csv/MD5E-s266--5b78a6f73f034e09ad02ba52f0b31fdf.csv
+../../../.git/annex/objects/KG/fg/MD5E-s226--e57b826bd1e4b03ad71036d4b931a795.csv/MD5E-s226--e57b826bd1e4b03ad71036d4b931a795.csv

+ 1 - 0
recordings/raw/77021_5/V20230127-070014.WAV

@@ -0,0 +1 @@
+../../../.git/annex/objects/0J/pm/MD5E-s14227968--37d72988c6b6bfa7d0e91a8894c81423.WAV/MD5E-s14227968--37d72988c6b6bfa7d0e91a8894c81423.WAV

+ 9 - 6
scripts/import_eaf_poland.py

@@ -49,7 +49,7 @@ XDS_MAPPING = {
         'U':'U',
         }
 
-BP_RECS = ['77033_5/77033_5.WAV']
+BP_RECS = ['77033_5/77033_5.WAV', '77021_5/V20230127-070014.WAV']
 def convert(filename: str, filter=None, **kwargs) -> pd.DataFrame:
 
     eaf = pympi.Elan.Eaf(filename)
@@ -135,7 +135,7 @@ def convert(filename: str, filter=None, **kwargs) -> pd.DataFrame:
                 
     
     return pd.DataFrame(segments.values())
-BP_REC = ['77033_5.eaf']
+BP_REC = ['V20230127-070014.eaf']
 chunk_break = 300000 #here put in miliseconds approximately how long is the shortest break between annotation chunks
 if __name__ == '__main__' :
     
@@ -148,7 +148,9 @@ if __name__ == '__main__' :
     ])
 
     files['time_seek'] = 0
+    print(files['raw_filename'])
     files['raw_filename'] = files['raw_filename'].apply(os.path.basename)
+    print(files['raw_filename'])
     files['recording_filename'] = files['raw_filename'].apply(lambda x: x.split('.')[-2] + '/' + x.split('.')[-2] + '.WAV')
     # files = files[files['recording_filename'].isin(project.recordings['recording_filename'])]
     files['set'] = 'eaf_2023/ak'
@@ -157,8 +159,9 @@ if __name__ == '__main__' :
     _files = []
 
     for f in files.to_dict(orient='records'):
-        eaf = pympi.Elan.Eaf(Path('./annotations') / 'eaf_2023' / 'ak' / 'raw' / f['raw_filename'].split('.')[0] / f['raw_filename'])
-
+        eaf = pympi.Elan.Eaf(Path('./annotations') / 'eaf_2023' / 'ak' / 'raw' / '77021_5/V20230127-070014.eaf')
+        print(f['raw_filename'])
+        
         df = pd.DataFrame(columns=['range_onset', 'range_offset'])
         eaf.get_full_time_interval()
 
@@ -184,8 +187,8 @@ if __name__ == '__main__' :
         final['range_onset'] = start_times
         final['range_offset'] = end_times
         final['time_seek'] = 0
-        final['raw_filename'] = '77033_5/77033_5.eaf'
-        final['recording_filename'] = '77033_5/77033_5.WAV'
+        final['raw_filename'] = '77021_5/V20230127-070014.eaf'
+        final['recording_filename'] = '77021_5/V20230127-070014.WAV'
         final['format'] = 'eaf'
         final['set'] = 'eaf_2023/ak'