Tripal v1.0 (6.x-1.0)
obo_loader.inc
Go to the documentation of this file.
00001 <?php
/**
 * Builds the form used to submit an OBO loading job.
 *
 * The form offers two alternatives: select an OBO reference that is already
 * stored in the {tripal_cv_obo} table, or describe a new one by giving a
 * name plus either a remote URL or a local file path.
 *
 * @param $form_state
 *   The form state array supplied by the Drupal form API. It is not read
 *   by this function.
 *
 * @return
 *   The form array.
 */
function tripal_cv_obo_form(&$form_state = NULL) {
  $form = array();

  // Build the options for the select box from the saved OBO references.
  // The leading empty entry (key 0) lets the user leave the select unset,
  // which the submit handler treats as "no saved reference chosen".
  $obos = array('');
  $results = db_query("SELECT * FROM {tripal_cv_obo} ORDER BY name");
  while ($obo = db_fetch_object($results)) {
    $obos[$obo->obo_id] = $obo->name;
  }

  $form['obo_existing'] = array(
    '#type' => 'fieldset',
    '#title' => t('Use a Saved Ontology OBO Reference')
  );

  $form['obo_new'] = array(
    '#type' => 'fieldset',
    '#title' => t('Use a New Ontology OBO Reference')
  );

  $form['obo_existing']['existing_instructions']= array(
    '#value' => t('The Ontology OBO files listed in the drop down below have been automatically added upon
                   installation of the Tripal CV module or were added from a previous upload.  Select
                   an OBO, then click the submit button to load the vocabulary into the database.  If the
                   vocabularies already exist then the ontology will be updated.'),
    '#weight'        => -1
  );

  $form['obo_existing']['obo_id'] = array(
    '#title' => t('Ontology OBO File Reference'),
    '#type' => 'select',
    '#options' => $obos,
    '#weight'        => 0
  );

  $form['obo_new']['path_instructions']= array(
    '#value' => t('Provide the name and path for the OBO file.  If the vocabulary OBO file
                   is stored local to the server provide a file name. If the vocabulary is stored remotely,
                   provide a URL.  Only provide a URL or a local file, not both.'),
    '#weight'        => 0
  );

  $form['obo_new']['obo_name']= array(
    '#type'          => 'textfield',
    '#title'         => t('New Vocabulary Name'),
    '#description'   => t('Please provide a name for this vocabulary.  After upload, this name will appear in the drop down
                           list above for use again later.'),
    '#weight'        => 1
  );

  // The URL and file fields start out empty. The original code passed an
  // undefined variable as the default, which evaluated to NULL and raised
  // a notice.
  $form['obo_new']['obo_url']= array(
    '#type'          => 'textfield',
    '#title'         => t('Remote URL'),
    '#description'   => t('Please enter a URL for the online OBO file.  The file will be downloaded and parsed.
                           (e.g. http://www.obofoundry.org/ro/ro.obo)'),
    '#default_value' => '',
    '#weight'        => 2
  );

  $form['obo_new']['obo_file']= array(
    '#type'          => 'textfield',
    '#title'         => t('Local File'),
    '#description'   => t('Please enter the full system path for an OBO definition file, or a path within the Drupal
                           installation (e.g. /sites/default/files/xyz.obo).  The path must be accessible to the
                           server on which this Drupal instance is running.'),
    '#default_value' => '',
    '#weight'        => 3
  );

  $form['submit'] = array(
    '#type'         => 'submit',
    '#value'        => t('Submit'),
    '#weight'       => 5,
    '#executes_submit_callback' => TRUE,
  );

  $form['#redirect'] = 'admin/tripal/tripal_cv/obo_loader';

  return $form;
}
00099 
/**
 * Submit handler for tripal_cv_obo_form(): queues the matching loader job.
 *
 * A saved OBO reference takes precedence. Otherwise a job is queued for the
 * remote URL if one was given, or else for the local file. When none of the
 * three values is set no job is queued.
 *
 * @param $form
 *   The submitted form array (unused).
 * @param $form_state
 *   The form state; the 'obo_id', 'obo_name', 'obo_url' and 'obo_file'
 *   entries of $form_state['values'] are read.
 */
function tripal_cv_obo_form_submit($form, &$form_state) {
  global $user;

  $values   = $form_state['values'];
  $obo_id   = $values['obo_id'];
  $obo_name = $values['obo_name'];
  $obo_url  = $values['obo_url'];
  $obo_file = $values['obo_file'];

  // The saved reference is looked up regardless of which branch is taken
  // below; its name is only used for the job title of a saved reference.
  $obo = db_fetch_object(db_query("SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d", $obo_id));

  // a saved reference was selected
  if ($obo_id) {
    tripal_add_job("Load OBO $obo->name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_id", array($obo_id), $user->uid);
    return;
  }

  // a new reference given as a remote URL
  if ($obo_url) {
    tripal_add_job("Load OBO $obo_name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_url", array($obo_name, $obo_url), $user->uid);
    return;
  }

  // a new reference given as a local file
  if ($obo_file) {
    tripal_add_job("Load OBO $obo_name", 'tripal_cv',
      "tripal_cv_load_obo_v1_2_file", array($obo_name, $obo_file), $user->uid);
  }
}
00135 
/**
 * Builds the form used to request an update of the Chado cvtermpath table.
 *
 * The select box lists every record of the Chado cv table except the one
 * named 'tripal', keyed by cv_id.
 *
 * @return
 *   The form array.
 */
function tripal_cv_cvtermpath_form() {
  $form = array();

  // Collect the vocabularies for the select box. The leading empty entry
  // (key 0) is the "nothing selected" choice.
  $cvs = array('');
  $results = chado_query("SELECT * FROM {cv} WHERE NOT name = 'tripal' ORDER BY name ");
  while ($cv = db_fetch_object($results)) {
    $cvs[$cv->cv_id] = $cv->name;
  }

  $form['cvid'] = array(
    '#title' => t('Controlled Vocabulary/Ontology Name'),
    '#type' => 'select',
    '#options' => $cvs,
    '#description' => t('The Chado cvtermpath is a database table that provides lineage for ontology terms
      and is useful for quickly finding any ancestor parent of a term.  This table must be populated for each
      ontology.  Select a controlled vocabulary for which you would like to update the cvtermpath.'),
  );

  $form['description'] = array(
    '#type' => 'item',
    '#value' => t("Submit a job to update chado cvtermpath table."),
    '#weight' => 1,
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Update cvtermpath'),
    '#weight' => 2,
  );

  return $form;
}
00176 
00177 
/**
 * Loads the ontology described by a saved OBO reference.
 *
 * The reference is read from the {tripal_cv_obo} table. A path starting
 * with http://, https:// or ftp:// is handed to the URL loader; any other
 * path is treated as a local file, tried first relative to the Drupal
 * installation and then as given.
 *
 * @param $obo_id
 *   The obo_id of the record in the {tripal_cv_obo} table.
 * @param $jobid
 *   Optional job identifier, passed through to the loader.
 */
function tripal_cv_load_obo_v1_2_id($obo_id, $jobid = NULL) {

  // get the OBO reference
  $sql = "SELECT * FROM {tripal_cv_obo} WHERE obo_id = %d";
  $obo = db_fetch_object(db_query($sql, $obo_id));
  if (!$obo) {
    // without a record there is no name or path to work with
    print "ERROR: could not find an OBO reference with the id: '$obo_id'\n";
    return;
  }

  // The loaders are called with $is_new = 0 because the reference is
  // already stored and must not be added a second time.

  // if the reference is for a remote URL then run the URL processing function
  if (preg_match('/^(https?|ftp):\/\//i', $obo->path)) {
    tripal_cv_load_obo_v1_2_url($obo->name, $obo->path, $jobid, 0);
    return;
  }

  // check to see if the file is located local to Drupal
  $dfile = $_SERVER['DOCUMENT_ROOT'] . base_path() . $obo->path;
  if (file_exists($dfile)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $dfile, $jobid, 0);
    return;
  }

  // if not local to Drupal, the file must be someplace else, just use
  // the full path provided
  if (file_exists($obo->path)) {
    tripal_cv_load_obo_v1_2_file($obo->name, $obo->path, $jobid, 0);
    return;
  }

  print "ERROR: could not find OBO file: '$obo->path'\n";
}
00211 
/**
 * Loads an ontology from an OBO file on the local file system.
 *
 * @param $obo_name
 *   The name under which the reference is saved when $is_new is set.
 * @param $file
 *   The path of the OBO file.
 * @param $jobid
 *   Optional job identifier, passed through to the loader.
 * @param $is_new
 *   When TRUE (the default) the name/path pair is saved as a new OBO
 *   reference after loading.
 */
function tripal_cv_load_obo_v1_2_file($obo_name, $file, $jobid = NULL, $is_new = TRUE) {
  // filled by the loader, keyed by namespace with cv_id values
  $loaded_cvs = array();

  // TODO: need better error detection
  tripal_cv_load_obo_v1_2($file, $jobid, $loaded_cvs);

  // remember the reference so it can be selected again later
  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $file);
  }
  print "Ontology Sucessfully loaded!\n";

  // update the cvtermpath table
  tripal_cv_load_update_cvtermpath($loaded_cvs, $jobid);
}
00232 
/**
 * Downloads an OBO file from a remote URL and loads it.
 *
 * The file is copied to a temporary file, parsed from there, and the
 * temporary file is removed afterwards. Download failures are reported
 * through tripal_cv_obo_quiterror(), which terminates the script.
 *
 * @param $obo_name
 *   The name under which the reference is saved when $is_new is set.
 * @param $url
 *   The URL of the OBO file.
 * @param $jobid
 *   Optional job identifier, passed through to the loader.
 * @param $is_new
 *   When TRUE (the default) the name/URL pair is saved as a new OBO
 *   reference after loading.
 */
function tripal_cv_load_obo_v1_2_url($obo_name, $url, $jobid = NULL, $is_new = TRUE) {

  $newcvs = array();

  // Open the remote file before creating anything locally so that an
  // unreachable URL does not leave a stray temporary file behind.
  $url_fh = fopen($url, "r");
  if (!$url_fh) {
    tripal_cv_obo_quiterror("Unable to download the remote OBO file at $url. Could a firewall be blocking outgoing connections? ".
          " if you are unable to download the file you may manually download the OBO file and use the web interface to ".
          " specify the location of the file on your server.");
  }

  // first download the OBO
  $temp = tempnam(sys_get_temp_dir(), 'obo_');
  $obo_fh = $temp ? fopen($temp, "w") : FALSE;
  if (!$obo_fh) {
    fclose($url_fh);
    if ($temp) {
      unlink($temp);
    }
    tripal_cv_obo_quiterror("Unable to create a temporary file for downloading the remote OBO file at $url.");
  }
  print "Downloading URL $url, saving to $temp\n";
  $copied = stream_copy_to_stream($url_fh, $obo_fh);
  fclose($url_fh);
  fclose($obo_fh);
  if ($copied === FALSE) {
    unlink($temp);
    tripal_cv_obo_quiterror("Unable to save the remote OBO file at $url to the temporary file $temp.");
  }

  // second, parse the OBO
  tripal_cv_load_obo_v1_2($temp, $jobid, $newcvs);

  // now remove the temp file
  unlink($temp);

  if ($is_new) {
    tripal_cv_load_obo_add_ref($obo_name, $url);
  }

  // update the cvtermpath table
  tripal_cv_load_update_cvtermpath($newcvs, $jobid);

  print "Ontology Sucessfully loaded!\n";
}
00273 
/**
 * Updates the cvtermpath table for each vocabulary touched by a load.
 *
 * @param $newcvs
 *   An array whose values are cv_id values; the keys are not used.
 * @param $jobid
 *   The job identifier, passed through to tripal_cv_update_cvtermpath().
 */
function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {

  print "\nUpdating cvtermpath table.  This may take a while...\n";

  // only the cv_id values matter here
  foreach ($newcvs as $cv_id) {
    tripal_cv_update_cvtermpath($cv_id, $jobid);
  }
}
00285 
/**
 * Saves an OBO reference in the {tripal_cv_obo} table.
 *
 * @param $name
 *   The name of the vocabulary.
 * @param $path
 *   The path or URL of the OBO file.
 */
function tripal_cv_load_obo_add_ref($name, $path) {
  // the '%s' placeholders are filled in by db_query()
  db_query("INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')", $name, $path);
}
00293 
/**
 * Parses an OBO file and loads its typedefs and terms into Chado.
 *
 * The stanzas are first staged in the tripal_obo_temp table by
 * tripal_cv_obo_parse() and are then processed between
 * tripal_db_start_transaction() and tripal_db_commit_transaction().
 *
 * @param $file
 *   The path of the OBO file to load.
 * @param $jobid
 *   Optional job identifier used for progress reporting.
 * @param $newcvs
 *   Passed by reference; receives cv_id values keyed by namespace for the
 *   vocabularies touched during the load.
 */
function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {

  $header = array();

  // the staging table is created the first time it is needed
  $ret = array();
  if (!db_table_exists('tripal_obo_temp')) {
    $schema = tripal_cv_get_custom_tables('tripal_obo_temp');
    if (!tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp'])) {
      watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR);
      return;
    }
  }

  // start from an empty staging table
  chado_query("DELETE FROM tripal_obo_temp");

  // without a persistent connection the user is told the load will be slow
  $connection = tripal_db_persistent_chado();
  if (!$connection) {
     print "A persistant connection was not obtained. Loading will be slow\n";
  }

  tripal_db_start_transaction();
  if ($connection) {
     print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
           "If the load fails or is terminated prematurely then the entire set of \n" .
           "insertions/updates is rolled back and will not be found in the database\n\n";
  }

  print "Step 1: Preloading File $file\n";

  // both the 'internal' and the '_global' database must be present
  foreach (array('internal', '_global') as $dbname) {
    if (!tripal_db_add_db($dbname)) {
      tripal_cv_obo_quiterror("Cannot add '$dbname' database");
    }
  }

  // stage the stanzas; the header tags come back through $header
  $default_db = tripal_cv_obo_parse($file, $header, $jobid);

  // the ontology's own vocabulary is named after its default namespace
  $namespace = $header['default-namespace'][0];
  $defaultcv = tripal_cv_add_cv($namespace, '');
  if (!$defaultcv) {
    tripal_cv_obo_quiterror('Cannot add namespace ' . $namespace);
  }
  $newcvs[$namespace] = $defaultcv->cv_id;

  // typedefs go in before the terms
  $typedefs = chado_query("
    SELECT * FROM tripal_obo_temp
    WHERE type = 'Typedef' 
  ");
  while ($typedef = db_fetch_object($typedefs)) {
    $stanza = unserialize(base64_decode($typedef->stanza));
    tripal_cv_obo_process_term($stanza, $defaultcv->name, 1, $newcvs, $default_db);
  }

  // then the terms themselves
  print "\nStep 2: Loading terms...\n";
  if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
    tripal_cv_obo_quiterror('Cannot add terms from this ontology');
  }

  // everything went in, so the transaction can be committed
  tripal_db_commit_transaction();
}
00371 
/**
 * Logs an error for the OBO loader and terminates the script.
 *
 * @param $message
 *   The message to record with watchdog().
 */
function tripal_cv_obo_quiterror($message) {
  // record the problem, then stop: this function never returns
  watchdog("T_obo_loader", $message, array(), WATCHDOG_ERROR);
  exit();
}
00382 
/**
 * Adds every 'Term' stanza staged in tripal_obo_temp to the database.
 *
 * Progress for this phase is reported in the 50-100% range of the job,
 * following the 0-50% range reported by tripal_cv_obo_parse().
 *
 * @param $defaultcv
 *   The name of the default vocabulary for the terms.
 * @param $jobid
 *   Optional job identifier used for progress reporting.
 * @param $newcvs
 *   Passed by reference to tripal_cv_obo_process_term().
 * @param $default_db
 *   The default database name, passed to tripal_cv_obo_process_term().
 *
 * @return
 *   1 on completion. Failures terminate the script through
 *   tripal_cv_obo_quiterror().
 */
function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {

  // iterate through each term from the OBO file and add it
  $sql = "
    SELECT * FROM tripal_obo_temp
    WHERE type = 'Term' 
    ORDER BY id
  ";
  $terms = chado_query($sql);
  $count = pg_num_rows($terms);

  // calculate the interval for updates
  $interval = max(1, intval($count * 0.0001));

  $i = 0;
  while ($t = db_fetch_object($terms)) {
    $term = unserialize(base64_decode($t->stanza));

    // update the job status every interval
    if ($jobid and $i % $interval == 0) {
      _tripal_cv_obo_process_terms_progress($jobid, $i, $count);
    }

    // add/update this term
    if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
      tripal_cv_obo_quiterror("Failed to process terms from the ontology");
    }

    $i++;
  }

  // set the final status
  if ($jobid) {
    _tripal_cv_obo_process_terms_progress($jobid, $i, $count);
  }

  return 1;
}

/**
 * Reports the progress of the term loading phase for a job.
 *
 * @param $jobid
 *   The job identifier.
 * @param $done
 *   The number of terms processed so far.
 * @param $total
 *   The total number of terms to process.
 */
function _tripal_cv_obo_process_terms_progress($jobid, $done, $total) {
  // With nothing to process the phase is complete; this also avoids a
  // division by zero.
  $complete = ($total > 0) ? ($done / $total) * 50 : 50;

  // The offset belongs to the percentage, not to the job id.
  tripal_job_set_progress($jobid, 50 + intval($complete));
  printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $done, $total, $complete * 2, number_format(memory_get_usage()));
}
00432 
/**
 * Adds a single parsed OBO stanza (term or typedef) to the database.
 *
 * Besides the cvterm itself this handles the alt_id, synonym (including the
 * deprecated exact_/narrow_/broad_synonym forms), comment, xref,
 * xref_analog, xref_unk, is_a and relationship tags. The tags is_anonymous,
 * subset (beyond being passed to tripal_cv_add_cvterm()), intersection_of,
 * union_of, disjoint_from, replaced_by, consider, use_term and builtin are
 * not handled.
 *
 * Every failure is reported through tripal_cv_obo_quiterror(), which
 * terminates the script.
 *
 * @param $term
 *   The stanza as an array keyed by tag name; each entry is an array of
 *   the values found for that tag.
 * @param $defaultcv
 *   The name of the default vocabulary.
 * @param $is_relationship
 *   Passed to tripal_cv_add_cvterm(); the loader passes 1 for Typedef
 *   stanzas and 0 for Term stanzas.
 * @param $newcvs
 *   Passed by reference; when the stanza has a namespace tag, the cv_id of
 *   the added cvterm is stored under that namespace.
 * @param $default_db
 *   The default database name, passed to tripal_cv_add_cvterm().
 *
 * @return
 *   1 on success.
 */
function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {

  // construct the term array for sending to the tripal_cv_add_cvterm function
  // for adding a new cvterm
  $t = array(
    'id' => $term['id'][0],
    'name' => $term['name'][0],
  );
  foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
    if (array_key_exists($tag, $term)) {
      $t[$tag] = $term[$tag][0];
    }
  }

  // add the cvterm
  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
  if (!$cvterm) {
    // $term['id'] is an array of values, so report its first entry
    tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
  }

  if (array_key_exists('namespace', $term)) {
    $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
  }

  // alternate ids are stored as database references of the cvterm
  if (array_key_exists('alt_id', $term)) {
    foreach ($term['alt_id'] as $alt_id) {
      if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
        tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
      }
    }
  }

  // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
  // types to be of the v1.2 standard by inserting the scope after the
  // quoted synonym text
  $deprecated = array(
    'exact_synonym' => 'EXACT',
    'narrow_synonym' => 'NARROW',
    'broad_synonym' => 'BROAD',
  );
  foreach ($deprecated as $tag => $scope) {
    if (array_key_exists($tag, $term)) {
      foreach ($term[$tag] as $synonym) {
        $term['synonym'][] = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 ' . $scope . ' $2', $synonym);
      }
    }
  }

  // add synonyms for this cvterm, now including any converted ones
  if (array_key_exists('synonym', $term)) {
    if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
      tripal_cv_obo_quiterror("Cannot add/update synonyms");
    }
  }

  // add the comment to the cvtermprop table; the position of each comment
  // is passed as the last argument
  if (array_key_exists('comment', $term)) {
    foreach (array_values($term['comment']) as $j => $comment) {
      if (!tripal_cv_obo_add_cvterm_prop($cvterm, 'comment', $comment, $j)) {
        tripal_cv_obo_quiterror("Cannot add/update cvterm property");
      }
    }
  }

  // add any other external dbxrefs
  foreach (array('xref', 'xref_analog', 'xref_unk') as $tag) {
    if (array_key_exists($tag, $term)) {
      foreach ($term[$tag] as $xref) {
        if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
          tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
        }
      }
    }
  }

  // add is_a relationships for this cvterm
  if (array_key_exists('is_a', $term)) {
    foreach ($term['is_a'] as $is_a) {
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
        tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
      }
    }
  }

  // add the other relationships; the value holds the relationship name
  // followed by whitespace and the object
  if (array_key_exists('relationship', $term)) {
    foreach ($term['relationship'] as $value) {
      $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
      $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
        tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
      }
    }
  }

  return 1;
}
00593 
/**
 * Adds a relationship between a cvterm and another term of the OBO file.
 *
 * The relationship type is looked up with the default database first and
 * then with 'OBO_REL'. The object term is read from the staged stanzas,
 * added as a cvterm, and the cvterm_relationship record is inserted unless
 * it already exists. Failures terminate the script through
 * tripal_cv_obo_quiterror().
 *
 * @param $cvterm
 *   The subject cvterm object; its cvterm_id and name are used.
 * @param $defaultcv
 *   The name of the default vocabulary.
 * @param $rel
 *   The name of the relationship (e.g. 'is_a').
 * @param $objname
 *   The id of the object term as staged in tripal_obo_temp.
 * @param $object_is_relationship
 *   Passed to tripal_cv_add_cvterm() when the object term is added.
 * @param $default_db
 *   The default database name.
 *
 * @return
 *   TRUE on success.
 */
function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, 
  $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {

  // first try the relationship term with the default database ...
  $relcvterm = tripal_cv_add_cvterm(
    array(
      'name' => $rel,
      'id' => "$default_db:$rel",
      'definition' => '',
      'is_obsolete' => 0,
    ),
    $defaultcv, 1, 0, $default_db
  );

  // ... and fall back to the relationship ontology when that fails
  if (!$relcvterm) {
    $relcvterm = tripal_cv_add_cvterm(
      array(
        'name' => $rel,
        'id' => "OBO_REL:$rel",
        'definition' => '',
        'is_obsolete' => 0,
      ),
      $defaultcv, 1, 0, 'OBO_REL'
    );
    if (!$relcvterm) {
      tripal_cv_obo_quiterror("Cannot find the relationship term in the current ontology or in the relationship ontology: $rel\n");
    }
  }

  // the object of the relationship must be one of the staged stanzas
  $oterm = tripal_cv_obo_get_term($objname);
  if (!$oterm) {
    tripal_cv_obo_quiterror("Could not find object term $objname\n");
  }

  // copy the first value of each relevant tag for tripal_cv_add_cvterm()
  $objterm = array(
    'id' => $oterm['id'][0],
    'name' => $oterm['name'][0],
  );
  foreach (array('def', 'subset', 'namespace', 'is_obsolete') as $tag) {
    if (array_key_exists($tag, $oterm)) {
      $objterm[$tag] = $oterm[$tag][0];
    }
  }
  $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);
  if (!$objcvterm) {
    tripal_cv_obo_quiterror("Cannot add cvterm " . $oterm['name'][0]);
  }

  // nothing more to do when the relationship is already recorded
  $values = array(
    'type_id'    => $relcvterm->cvterm_id,
    'subject_id' => $cvterm->cvterm_id,
    'object_id'  => $objcvterm->cvterm_id
  );
  $existing = tripal_core_chado_select('cvterm_relationship', array('*'), $values,
    array('statement_name' => 'sel_cvtermrelationship_tysuob'));
  if (count($existing) != 0) {
    return TRUE;
  }

  // otherwise insert it
  $inserted = tripal_core_chado_insert('cvterm_relationship', $values, array(
    'statement_name' => 'ins_cvtermrelationship_tysuob',
    'return_record' => FALSE
  ));
  if (!$inserted) {
    tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
  }

  return TRUE;
}
00673 
/**
 * Retrieves a staged stanza from the tripal_obo_temp table.
 *
 * @param $id
 *   The id of the stanza.
 *
 * @return
 *   The unserialized stanza, or FALSE when no record has that id.
 */
function tripal_cv_obo_get_term($id) {
  $rows = tripal_core_chado_select(
    'tripal_obo_temp',
    array('stanza'),
    array('id' => $id),
    array('statement_name' => 'sel_tripalobotemp_id')
  );

  // stanzas are stored base64 encoded and serialized
  return (count($rows) == 0) ? FALSE : unserialize(base64_decode($rows[0]->stanza));
}
00687 
/**
 * Adds the synonyms of a stanza to the cvtermsynonym table.
 *
 * Each synonym value is expected to hold the quoted synonym text followed
 * by its scope (e.g. EXACT). The scope is stored as a term of the
 * 'synonym_type' vocabulary, which is created when missing. Failures
 * terminate the script through tripal_cv_obo_quiterror().
 *
 * @param $term
 *   The stanza as an array keyed by tag name; only the 'synonym' entry is
 *   read.
 * @param $cvterm
 *   The cvterm object the synonyms belong to; its cvterm_id and name are
 *   used.
 *
 * @return
 *   TRUE on success.
 */
function tripal_cv_obo_add_synonyms($term, $cvterm) {

  // make sure we have a 'synonym_type' vocabulary
  $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
  if (!$syncv) {
    tripal_cv_obo_quiterror("Cannot add the 'synonym_type' vocabulary");
  }

  if (!array_key_exists('synonym', $term)) {
    return TRUE;
  }

  // now add the synonyms
  foreach ($term['synonym'] as $synonym) {

    // separate out the synonym definition and the synonym type
    $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);

    // The scope will be 'EXACT', etc...  When the pattern does not match
    // (no scope after the quoted text) or the scope is empty, default to
    // 'exact'. preg_match() is used because preg_replace() would hand back
    // the whole synonym line on a failed match.
    $scope = 'exact';
    $matches = array();
    if (preg_match('/^.*"\s+(.*?)\s+.*$/', $synonym, $matches) and $matches[1]) {
      $scope = drupal_strtolower($matches[1]);
    }

    // make sure the synonym type exists in the 'synonym_type' vocabulary
    $values = array(
      'name' => $scope,
      'cv_id' => array(
        'name' => 'synonym_type',
      ),
    );
    // NOTE(review): 'is_updlicate' looks like a misspelled option name;
    // confirm against tripal_core_chado_select() before changing it.
    $options = array('statement_name' => 'sel_cvterm_nacv', 'is_updlicate' => 1);
    $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);

    // if it doesn't exist then add it
    if (!$results) {
      // Build a 'term' array so we can add the missing term. A separate
      // variable is used so that the $term parameter is not overwritten.
      $scope_term = array(
         'name' => $scope,
         'id' => "internal:$scope",
         'definition' => '',
         'is_obsolete' => 0,
      );
      $syntype = tripal_cv_add_cvterm($scope_term, $syncv->name, 0, 1);
      if (!$syntype) {
        tripal_cv_obo_quiterror("Cannot add synonym type: internal:$scope");
      }
    }
    else {
      $syntype = $results[0];
    }

    // make sure the synonym doesn't already exists
    $values = array(
      'cvterm_id' => $cvterm->cvterm_id,
      'synonym' => $def
    );
    $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
    $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
    if (count($results) == 0) {
      $values = array(
        'cvterm_id' => $cvterm->cvterm_id,
        'synonym' => $def,
        'type_id' => $syntype->cvterm_id
      );
      $options = array(
        'statement_name' => 'ins_cvtermsynonym_cvsy',
        'return_record' => FALSE
      );
      $success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
      if (!$success) {
        tripal_cv_obo_quiterror("Failed to insert the synonym for term: $cvterm->name ($def)");
      }
    }

    // TODO: the dbxrefs listed for a synonym are not added here. Note that
    // a comma inside a description would interfere with splitting the
    // dbxref list and needs to be protected first.
  }

  return TRUE;
}
00779 
/**
 * Parses an OBO file and stages its stanzas in the tripal_obo_temp table.
 *
 * Tag/value pairs found before the first stanza are collected in $header.
 * Every stanza is stored with its id, its type (the text between the
 * square brackets) and a base64 encoded, serialized copy of its tags.
 * Progress is reported in the 0-50% range of the job. Failures terminate
 * the script through tripal_cv_obo_quiterror().
 *
 * @param $obo_file
 *   The path of the OBO file.
 * @param $header
 *   Passed by reference; receives the header tags, each as an array of
 *   values.
 * @param $jobid
 *   The job identifier used for progress reporting; no progress is
 *   reported when it is empty.
 *
 * @return
 *   The database prefix of the last 'id' tag seen, or '_global' when no id
 *   carried a prefix.
 */
function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
  $in_header = 1;
  $stanza = array();
  $type = '';
  $default_db = '_global';
  $line_num = 0;
  $num_read = 0;
  $intv_read = 0;

  $fh = fopen($obo_file, 'r');
  if (!$fh) {
    tripal_cv_obo_quiterror("ERROR: Cannot open the OBO file for reading: $obo_file");
  }

  // progress is reported each time another 1% of the file has been read
  $filesize = filesize($obo_file);
  $interval = max(1, intval($filesize * 0.01));

  // iterate through the lines in the OBO file and parse the stanzas
  while (($line = fgets($fh)) !== FALSE) {

    $line_num++;
    // count bytes, not characters, because the total is a byte size
    $size = strlen($line);
    $num_read += $size;
    $intv_read += $size;

    // update the job status every 1% of the file
    if ($jobid and $intv_read >= $interval) {
      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
      tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
      $intv_read = 0;
    }

    // remove newlines and surrounding white space
    $line = trim($line);

    // remove any special characters that may be hiding
    $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);

    // skip empty lines
    if ($line === '') {
      continue;
    }

    //remove comments from end of lines
    $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);  // TODO: if the explamation is escaped

    // a line that held nothing but a comment carries no tag
    if (trim($line) === '') {
      continue;
    }

    // at the first stanza we're out of header
    if (preg_match('/^\s*\[/', $line)) {
      $in_header = 0;

      // store the stanza we just finished reading
      if (sizeof($stanza) > 0) {
        _tripal_cv_obo_parse_store_stanza($stanza, $type);
      }

      // get the stanza type:  Term, Typedef or Instance
      $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);

      // start fresh with a new array
      $stanza = array();
      continue;
    }

    // break apart the line into the tag and value but ignore any escaped
    // colons: they are swapped for a placeholder while splitting
    $line = str_replace('\\:', '|-|-|', $line);
    $pair = explode(":", $line, 2);
    $tag = $pair[0];
    // a line without a colon has no value
    $value = isset($pair[1]) ? trim($pair[1]) : '';

    // if this is the ID then look for the default DB
    $matches = array();
    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
      $default_db = str_replace('|-|-|', '\\:', $matches[1]);
    }

    // return the escaped colons
    $tag = str_replace('|-|-|', '\\:', $tag);
    $value = str_replace('|-|-|', '\\:', $value);

    if ($in_header) {
      if (!array_key_exists($tag, $header)) {
        $header[$tag] = array();
      }
      $header[$tag][] = $value;
    }
    else {
      if (!array_key_exists($tag, $stanza)) {
        $stanza[$tag] = array();
      }
      $stanza[$tag][] = $value;
    }
  }
  fclose($fh);

  // now add the last term in the file
  if (sizeof($stanza) > 0) {
    _tripal_cv_obo_parse_store_stanza($stanza, $type);
  }
  return $default_db;
}

/**
 * Stores one parsed stanza in the tripal_obo_temp table.
 *
 * @param $stanza
 *   The stanza as an array keyed by tag name.
 * @param $type
 *   The stanza type taken from its square-bracket header line.
 */
function _tripal_cv_obo_parse_store_stanza($stanza, $type) {
  $values = array(
    'id' => $stanza['id'][0],
    'stanza' => base64_encode(serialize($stanza)),
    'type' => $type,
  );
  $options = array('statement_name' => 'ins_tripalobotemp_all');
  if (!tripal_core_chado_insert('tripal_obo_temp', $values, $options)) {
    tripal_cv_obo_quiterror("ERROR: Cannot insert stanza into temporary table.");
  }
}
00901 
/**
 * Links a cvterm to a database cross reference parsed from an xref string.
 *
 * The xref is split on its first colon into a database name and an accession.
 * An optional double-quoted section of the xref is used as the dbxref
 * description. The database is added with tripal_db_add_db(), the dbxref with
 * tripal_cv_obo_add_dbxref(), and a cvterm_dbxref record is inserted when a
 * matching one is not already present.
 *
 * @param $cvterm
 *   An object whose cvterm_id property identifies the term to link.
 * @param $xref
 *   The raw xref string, e.g. 'DBNAME:accession "description"'.
 *
 * @return
 *   TRUE when the cvterm_dbxref link exists or was added, FALSE otherwise.
 */
function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {

  $dbname = preg_replace('/^(.+?):.*$/', '$1', $xref);
  $accession = preg_replace('/^.+?:\s*(.*?)(\{.+$|\[.+$|\s.+$|\".+$|$)/', '$1', $xref);

  // preg_replace() returns the subject unchanged when the pattern does not
  // match, which would store the entire xref as the description.  Use
  // preg_match() so that an xref without a quoted section gets an empty
  // description instead.
  $description = '';
  $matches = array();
  if (preg_match('/^.+?\"(.+?)\".*?$/', $xref, $matches)) {
    $description = $matches[1];
  }

  // compare explicitly so that a legitimate accession of '0' is not rejected
  if ($accession === NULL or $accession === '') {
    // NOTE(review): tripal_cv_obo_quiterror() is defined outside this block;
    // the message is passed to it directly and the explicit return covers the
    // case where it hands control back -- confirm against its definition.
    tripal_cv_obo_quiterror("Cannot add a dbxref without an accession: '$xref'");
    return FALSE;
  }

  // if the xref is a database link, handle that specially
  if (strcmp($dbname, 'http') == 0) {
    $accession = $xref;
    $dbname = 'URL';
  }

  // add the database
  $db = tripal_db_add_db($dbname);
  if (!$db) {
    tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
    return FALSE;
  }

  // now add the dbxref
  $dbxref = tripal_cv_obo_add_dbxref($db->db_id, $accession, '', $description);
  if (!$dbxref) {
    tripal_cv_obo_quiterror("Cannot find or add the database reference (dbxref)");
    return FALSE;
  }

  // finally add the cvterm_dbxref but first check to make sure it exists
  $values = array(
    'cvterm_id' => $cvterm->cvterm_id,
    'dbxref_id' => $dbxref->dbxref_id,
  );
  $options = array('statement_name' => 'sel_cvtermdbxref_cvdb');
  $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
  if (count($result) == 0) {
    $ins_options = array(
      'statement_name' => 'ins_cvtermdbxref_cvdb',
      'return_record' => FALSE
    );
    $result = tripal_core_chado_insert('cvterm_dbxref', $values, $ins_options);
    if (!$result) {
      tripal_cv_obo_quiterror("Cannot add cvterm_dbxref: $xref");
      return FALSE;
    }
  }

  return TRUE;
}
00959 
/**
 * Adds a property (a cvtermprop record) to a cvterm.
 *
 * The property type is looked up by name in the 'cvterm_property_type' CV and
 * is created with the id "internal:<property>" when it is missing.  When the
 * rank is 0 the existing cvtermprop records of the term are deleted before the
 * new value is inserted.
 *
 * @param $cvterm
 *   An object whose cvterm_id property identifies the term.
 * @param $property
 *   The name of the property type.
 * @param $value
 *   The property value to store.
 * @param $rank
 *   The rank of the value; a rank of 0 triggers the delete described above.
 *
 * @return
 *   TRUE when the property was inserted, FALSE on failure.
 */
function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {

  // make sure the 'cvterm_property_type' CV exists
  $cv = tripal_cv_add_cv('cvterm_property_type', '');
  if (!$cv) {
    tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
    // bail out like the other failure branches below so that $cv->cv_id is
    // never read from a failed lookup should quiterror hand control back
    return FALSE;
  }

  // get the property type cvterm.  If it doesn't exist then we want to add it
  $values = array(
    'name' => $property,
    'cv_id' => $cv->cv_id,
  );
  $options = array('statement_name' => 'sel_cvterm_nacv_na');
  $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);
  if (count($results) == 0) {
    $term = array(
      'name' => $property,
      'id' => "internal:$property",
      'definition' => '',
      'is_obsolete' => 0,
    );
    $cvproptype = tripal_cv_add_cvterm($term, $cv->name, 0, 0);
    if (!$cvproptype) {
      tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
      return FALSE;
    }
  }
  else {
    $cvproptype = $results[0];
  }

  // remove any properties that currently exist for this term.  We'll reset them
  // NOTE(review): the delete is keyed on cvterm_id only, so it is not limited
  // to properties of type $property -- confirm that this is intended.
  if ($rank == 0) {
    $values = array('cvterm_id' => $cvterm->cvterm_id);
    $options = array('statement_name' => 'del_cvtermprop_cv');
    $success = tripal_core_chado_delete('cvtermprop', $values, $options);
    if (!$success) {
      tripal_cv_obo_quiterror("Could not remove existing properties to update property $property for term\n");
      return FALSE;
    }
  }

  // now add the property
  $values = array(
    'cvterm_id' => $cvterm->cvterm_id,
    'type_id' => $cvproptype->cvterm_id,
    'value' => $value,
    'rank' => $rank,
  );
  $options = array(
    'statement_name' => 'ins_cvtermprop_cvtyvara',
    'return_record' => FALSE,
  );
  $result = tripal_core_chado_insert('cvtermprop', $values, $options);
  if (!$result) {
    tripal_cv_obo_quiterror("Could not add property $property for term\n");
    return FALSE;
  }
  return TRUE;
}
01025 
01026 
/**
 * Finds a dbxref by database and accession, inserting it when it is missing.
 *
 * @param $db_id
 *   The db_id of the database the accession belongs to.
 * @param $accession
 *   The accession of the dbxref.
 * @param $version
 *   Optional version stored when a new dbxref is inserted.
 * @param $description
 *   Optional description stored when a new dbxref is inserted.
 *
 * @return
 *   The first record returned by the dbxref lookup (only the dbxref_id column
 *   is selected), or FALSE when the record could not be found or inserted.
 */
function tripal_cv_obo_add_dbxref($db_id, $accession, $version='', $description='') {

  // check to see if the dbxref exists if not, add it
  $values = array(
    'db_id' => $db_id,
    'accession' => $accession,
  );
  $options = array('statement_name' => 'sel_dbxref_idac');
  $result = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $options);

  // only an empty array means "not found"; a non-array result is presumably
  // a failed lookup (TODO confirm) and is reported as a failure further down
  if (is_array($result) and count($result) == 0) {
    $ins_values = array(
      'db_id'       => $db_id,
      'accession'   => $accession,
      'version'     => $version,
      'description' => $description,
    );
    $ins_options = array(
      'statement_name' => 'ins_dbxref_idacvede',
      'return_record' => FALSE
    );
    $result = tripal_core_chado_insert('dbxref', $ins_values, $ins_options);
    if (!$result) {
      tripal_cv_obo_quiterror("Failed to insert the dbxref record $accession");
      return FALSE;
    }
    // the insert does not return the record, so look it up again
    $result = tripal_core_chado_select('dbxref', array('dbxref_id'), $values, $options);
  }

  // never index into a missing or empty result set; callers test the return
  // value for truthiness
  if (!is_array($result) or count($result) == 0) {
    return FALSE;
  }
  return $result[0];
}
01060 
 All Classes Files Functions Variables