Tripal v1.0 (6.x-1.0)
syncFeatures.inc
Go to the documentation of this file.
00001 <?php
00002 
// This file can be run as a stand-alone command-line script to sync features
// from chado to Drupal.
//
// Parameter f specifies the feature_id to sync:
//   -f <feature_id>  sync only that feature
//   -f 0             sync all features
//
// NOTE(review): 'includes/bootstrap.inc' is required with a relative path, so
// the script presumably has to be started from the Drupal root -- confirm.

$arguments = getopt("f:");

if (isset($arguments['f'])) {
  // Drupal's bootstrap reads several $_SERVER entries that do not exist on
  // the command line, so provide placeholder values before loading it.
  $drupal_base_url = parse_url('http://www.example.com');
  $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  $_SERVER['REMOTE_ADDR'] = NULL;
  $_SERVER['REQUEST_METHOD'] = NULL;

  require_once 'includes/bootstrap.inc';
  drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

  // getopt() yields a string, or an array of strings when -f is repeated.
  // Normalise to a single integer so that a non-numeric value is treated
  // as 0 ("sync everything") regardless of the PHP version's string/integer
  // comparison rules.
  $feature_arg = $arguments['f'];
  if (is_array($feature_arg)) {
    $feature_arg = end($feature_arg);
  }
  $feature_id = (int) $feature_arg;

  if ($feature_id > 0) {
    tripal_feature_sync_feature($feature_id);
  }
  else {
    print "syncing all features...\n";
    tripal_feature_sync_features();
  }
}
/**
 * Builds the form used to sync chado features with Drupal.
 *
 * The form offers a textarea listing the feature types to sync (defaulting to
 * the 'chado_sync_feature_types' variable), an optional organism selector
 * populated from tripal_organism_get_synced(), and a submit button.
 *
 * @return
 *   A Form API array.
 */
function tripal_feature_sync_form() {
  $form = array();

  $form['description'] = array(
    '#type' => 'item',
    '#value' => t("Add feature types, optionally select an organism and ".
       "click the 'Sync all Features' button to create Drupal ".
       "content for features in chado. Only features of the types listed ".
       "below in the Feature Types box will be synced. You may limit the ".
       "features to be synced by a specific organism. Depending on the ".
       "number of features in the chado database this may take a long ".
       "time to complete. "),
  );

  // The description is built from double-quoted pieces only. Mixing a
  // single-quoted opening with double-quoted continuation lines would turn
  // the whole text into one literal containing stray quotes and newlines.
  $form['feature_types'] = array(
    '#title'       => t('Feature Types'),
    '#type'        => 'textarea',
    '#description' => t("Enter the names of the sequence types that the ".
       "site will support with independent pages.  Pages for these data ".
       "types will be built automatically for features that exist in the ".
       "chado database.  The names listed here should be separated by ".
       "spaces or entered separately on new lines. The names must match ".
       "exactly (spelling and case) with terms in the sequence ontology"),
    '#required'    => TRUE,
    '#default_value' => variable_get('chado_sync_feature_types', 'gene contig'),
  );

  // Build the organism options. The empty first entry (key 0) means
  // "no organism selected".
  $organisms = array(0 => '');
  foreach (tripal_organism_get_synced() as $organism) {
    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  }

  // '#type' is a Form API machine name and must not be passed through t().
  $form['organism_id'] = array(
    '#title'       => t('Organism'),
    '#type'        => 'select',
    '#description' => t("Choose the organism for which features set above will be synced. Only organisms which also have been synced will appear in this list."),
    '#options'     => $organisms,
  );

  $form['button'] = array(
    '#type' => 'submit',
    '#value' => t('Sync all Features'),
    '#weight' => 3,
  );

  return $form;
}
/**
 * Validation handler for the feature sync form.
 *
 * No validation is performed; the function exists only as a placeholder and
 * leaves $form_state untouched.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state, passed by reference.
 */
function tripal_feature_sync_form_validate($form, &$form_state) {
  // nothing to do
}
/**
 * Submit handler for the feature sync form.
 *
 * Stores the submitted feature types in the 'chado_sync_feature_types'
 * variable and queues a job that calls tripal_feature_sync_features() on
 * behalf of the current user.
 *
 * @param $form
 *   The form array.
 * @param $form_state
 *   The form state; 'organism_id' and 'feature_types' are read from its
 *   'values' entry.
 */
function tripal_feature_sync_form_submit($form, &$form_state) {

  global $user;

  $organism_id   = $form_state['values']['organism_id'];
  $feature_types = $form_state['values']['feature_types'];

  // Positional arguments for tripal_feature_sync_features():
  // $max_sync (0 = no limit), $organism_id, $feature_types.
  $job_args = array(0, $organism_id, $feature_types);

  if ($organism_id) {
    $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
    if (!empty($organism)) {
      $title = "Sync all features for " .  $organism[0]->genus . " " . $organism[0]->species;
    }
    else {
      // The lookup returned nothing; fall back to the id rather than
      // dereferencing a missing row.
      $title = "Sync all features for organism " . $organism_id;
    }
  }
  else {
    $title = t('Sync all features for all synced organisms');
  }

  variable_set('chado_sync_feature_types', $feature_types);

  tripal_add_job($title, 'tripal_feature',
    'tripal_feature_sync_features', $job_args, $user->uid);
}
/**
 * Sets the URL alias of every feature that has been synced to Drupal.
 *
 * Walks each row of the chado_feature table, looks up the matching feature
 * (joined with its organism) through chado_query() and passes both records to
 * tripal_feature_set_feature_url(). Rows whose feature cannot be found are
 * skipped.
 *
 * @param $job_id
 *   Optional job identifier. It is not used by this function.
 */
function tripal_feature_set_urls($job_id = NULL) {
  // The feature lookup is identical for every row, so define it once.
  $feature_sql = "SELECT * 
            FROM feature F
              INNER JOIN organism O on O.organism_id = F.organism_id
            WHERE F.feature_id = %d";

  // Every row here represents a feature that already has a Drupal node.
  $synced = db_query("SELECT * FROM {chado_feature}");
  while ($link = db_fetch_object($synced)) {
    $record = db_fetch_object(chado_query($feature_sql, $link->feature_id));
    if (!$record) {
      continue;
    }
    tripal_feature_set_feature_url($link, $record);
  }
}
/**
 * Replaces the URL alias of a single feature node.
 *
 * The alias format is chosen by the 'chado_feature_url' variable. Any other
 * value (including the default 'internal ID') yields the accession prefix
 * from 'chado_feature_accession_prefix' followed by the feature_id.
 *
 * @param $node
 *   An object with an nid property identifying the Drupal node.
 * @param $feature
 *   An object providing the feature's name, uniquename, feature_id, genus and
 *   species properties.
 */
function tripal_feature_set_feature_url($node, $feature) {

  $scheme = variable_get('chado_feature_url', 'internal ID');
  $id_prefix = variable_get('chado_feature_accession_prefix', 'ID');

  // Lower-cased genus and species with whitespace turned into underscores.
  $genus_part = preg_replace('/\s/', '_', strtolower($feature->genus));
  $species_part = preg_replace('/\s/', '_', strtolower($feature->species));
  $organism_path = $genus_part . "/" . $genus_part . "_" . $species_part . "/";

  // Loose comparison mirrors the semantics of a switch statement.
  if ($scheme == 'feature name') {
    $url_alias = $feature->name;
  }
  elseif ($scheme == 'feature unique name') {
    $url_alias = $feature->uniquename;
  }
  elseif ($scheme == 'genus_species_uqname') {
    $url_alias = $organism_path . $feature->uniquename;
  }
  elseif ($scheme == 'genus species name') {
    $url_alias = $organism_path . $feature->name;
  }
  else {
    $url_alias = "$id_prefix$feature->feature_id";
  }

  $node_path = "node/$node->nid";
  print "Setting URL alias for $feature->name: $node_path => $url_alias\n";

  // Drop whatever alias the node had, then register the new one.
  db_query("DELETE FROM {url_alias} WHERE src = '%s'", $node_path);
  path_set_alias($node_path, $url_alias);
}
/**
 * Syncs chado features of the allowed types with Drupal.
 *
 * Collects the ids of all features whose type is one of the allowed sequence
 * ontology terms and whose organism has been synced, then runs this file as a
 * separate PHP process (with -f <feature_id>) for every feature that has no
 * row in the chado_feature table yet.
 *
 * @param $max_sync
 *   Stop once this many candidate features have been examined. 0 means no
 *   limit. Features that were already synced count towards the limit.
 * @param $organism_id
 *   Optional organism_id; when given only features of that organism are
 *   considered (and only if the organism itself has been synced).
 * @param $feature_types
 *   Optional whitespace-separated list of sequence ontology term names. When
 *   empty the 'chado_sync_feature_types' variable is used.
 * @param $job_id
 *   Optional job id; when given, progress is reported through
 *   tripal_job_set_progress().
 *
 * @return
 *   Always an empty string.
 */
function tripal_feature_sync_features($max_sync = 0, $organism_id = NULL,
  $feature_types = NULL, $job_id = NULL) {

  // get the list of available sequence ontology terms for which
  // we will build drupal pages from features in chado.  If a feature
  // is not one of the specified types we won't build a node for it.
  if (!$feature_types) {
    $allowed_types = variable_get('chado_sync_feature_types', 'gene contig');
  }
  else {
    $allowed_types = $feature_types;
  }
  // Collapse all whitespace runs to single spaces and trim the ends so that
  // splitting never produces empty term names.
  $allowed_types = trim(preg_replace("/[\s\n\r]+/", " ", $allowed_types));

  print "Looking for features of type: $allowed_types\n";

  if ($allowed_types === '') {
    print "No feature types were specified. Nothing to sync.\n";
    return '';
  }
  // explode() replaces split(), which no longer exists in PHP 7 and later.
  $so_terms = explode(' ', $allowed_types);

  // get the list of organisms that are synced and only include features from
  // those organisms
  $org_ids = array();
  foreach (tripal_organism_get_synced() as $org) {
    if (!$org->organism_id) {
      continue;
    }
    if ($organism_id and $org->organism_id != $organism_id) {
      continue;
    }
    $org_ids[] = (int) $org->organism_id;
  }
  // Without any organism the WHERE clause would be empty and the query
  // invalid, so stop here.
  if (!$org_ids) {
    print "No synced organisms match the request. Nothing to sync.\n";
    return '';
  }

  // use this SQL statement to get the features that we're going to upload.
  // Type names come from user input, so they are passed as query arguments
  // instead of being interpolated into the SQL text.
  $type_placeholders = implode(', ', array_fill(0, count($so_terms), "'%s'"));
  $org_placeholders  = implode(', ', array_fill(0, count($org_ids), '%d'));
  $sql = "SELECT feature_id ".
        "FROM {FEATURE} F ".
        "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
        "  INNER JOIN CV on CV.cv_id = CVT.cv_id ".
        "WHERE CVT.name IN ($type_placeholders) ".
        "  AND F.organism_id IN ($org_placeholders) ".
        "  AND CV.name = 'sequence' ".
        "ORDER BY feature_id";

  // get the list of features; arguments are handed over positionally, the
  // same way chado_query() is called elsewhere in this file.
  $query_args = array_merge(array($sql), $so_terms, $org_ids);
  $results = call_user_func_array('chado_query', $query_args);

  // load into ids array
  $ids = array();
  while ($id = db_fetch_object($results)) {
    $ids[] = $id->feature_id;
  }
  $num_ids = count($ids);

  // make sure our vocabularies are set before proceeding
  tripal_feature_set_vocabulary();

  // pre-create the SQL statement that will be used to check
  // if a feature has already been synced.  We skip features
  // that have been synced
  $check_sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

  // report progress roughly every 1% of the features, but at least every one
  $interval = max(1, intval($num_ids * 0.01));

  $script = drupal_get_path('module', 'tripal_feature') . "/includes/syncFeatures.inc";

  // Iterate through features that need to be synced
  $i = 0;
  foreach ($ids as $feature_id) {
    if ($job_id and $i % $interval == 0) {
      tripal_job_set_progress($job_id, intval(($i / $num_ids) * 100));
    }
    // if we have a maximum number to sync then stop when we get there
    // if not then just continue on
    if ($max_sync and $i == $max_sync) {
      return '';
    }
    if (!db_fetch_object(db_query($check_sql, $feature_id))) {

      // parsing all the features can cause memory overruns
      // we are not sure why PHP does not clean up the memory as it goes
      // to avoid this problem we will call this script through an
      // independent system call
      print "$i of $num_ids Syncing feature id: $feature_id\n";
      $cmd = "php " . escapeshellarg($script) . " -f " . intval($feature_id);
      system($cmd);

    }
    $i++;
  }

  return '';
}

00280 
/**
 * Syncs a single chado feature with Drupal.
 *
 * Loads the feature and its synonyms from chado, repairs inconsistencies
 * between the node and chado_feature tables, creates a 'chado_feature' node
 * when none exists yet, and finally sets the taxonomy and URL alias of the
 * node.
 *
 * @param $feature_id
 *   The chado feature_id of the feature to sync.
 *
 * @return
 *   Always an empty string. NOTE: when validation of a new node fails the
 *   function calls exit and therefore terminates the whole PHP process.
 */
function tripal_feature_sync_feature($feature_id) {

  global $user;
  // set to FALSE if the node exists and we just sync and not create
  $create_node = TRUE;

  // if we don't have a feature_id then return
  if (!$feature_id) {
    drupal_set_message(t("Please provide a feature_id to sync"));
    return '';
  }

  // get information about this feature
  $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
         "    O.species,CVT.name as cvname,F.residues,F.organism_id ".
         "FROM {FEATURE} F ".
         "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
         "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
         "WHERE F.feature_id = %d";
  $feature = db_fetch_object(chado_query($fsql, $feature_id));

  // Without a feature record there is nothing to build a node from, and
  // the code below would otherwise assign properties on a non-object.
  if (!$feature) {
    drupal_set_message(t("%feature_id: No matching feature was found in chado. Cannot sync", array('%feature_id' => $feature_id)));
    return '';
  }

  // get the synonyms for this feature
  $synsql = "SELECT S.name ".
            "FROM {feature_synonym} FS ".
            "  INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
            "WHERE FS.feature_id = %d";
  $synonyms = chado_query($synsql, $feature_id);

  // now add these synonyms to the feature object as a single string,
  // one synonym per line
  $synstring = '';
  while ($synonym = db_fetch_object($synonyms)) {
    $synstring .= "$synonym->name\n";
  }
  $feature->synonyms = $synstring;

  // check to make sure that we don't have any feature nodes with this feature
  // name as a title but without a corresponding entry in the chado_feature
  // table. If so then we want to clean up that node.  The search is limited
  // to nodes of type 'chado_feature' so that unrelated content which merely
  // shares the title is never deleted.
  $tsql =  "SELECT * FROM {node} N ".
           "WHERE title = '%s' AND type = 'chado_feature'";
  $cnsql = "SELECT * FROM {chado_feature} ".
           "WHERE nid = %d";
  $nodes = db_query($tsql, $feature->name);
  // cycle through all nodes that may have this title
  while ($node = db_fetch_object($nodes)) {
    $feature_nid = db_fetch_object(db_query($cnsql, $node->nid));
    if (!$feature_nid) {
      drupal_set_message(t("%feature_id: A node is present but the chado_feature entry is missing... correcting", array('%feature_id' => $feature_id)));
      node_delete($node->nid);
    }
  }

  // check if this feature already exists in the chado_feature table.
  // if we have a chado feature, we want to check to see if we have a node
  $cfsql = "SELECT * FROM {chado_feature} ".
           "WHERE feature_id = %d";
  // @coder-ignore: don't need to use db_rewrite_sql() since need all nodes regardless of access control
  $nsql =  "SELECT * FROM {node} N ".
           "WHERE nid = %d";
  $chado_feature = db_fetch_object(db_query($cfsql, $feature->feature_id));
  if ($chado_feature) {
    drupal_set_message(t("%feature_id: A chado_feature entry exists", array('%feature_id' => $feature_id)));
    $node = db_fetch_object(db_query($nsql, $chado_feature->nid));
    if (!$node) {
      // if we have a chado_feature but not a node then we have a problem and
      // need to cleanup
      drupal_set_message(t("%feature_id: The node is missing, but has a chado_feature entry... correcting", array('%feature_id' => $feature_id)));
      $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
      db_query($df_sql, $feature_id);
    }
    else {
      drupal_set_message(t("%feature_id: A corresponding node exists", array('%feature_id' => $feature_id)));
      $create_node = FALSE;
    }
  }

  // if we've encountered an error then just return.
  if ($error_msg = db_error()) {
    return '';
  }

  // if a drupal node does not exist for this feature then we want to
  // create one.
  if ($create_node) {
    // Variable values go through t() placeholders rather than being
    // interpolated into the translatable string.
    drupal_set_message(t("%feature_id: Creating node @name", array('%feature_id' => $feature_id, '@name' => $feature->name)));

    // Genus and species were already fetched by the feature query's join
    // on organism, so no separate organism lookup is needed.
    $new_node = new stdClass();
    $new_node->type = 'chado_feature';
    $new_node->uid = $user->uid;
    $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $feature->genus $feature->species";
    $new_node->fname = "$feature->name";
    $new_node->uniquename = "$feature->uniquename";
    $new_node->feature_id = $feature->feature_id;
    $new_node->residues = $feature->residues;
    $new_node->organism_id = $feature->organism_id;
    $new_node->feature_type = $feature->cvname;
    $new_node->synonyms = $feature->synonyms;

    // validate the node and if okay then submit
    node_validate($new_node);
    if ($errors = form_get_errors()) {
      print "Error encountered validating new node. Cannot sync\n";
      foreach ($errors as $key => $msg) {
        watchdog('trp-fsync', "%msg", array('%msg' => $msg), 'error');
      }
      // Terminates the whole process, not just this function.
      exit;
    }
    else {
      $node = node_submit($new_node);
      node_save($node);
    }
  }
  else {
    // The chado_feature row supplies the nid used by the calls below.
    $node = $chado_feature;
  }

  // set the taxonomy for this node
  drupal_set_message(t("%feature_id (@nid): setting taxonomy", array('%feature_id' => $feature_id, '@nid' => $node->nid)));
  tripal_feature_set_taxonomy($node, $feature_id);

  // set the URL alias for this node
  tripal_feature_set_feature_url($node, $feature);

  return '';
}

00426 
 All Classes Files Functions Variables