|
Thursday, January 12, 2006
Author a Monad Cmdlet as Web Service Client
This post shows how to author a cmdlet, Search-Entrez, which works as an NCBI eUtils web service client so that you can build your own Entrez data pipeline.
For those who don't use Entrez: Entrez is the integrated, text-based search and retrieval system used at NCBI for the major databases, including PubMed, Nucleotide and Protein Sequences, Protein Structures, Complete Genomes, Taxonomy, and others. If you are a biologist, you probably use Entrez every day. See the graphic below for a brief view of Entrez integration.
Usually, general users access Entrez through its web form interface. For data mining, NCBI provides a utility called eUtils, which offers a URL tool as well as a SOAP interface. The URL tool requires you to construct a customized search URL and returns results as XML. To save some time, I decided to use the SOAP interface (version 1.3a). More help on the eUtils utility can be found here. Information about the SOAP interface can be found here.
eUtils contains several tools:
1. Create a "Class Library" in Visual C# Express.
2. Project -> Add Reference -> System.Management.Automation
3. Project -> Add Web Reference -> http://eutils.ncbi.nlm.nih.gov/entrez/eutils/soap/eutils_lite.wsdl (without EFetch tool) -> Go -> Change Web Reference name to "eUtils"
4. Create new class derived from MshSnapin object.
5. Create new class derived from Cmdlet object.
6. Build project to EntrezSnapin.dll.
7. Install mshsnapin using the .NET SDK installutil.exe tool.
C# Code:
[Edit: Monad has now been renamed to Windows PowerShell. This script or discussion may require slight adjustments before it applies directly to newer builds.]
Steps to author a cmdlet:
For those who don't use Entrez: Entrez is the integrated, text-based search and retrieval system used at NCBI for the major databases, including PubMed, Nucleotide and Protein Sequences, Protein Structures, Complete Genomes, Taxonomy, and others. If you are a biologist, you probably use Entrez every day. See the graphic below for a brief view of Entrez integration.
Usually, general users access Entrez through its web form interface. For data mining, NCBI provides a utility called eUtils, which offers a URL tool as well as a SOAP interface. The URL tool requires you to construct a customized search URL and returns results as XML. To save some time, I decided to use the SOAP interface (version 1.3a). More help on the eUtils utility can be found here. Information about the SOAP interface can be found here.
eUtils contains several tools:
- EInfo: Provides field index term counts, last update, and available links for each database.
- ESearch: Searches and retrieves primary IDs (for use in EFetch, ELink, and ESummary) and term translations and optionally retains results for future use in the user's environment.
- EPost: Posts a file containing a list of primary IDs for future use in the user's environment to use with subsequent search strategies.
- ESummary: Retrieves document summaries from a list of primary IDs or from the user's environment.
- EFetch: Retrieves records in the requested format from a list of one or more primary IDs or from the user's environment.
- ELink: Checks for the existence of an external or Related Articles link from a list of one or more primary IDs. Retrieves primary IDs and relevancy scores for links to Entrez databases or Related Articles; creates a hyperlink to the primary LinkOut provider for a specific ID and database, or lists LinkOut URLs and Attributes for multiple IDs.
- EGQuery: Provides Entrez database counts in XML for a single search using Global Query.
- ESpell: Retrieves spelling suggestions.
1. Create a "Class Library" in Visual C# Express.
2. Project -> Add Reference -> System.Management.Automation
3. Project -> Add Web Reference -> http://eutils.ncbi.nlm.nih.gov/entrez/eutils/soap/eutils_lite.wsdl (without EFetch tool) -> Go -> Change Web Reference name to "eUtils"
4. Create new class derived from MshSnapin object.
5. Create new class derived from Cmdlet object.
6. Build project to EntrezSnapin.dll.
7. Install mshsnapin using the .NET SDK installutil.exe tool.
set-alias installutil $env:windir\Microsoft.NET\Framework\v2.0.50727\installutil
installutil EntrezSnapin.dll
get-mshsnapin -reg
8. Load mshsnapin using the add-mshsnapin cmdlet.
add-mshsnapin EntrezSnapin
9. Now you can enjoy a powerful Entrez search tool under the msh prompt.
C# Code:
using System;
using System.ComponentModel;
using System.Management.Automation;
using System.Collections;
namespace Entrez
{
/// <summary>
/// Snap-in class for the Entrez cmdlets. It reports the name, vendor and
/// description of this snap-in.
/// </summary>
[RunInstaller(true)]
public class EntrezSnapin : MshSnapIn
{
    // The three values reported by the property overrides below.
    private const string SnapinName = "EntrezSnapin";
    private const string SnapinVendor = "http://mshforfun.blogspot.com";
    private const string SnapinDescription = "Cmdlets to build your own NCBI Entrez Data pipeline";

    /// <summary>Creates an instance of the EntrezSnapin class.</summary>
    public EntrezSnapin()
    {
    }

    /// <summary>Gets the snap-in name, which is used for registration.</summary>
    public override string Name
    {
        get { return SnapinName; }
    }

    /// <summary>Gets the vendor of the snap-in.</summary>
    public override string Vendor
    {
        get { return SnapinVendor; }
    }

    /// <summary>Gets the description of the snap-in.</summary>
    public override string Description
    {
        get { return SnapinDescription; }
    }
}
/// <summary>
/// Submits a search to an Entrez database through the eUtils web service
/// (run_eSearch_MS), then requests the summaries of the hits
/// (run_eSummary_MS) and writes the summary result object to the pipeline.
/// </summary>
[Cmdlet("search", "entrez", SupportsShouldProcess = true)]
public class SearchEntrezCmd : Cmdlet
{
    #region Parameters
    private string db = "pubmed";
    /// <summary>Entrez Database</summary>
    [Parameter(Mandatory = true, Position = 0)]
    public string Database
    {
        get { return db; }
        set { db = value; }
    }

    private string term;
    /// <summary>Search terms or phrases with or without Boolean operators</summary>
    [Parameter(Mandatory = true, Position = 1)]
    public string Keywords
    {
        get { return term; }
        set { term = value; }
    }

    private string retmax = "50";
    /// <summary>Maximum number of records to return</summary>
    [Parameter(Position = 2)]
    public string MaxRecord
    {
        get { return retmax; }
        set { retmax = value; }
    }

    private string email = "";
    /// <summary>To identify your search results history on server</summary>
    [Parameter(Position = 3)]
    public string Email
    {
        get { return email; }
        set { email = value; }
    }

    private string field = "";
    /// <summary>Specific search field for pubmed database</summary>
    [Parameter(Position = 4)]
    public string Field
    {
        get { return field; }
        set { field = value; }
    }

    private string reldate = "";
    /// <summary>Limit items to a number of days immediately preceding today's date</summary>
    [Parameter(Position = 5)]
    public string RelativeDate
    {
        get { return reldate; }
        set { reldate = value; }
    }

    private string mindate = "";
    /// <summary>Lower bound of two specific dates</summary>
    [Parameter(Position = 6)]
    public string MinimumDate
    {
        get { return mindate; }
        set { mindate = value; }
    }

    private string maxdate = "";
    /// <summary>Upper bound of two specific dates</summary>
    [Parameter(Position = 7)]
    public string MaximumDate
    {
        get { return maxdate; }
        set { maxdate = value; }
    }

    private string datetype = "";
    /// <summary>Limit dates to a specific date field based on database</summary>
    [Parameter(Position = 8)]
    public string DateType
    {
        get { return datetype; }
        set { datetype = value; }
    }
    #endregion

    // Fixed web service arguments that are not exposed as cmdlet parameters.
    private readonly string usehistory = "y";
    private readonly string tool = "Monad_Entrez_Client";
    private readonly string retstart = "0";
    private readonly string rettype = "uilist";
    private readonly string sort = "";

    /// <summary>
    /// Communicating purpose only: reports the database and keywords on the verbose stream.
    /// </summary>
    protected override void BeginProcessing()
    {
        WriteVerbose("Searching Entrez database...\r\nDatabase: " + db + "\r\nKeywords: " + term);
    }

    /// <summary>
    /// Runs the search and the summary request, then writes the summary
    /// result to the pipeline. Does nothing when ShouldProcess declines.
    /// </summary>
    /// <remarks>
    /// Any failure ends in ThrowTerminatingError: "eUtils.run_eSearch_MS" or
    /// "eUtils.run_eSummary_MS" (InvalidResult) when the service reports an
    /// ERROR, and "eUtils" (ResourceUnavailable) when a call throws.
    /// </remarks>
    protected override void ProcessRecord()
    {
        if (!ShouldProcess("Searching Entrez database...\r\nDatabase: " + db + "\r\nKeywords: " + term))
        {
            return;
        }

        eUtils.eSummaryResultType summaryResults = null;
        ErrorRecord failure = null;

        // Failures are only recorded inside the try block and raised after it.
        // Calling ThrowTerminatingError inside the try would let the blanket
        // catch below intercept it and replace the specific error record with
        // the generic "eUtils" one.
        try
        {
            WriteVerbose("Creating Entrez WebService...");
            // Dispose the proxy once both calls are done.
            using (eUtils.eUtilsService serv = new eUtils.eUtilsService())
            {
                WriteDebug("WebService:" + serv.ToString());
                WriteVerbose("Submit search...");
                // call NCBI ESearch utility
                // NOTE: search term should be URL encoded
                // NOTE(review): the two empty strings after term look like the
                // WebEnv/QueryKey slots - confirm against the WSDL.
                eUtils.eSearchResultType searchResults = serv.run_eSearch_MS(db, term, "", "", usehistory, tool, email, field, reldate, mindate, maxdate, datetype, retstart, retmax, rettype, sort);
                WriteDebug("Search Results:" + searchResults.ToString());
                if (searchResults.ERROR != null)
                {
                    failure = new ErrorRecord(new InvalidOperationException("web service method error: " + searchResults.ERROR), "eUtils.run_eSearch_MS", ErrorCategory.InvalidResult, searchResults);
                }
                else
                {
                    WriteVerbose("Number of results item found:" + searchResults.Count);
                    WriteVerbose("Getting results summary...");
                    // call NCBI ESummary utility, reusing the WebEnv/QueryKey
                    // returned by the search instead of an explicit id list
                    summaryResults = serv.run_eSummary_MS(db, "", searchResults.WebEnv, searchResults.QueryKey, searchResults.RetStart, searchResults.RetMax, tool, email);
                    WriteDebug("Results summary:" + summaryResults.ToString());
                    if (summaryResults.ERROR != null)
                    {
                        failure = new ErrorRecord(new InvalidOperationException("web service method error: " + summaryResults.ERROR), "eUtils.run_eSummary_MS", ErrorCategory.InvalidResult, summaryResults);
                    }
                    else
                    {
                        // DocSum may be absent when nothing was retrieved.
                        int retrieved = (summaryResults.DocSum == null) ? 0 : summaryResults.DocSum.Length;
                        WriteVerbose("Number of item retrieved:" + retrieved);
                    }
                }
            }
        }
        catch (Exception e)
        {
            failure = new ErrorRecord(e, "eUtils", ErrorCategory.ResourceUnavailable, db + term);
        }

        if (failure != null)
        {
            ThrowTerminatingError(failure);
            return;
        }

        WriteVerbose("Write results to pipline...");
        // results output
        WriteObject(summaryResults);
    }
}
}
[Edit: Monad has now been renamed to Windows PowerShell. This script or discussion may require slight adjustments before it applies directly to newer builds.]
Tags: msh monad PowerShell
Post a Comment