dependencies: org.clojure/clojure |
| 1.5.1 | net.sourceforge.htmlunit/htmlunit |
| 2.13 | se.fishtank/css-selectors |
| 1.0.2 |
|
(this space intentionally left almost blank) |
| |
| |
| ( ns yokogiri.core
( :require [ clojure.java.io :as io ] )
( :import [ com.gargoylesoftware.htmlunit StringWebResponse WebClient BrowserVersion WebClientOptions ]
[ com.gargoylesoftware.htmlunit.html HtmlPage DomNode DomAttr HTMLParser ]
[ org.w3c.dom NamedNodeMap Node ]
[ se.fishtank.css.selectors.dom DOMNodeSelector ] ) )
|
|
| ( set! *warn-on-reflection* true ) ; make the compiler warn whenever a Java interop call in this namespace needs reflection
|
|
Returns the client options object for a WebClient.
| (defn- web-client-options
  "Returns the options object of `client`, obtained by calling
  `getOptions` on the WebClient."
  [^WebClient client]
  (. client getOptions))
|
|
| (def set-client-options-map
  ;; Dispatch table: option keyword -> two-argument function that takes a
  ;; WebClientOptions instance and a value, and calls the matching setter.
  ;; `set-client-options!` looks setters up here by key.
  {:activex-native
   (fn [^WebClientOptions o v] (.setActiveXNative o v))
   :applet
   (fn [^WebClientOptions o v] (.setAppletEnabled o v))
   :block-popups
   (fn [^WebClientOptions o v] (.setPopupBlockerEnabled o v))
   :css
   (fn [^WebClientOptions o v] (.setCssEnabled o v))
   :geolocation
   (fn [^WebClientOptions o v] (.setGeolocationEnabled o v))
   :homepage
   (fn [^WebClientOptions o v] (.setHomePage o v))
   :insecure-ssl
   (fn [^WebClientOptions o v] (.setUseInsecureSSL o v))
   :print-content-on-failing-status
   (fn [^WebClientOptions o v] (.setPrintContentOnFailingStatusCode o v))
   :redirects
   (fn [^WebClientOptions o v] (.setRedirectEnabled o v))
   :throw-on-failing-status
   (fn [^WebClientOptions o v] (.setThrowExceptionOnFailingStatusCode o v))
   :throw-on-script-error
   (fn [^WebClientOptions o v] (.setThrowExceptionOnScriptError o v))
   :timeout
   (fn [^WebClientOptions o v] (.setTimeout o v))
   :tracking
   (fn [^WebClientOptions o v] (.setDoNotTrackEnabled o v))
   :javascript
   (fn [^WebClientOptions o v] (.setJavaScriptEnabled o v))})
|
|
| ( declare ^ :dynamic *client* ) ; forward declaration: set-client-options! refers to *client* before the defonce that gives it a value
|
|
Sets options on the client.
Usage:
(let [client (make-client)]
(set-client-options! client {:redirects false}))
;=> #<WebClient com.gargoylesoftware.htmlunit.WebClient@7622ccf2>
Available Options:
:activex-native bool
:block-popups bool
:applet bool
:css bool
:geolocation bool
:insecure-ssl bool
:print-content-on-failing-status bool
:redirects bool
:throw-on-failing-status bool
:throw-on-script-error bool
:tracking bool
:javascript bool
:homepage string
:timeout integer
| (defn set-client-options!
  "Applies the options in `opts` (a map of option keyword -> value) to a
  WebClient and returns that client. Called with only `opts`, the options
  are applied to `*client*`.

  The supported keys are the keys of `set-client-options-map`. An
  unsupported key raises an IllegalArgumentException that names the key and
  lists the supported ones (it used to surface as a bare
  NullPointerException from invoking a nil setter). Options that come
  before the unsupported key in iteration order have already been applied
  when the exception is thrown.

  **Usage:**

      (let [client (make-client)]
        (set-client-options! client {:redirects false}))"
  ([opts] (set-client-options! *client* opts))
  ([^WebClient client opts]
   (let [^WebClientOptions client-opts (web-client-options client)]
     (doseq [[k v] opts]
       (if-let [setter-fn (get set-client-options-map k)]
         (setter-fn client-opts v)
         ;; Fail loudly and descriptively instead of calling nil.
         (throw (IllegalArgumentException.
                 (str "Unknown client option " (pr-str k)
                      "; supported options: "
                      (pr-str (sort (keys set-client-options-map))))))))
     client)))
|
|
Returns a map of all options currently set on a client.
Usage:
user> (let [client (make-client :redirects false)]
(get-client-options client))
;=> {:javascript true, :redirects false, ...}
| (defn get-client-options
  "Reads the current option values from `client` and returns them as a map
  keyed by option keyword.

  NOTE(review): the key `:print-content-on-failing-status-code` returned
  here is spelled differently from the setter key
  `:print-content-on-failing-status` in `set-client-options-map`, so the
  returned map cannot be passed to `set-client-options!` unchanged.

  **Usage:**

      user> (let [client (make-client :redirects false)]
              (get-client-options client))
      ;=> {:javascript true, :redirects false, ...}"
  [^WebClient client]
  (let [^WebClientOptions o (web-client-options client)]
    {:activex-native                       (.isActiveXNative o)
     :applet                               (.isAppletEnabled o)
     :block-popups                         (.isPopupBlockerEnabled o)
     :css                                  (.isCssEnabled o)
     :geolocation                          (.isGeolocationEnabled o)
     :homepage                             (.getHomePage o)
     :insecure-ssl                         (.isUseInsecureSSL o)
     :javascript                           (.isJavaScriptEnabled o)
     :print-content-on-failing-status-code (.getPrintContentOnFailingStatusCode o)
     :redirects                            (.isRedirectEnabled o)
     :throw-on-failing-status              (.isThrowExceptionOnFailingStatusCode o)
     :throw-on-script-error                (.isThrowExceptionOnScriptError o)
     :timeout                              (.getTimeout o)
     :tracking                             (.isDoNotTrackEnabled o)}))
|
|
Constructs a new WebClient.
Usage:
user> (make-client)
;=> #<WebClient com.gargoylesoftware.htmlunit.WebClient@124d43a8>
With Options:
user> (make-client :geolocation true
:block-popups false)
;=> #<WebClient com.gargoylesoftware.htmlunit.WebClient@4473f04f>
Available Options:
:activex-native bool
:block-popups bool
:applet bool
:css bool
:geolocation bool
:insecure-ssl bool
:print-content-on-failing-status bool
:redirects bool
:throw-on-failing-status bool
:throw-on-script-error bool
:tracking bool
:javascript bool
:homepage string
:timeout integer
See also: yokogiri.core/set-client-options!
| (defn make-client
  "Constructs a new WebClient. Optional keyword arguments are collected
  into a map and applied with `set-client-options!`; the accepted keys are
  the keys of `set-client-options-map`.

  Exactly one WebClient is created per call. The previous code built a
  second instance whenever options were supplied and discarded the first.

  **Usage:**

      user> (make-client)
      user> (make-client :geolocation true
                         :block-popups false)"
  [& {:as opts}]
  (let [client (WebClient.)]
    (if (seq opts)
      ;; set-client-options! returns the client it was given.
      (set-client-options! client opts)
      client)))
|
|
| ( defonce ^ :dynamic *client* ( make-client ) ) ; default client used by the single-argument arities; defonce keeps an existing value when the namespace is reloaded
|
|
Takes a client which will be bound to *client*
within the scope of the form.
Usage:
user> (with-client (make-client :javascript false)
(get-page "http://www.example.com/"))
;=> #<HtmlPage HtmlPage(http://www.example.com/)@1536532984>
| (defmacro with-client
  "Evaluates `body` with `*client*` dynamically bound to the client `c`.

  **Usage:**

      user> (with-client (make-client :javascript false)
              (get-page \"http://www.example.com/\"))"
  [c & body]
  `(binding [*client* ~c]
     ~@body))
|
|
Takes a string, returns an HtmlPage.
Usage:
user> (create-page "<html><body><a href=\"http://example.com\">Link</a></body></html>")
;=> #<HtmlPage HtmlPage(file://fake-response-url)@478170219>
| (defn create-page
  "Takes a string of markup, returns an HtmlPage parsed from it.

  The string is wrapped in a StringWebResponse with a placeholder
  originating URL and parsed in the current window of a freshly created
  WebClient.

  **Usage:**

      user> (create-page \"<html><body><a href=\\\"http://example.com\\\">Link</a></body></html>\")
      ;=> #<HtmlPage HtmlPage(file://fake-response-url)@478170219>"
  [xml]
  ;; NOTE(review): the `url` binding was missing from this block; the value
  ;; is taken from the URL shown in the usage output above -- confirm it
  ;; against the upstream source.
  (let [url      (java.net.URL. "file://fake-response-url")
        response (StringWebResponse. xml url)]
    (HTMLParser/parseHtml response (.getCurrentWindow (WebClient.)))))
|
|
Takes a client and a url, returns an HtmlPage.
Usage:
user> (get-page (make-client) "http://www.example.com/")
;=> #<HtmlPage HtmlPage(http://www.example.com/)@478170219>
| (defn get-page
  "Fetches `url` with a WebClient by calling its `getPage` method and
  returns the result. The one-argument arity uses `*client*`.

  **Usage:**

      user> (get-page (make-client) \"http://www.example.com/\")"
  ([url]
   (get-page *client* url))
  ([^WebClient client ^String url]
   (. client getPage url)))
|
|
Takes a path as a string and creates a Page you can access with
#'yokogiri.core/xpath, #'yokogiri.core/css, etc.
Usage:
user> (as-page "docs/uberdoc.html")
;=> #<HtmlPage HtmlPage(file:/home/user/yokogiri/docs/uberdoc.html)@171016649>
| (defn as-page
  "Loads a local file as a page. `path` is coerced to a file, then to a
  URL, and the URL's string form is handed to `get-page`. The one-argument
  arity uses `*client*`."
  ([path]
   (as-page *client* path))
  ([client path]
   (let [file-url (io/as-url (io/file path))]
     (get-page client (str file-url)))))
|
|
Takes an HtmlPage and an xpath string. Returns a vector of nodes
which match the provided xpath string.
Usage:
user> (let [page (get-page your-client "http://www.example.com")]
(xpath page "//a"))
;=> [#<HtmlAnchor HtmlAnchor[<a href="http://www.iana.org/domains/example">]>]
| (defn xpath
  "Evaluates the XPath expression `xpath` against `page` using
  `getByXPath` and returns the matches as a vector.

  **Usage:**

      user> (let [page (get-page your-client \"http://www.example.com\")]
              (xpath page \"//a\"))"
  [^HtmlPage page ^String xpath]
  (let [matches (.getByXPath page xpath)]
    (vec matches)))
|
|
Takes an HtmlPage and an xpath string. Returns the first matching
node which matches the provided xpath string.
Usage:
user> (first-by-xpath
(get-page your-client "http://www.example.com/")
"//a")
;=> #<HtmlAnchor HtmlAnchor[<a href="http://www.iana.org/domains/example">]>
| (defn first-by-xpath
  "Returns whatever `getFirstByXPath` yields for the XPath expression
  `xpath` evaluated against `page`.

  **Usage:**

      user> (first-by-xpath
              (get-page your-client \"http://www.example.com/\")
              \"//a\")"
  [^HtmlPage page ^String xpath]
  (. page getFirstByXPath xpath))
|
|
Returns matches for a given CSS selector
Usage:
user> (css your-page "a.gbzt")
;=> (#<HtmlAnchor HtmlAnchor[<a onclick...>]>, ...)
http://www.goodercode.com/wp/use-css-selectors-with-htmlunit/
TODO: Bumping the version of css-selectors to 1.0.4 breaks
querying by CSS.
| (defn css
  "Runs the CSS `selector` against `page`. A DOMNodeSelector is built over
  the page's document element and the result of `querySelectorAll` is
  returned as a seq (nil when that result is empty)."
  [^HtmlPage page ^String selector]
  (-> (.getDocumentElement page)
      (DOMNodeSelector.)
      (.querySelectorAll selector)
      seq))
|
|
Returns a node's XML representation.
Usage:
user> (node-xml
(first-by-xpath
(get-page (make-client) "http://www.example.com/")
"//a"))
;=> <a href="http://www.iana.org/domains/example">\
More information...\
</a>\
| (defn node-xml
  "Returns a node's XML representation (the result of calling `asXml` on
  the node).

  **Usage:**

      user> (node-xml
              (first-by-xpath
                (get-page (make-client) \"http://www.example.com/\")
                \"//a\"))"
  [^DomNode node]
  (.asXml node))
|
|
Returns a node's text value
Usage:
user> (node-text #<HtmlAnchor HtmlAnchor[<a class="foo" id="bar" href="http://example.com">]>)
;=> "Search"
| (defn node-text
  "Returns the text value of `node`, as produced by the node's `asText`
  method."
  [^DomNode node]
  (. node asText))
|
|
Returns a clojure map of attributes for a given node
Usage:
user> (attr-map #<HtmlAnchor HtmlAnchor[<a class="foo" id="bar" href="http://example.com">]>)
;=> {:text "Search", :href "http://example.com", :id "bar", :class "foo"}
See also: yokogiri.core/attrs
| (defn attr-map
  "Builds a Clojure map from a node's attributes: each attribute name
  becomes a keyword key mapped to the attribute's value. The node's text
  (see `node-text`) is added last under `:text`, so it replaces any
  attribute that is itself named \"text\"."
  [^DomNode node]
  (let [^NamedNodeMap attributes (.getAttributes node)
        add-attr (fn [m i]
                   (let [^DomAttr a (.item attributes (int i))]
                     (assoc m (keyword (.getName a)) (.getValue a))))
        by-name  (reduce add-attr {} (range (.getLength attributes)))]
    (assoc by-name :text (node-text node))))
|
|
See also: yokogiri.core/attr-map
| ( def attrs #' yokogiri.core/attr-map ) ; alias: attrs holds the attr-map var, so calling attrs invokes attr-map
|
|
Returns the HtmlUnit DomAttr objects for a given node
See also: yokogiri.core/attr-map
TODO: http://htmlunit.sourceforge.net/apidocs/com/gargoylesoftware/htmlunit/html/DomAttr.html
| (defn- dom-attr
  "Returns a lazy sequence of the items in `node`'s attribute map, one per
  index from 0 up to the map's length."
  [^DomNode node]
  (let [^NamedNodeMap attributes (.getAttributes node)
        indices (range (.getLength attributes))]
    (map (fn [i] (.item attributes i)) indices)))
|
|
| |
| |