Miscellaneous utilities
Description
Miscellaneous utilities.
Usage
as.cat( x)
atts( x, exclude=cq( levels, class, dim, dimnames, names, row.names, tsp))
clamp( x, min, max)
clip( x, n=1)
compacto( x, gap, width, extra)
cq( ...)
deparse.names.parsably( x)
disatt( x, keep_=cq( levels, dim, dimnames, names, row.names, tsp), keep)
eclone( env)
empty.data.frame( ...)
env.name.string( env)
expanded.call( nlocal=sys.parent())
everyth( x, by=1, from=1)
find.funs(pos=1, ..., exclude.mcache = TRUE, mode="function")
find.lurking.envs(obj, delve=FALSE, trace=FALSE)
index( lvector)
integ(expr, lo, hi, what = "x", ..., args.to.integrate = list())
inv.logit( q)
is.dir( dir)
isF( x)
isT( x)
legal.filename( name)
logit( x)
lsall( ...)
masked( pos)
masking( pos=1)
mkdir( dirlist)
most.recent( lvec)
mwhere( x, cond)
my.all.equal( x, y, ...)
named( x)
nscat( fmt, ..., sep='\n', file='')
nscatn( fmt, ..., sep='\n', file='')
option.or.default( opt.name, default=NULL)
pos( substrs, mainstrs, any.case = FALSE, names.for.output)
put.in.session( ...)
rename.els( ..., ignore.missing=FALSE)
returnList( ...)
safe.rbind( df1, df2) # Deprecated in 2013
scatn( fmt, ..., sep='\n', file='', append=FALSE)
sqr( x)
to.regexpr( x)
undent( s)
xgsub( x, pattern, replacement, perl=!fixed, fixed=FALSE, ...)
xsub( x, pattern, replacement, perl=!fixed, fixed=FALSE, ...)
yes.no( prompt, default)
as.cat( x)
atts( x, exclude=cq( levels, class, dim, dimnames, names, row.names, tsp))
clamp( x, min, max)
clip( x, n=1)
compacto( x, gap, width, extra)
cq( ...)
deparse.names.parsably( x)
disatt( x, keep_=cq( levels, dim, dimnames, names, row.names, tsp), keep)
eclone( env)
empty.data.frame( ...)
env.name.string( env)
expanded.call( nlocal=sys.parent())
everyth( x, by=1, from=1)
find.funs(pos=1, ..., exclude.mcache = TRUE, mode="function")
find.lurking.envs(obj, delve=FALSE, trace=FALSE)
index( lvector)
integ(expr, lo, hi, what = "x", ..., args.to.integrate = list())
inv.logit( q)
is.dir( dir)
isF( x)
isT( x)
legal.filename( name)
logit( x)
lsall( ...)
masked( pos)
masking( pos=1)
mkdir( dirlist)
most.recent( lvec)
mwhere( x, cond)
my.all.equal( x, y, ...)
named( x)
nscat( fmt, ..., sep='\n', file='')
nscatn( fmt, ..., sep='\n', file='')
option.or.default( opt.name, default=NULL)
pos( substrs, mainstrs, any.case = FALSE, names.for.output)
put.in.session( ...)
rename.els( ..., ignore.missing=FALSE)
returnList( ...)
safe.rbind( df1, df2)
scatn( fmt, ..., sep='\n', file='', append=FALSE)
sqr( x)
to.regexpr( x)
undent( s)
xgsub( x, pattern, replacement, perl=!fixed, fixed=FALSE, ...)
xsub( x, pattern, replacement, perl=!fixed, fixed=FALSE, ...)
yes.no( prompt, default)
Arguments
x , y , n , gap , width , extra , ... , by , keep , keep_ , env , from , exclude , exclude.mcache , nlocal , lvector , dir , name , pos , frame , mode , dirlist , lvec , cond , opt.name , default , substrs , mainstrs , any.case , names.for.output , ignore.missing , df1 , df2 , prompt , obj , delve , trace , fmt , sep , append , file , expr , lo , hi , what , args.to.integrate , q , s , min , max , pattern , replacement , perl , fixed
|
see "Arguments by function"
|
Details
as.cat
makes a character vector print as if it was cat
ted rather than print
ed (one element per line, no extra quotes or backslashes, no [1] etc prefixes).
atts
returns the names of the attributes of x
, excluding any that are in exclude
.
clamp
clamps its 1st argument to the limits specified by the 2nd and 3rd. You can also just supply a range of values in the 2nd arg, and leave the 3rd missing. It's meant for use with pipes; see Examples.
clip
removes the last n
elements of x
. OBSOLETE— use head( x, -n)
instead.
compacto
gives a matrix an extra S3 class "compacto", which means it will print out with column names/label vertical and optionally no gaps between the columns. gap
and width
control the latter in fairly obvious ways. extra
controls what gets printed to help the eye follow vertical alignment. See Examples; there is a method print.compacto
which surely needs little further description.
cq
is handy for typing cq( alpha, beta, gamma)
instead of cq( "alpha", "beta", "gamma")
. Certain strings DO still require quotes around them, e.g. cq( "NULL", "1-2")
.
deparse.names.parsably
is like deparse
except that name
objects get wrapped in a call to as.name
, so that they won't be evaluated accidentally.
disatt
gets rid of most attributes on x
. If you want to preserve some, use keep
. The usually-default argument keep_
, which is merged with keep
, ensures that the "basic" attributes are retained; if you want to drop some of those too, you will have to modify keep_
. Note that S3 class is dropped by default, because some S3 objects may not make sense without certain attributes.
eclone
clones an environment
into a new one with the same parent, ie making deep copies of all the (non-environment) members, so that changing their values in the new env won't affect the original values (unlike if you just assign the old env to the new one). Functions whose environment was the original environment, will have their environment reset to the new one. If you don't understand that, then either don't worry be happy, or do more homework on R's environment
objects. See also Examples.
empty.data.frame
creates a template data frame with 0 rows but with all columns of the appropriate type. Useful for rbind
ing to later.
env.name.string
returns a string naming an environment; its name
attribute if there is one, or the name of its path
attribute if applicable, concatenated with the first line of what would be shown if you print
ed the argument. Unlike environmentName
, this will always return a non-empty string.
expanded.call
returns the full argument list available to its caller, including defaults where arguments were not set explicitly. The arguments may not be those originally passed, if they were modified before the invocation of expanded.call
. Default arguments which depend on calculations after the invocation of expanded.call
will lead to an error.
everyth
extracts every by
-th element of x
, starting at position from
.
find.funs
finds "function" objects (or objects of other modes, via the "mode" arg) in one or more environments, optionally matching a pattern.
find.lurking.envs( myobj)
will search through myobj
and all its attributes, returning the size of each sub-object. The size of environments is returned as Inf. The search is completely recursive, except for environments and by default the inner workings of functions; attributes of the entire function are always recursed. Changing the delve
parameter to TRUE ensures full recursion of function arguments and function bodies, which will show e.g. the srcref
structure; try it to see why the default is FALSE. find.lurking.envs
can be very useful for working out e.g. why the result of a model-fitting function is taking up 1000000MB of disk space; sometimes this is due to unnecessary environments in well-concealed places.
index
returns the position(s) of TRUE elements. Unlike which
: attributes are lost; NA elements map to NAs; index(<<length 0 object>>)
is numeric(0)
; index( <<non-logical>>)
is NA.
integ
is a handy wrapper for integrate
, that takes an expression rather than a function— so integ( sin(x), 0, 1)
"just works".
is.dir
tests for directoriness.
isF
and isT
test a logical scalar in the obvious way, with NA (and non-logicals) failing the test, to avoid teeeedious repetition of if( !is.na( my.complicated.expression) & my.complicated.expression) ...
. They are deliberately not vectorized (contrary to some versions of mvbutils
documentation); arguments with non-1 length trigger a warning.
legal.filename
coerces its character argument into a similar-looking string that is a legal filename on any (?) system.
logit
and inv.logit
apply those transformations (for those of us who can never remember what the stats package versions are called).
lsall
is like ls
but coerces all.names=TRUE
.
masked
checks which objects in search()[pos]
are masked by identically-named objects higher in the search path. masking
checks for objects that mask identically-named objects lower in the search path. Namespaces may make the results irrelevant.
mkdir
makes directories; unlike dir.create
, it can do several levels at once.
most.recent
returns the highest-so-far position of TRUE within a logical vector, or 0 if TRUE has not occurred yet; most.recent( c(F,T,F,T))
returns c(0,2,2,4).
mwhere
subsets a data.frame
by row, just like %where%
(qv); it's for use in pipes, as per Examples.
my.all.equal
is like all.equal
, except that it returns FALSE in cases where all.equal
returns a non-logical-mode result.
named(x)
is just names(x) <- as.character( x); x
; useful for lapply
etc.
nscat
, nscatn
: see scatn
option.or.default
obsolete— use equivalent getOption()
instead.
pos
is probably to be eschewed in new code, in favour of gregexpr
with fixed=TRUE
, which is likely faster. (And I should rewrite it to use gregexpr
.) It's one of a few legacy functions in mvbutils
that pre-date improvements in base R. pos
will either search for several literal patterns in a single target, or vice versa– but not both. It returns a matrix showing the positions of the matching substrings, with as many columns as the maximum number of matches. 0 signifies "no match"; there is always at least one column even if there are no matches at all.
rename.els
replaces specified names of a vector with new ones.
returnList
returns a list corresponding to old-style (pre-R 1.8) return
syntax. Briefly: a single argument is returned as itself. Multiple arguments are returned in a list. The names of that list are the argument names if provided; or, for any unnamed argument that is just a symbolic name, that symbolic name; or no name at all, for other unnamed arguments. You can duplicate pre-1.8 behaviour of return(...)
via return(returnList(...))
.
safe.rbind
( Deprecated in 2013 ) mimics rbind
, but works round an R bug (I reckon) where a column appears to be a numeric in one data.frame
but a factor in the other. But I now think you should just sort your column classes/types properly in advance, rather than mixing types and relying on somewhat arbitrary conversion rules.
scatn
is just cat( sprintf( fmt, ...), "", file=file, sep=sep)
. scatn
prints a newline afterwards, but not before; nscat
does the opposite; nscatn
does both. If you're just displaying a "title" before calling print
, use nscat
.
sqr
squares its argument (i.e. multiplies the argument by itself), without the risk that x^2
might incur exponentiation.
to.regexpr
converts literal strings to their equivalent regexps, e.g. by doubling backslashes. Useful if you want "fixed=TRUE" to apply only to a portion of your regexp.
undent
is handy when you want a slab of multi-line text inside some function you are writing. Raw-string syntax helps a lot (see the final examples of ?Quotes
), but indentation is horrible and the first line is out-of-step with the rest. You ideally want your text to appear indented at whatever looks nice inside your code, but for the actual string not to be indented. So, start your raw string with a newline, and wrap the string in undent
, and all will be well.
xsub
and xgsub
are for pipes. They are just like sub
and gsub
, except that the x
argument comes first, and that there is a default of perl=TRUE
(unless you set fixed=TRUE
). So you can write eg str |> xsub( "old", "new")
rather than str |> sub( "old", "new", x=_)
or sub( "old", "new", str)
. It's just better.
yes.no
cat
s its "prompt" argument and waits for user input. If the user input pmatch
es "yes" or "YES", then yes.no
returns TRUE
; if the input pmatch
es no
or NO
then yes.no
returns FALSE
; if the input is "" (i.e. empty) and default
is set, then yes.no
returns default
; otherwise it repeats the question. You probably want to put a space at the end of prompt
.
Value
as.cat |
character vector of class cat
|
clip |
vector of the same mode as x
|
cq |
character vector
|
empty.data.frame |
data.frame
|
env.name.string |
a string
|
expanded.call |
a call object
|
everyth |
same type as x
|
find.funs |
a character vector of function names
|
find.lurking.envs |
a data.frame with columns "what" and "size"
|
integ |
scalar
|
inv.logit |
numeric vector
|
is.dir |
logical vector
|
is.nonzero |
TRUE or FALSE
|
isF , isT |
TRUE or FALSE
|
legal.filename |
character( 1)
|
logit |
numeric vector
|
masked |
character vector
|
masking |
character vector
|
clamp |
possibly-modified version of x
|
mkdir |
logical vector of success/failure
|
nscat |
NULL
|
nscatn |
NULL
|
most.recent |
integer vector the same length as lvec , with values in the range (0,length(lvec)).
|
named |
vector of the same mode as x
|
option.or.default |
option's value
|
pos |
numeric matrix, one column per match found plus one; at least one column guaranteed
|
rename.els |
whatever the first argument was, with new names
|
returnList |
list or single object
|
safe.rbind |
data.frame
|
scatn |
NULL
|
to.regexpr |
character
|
undent |
string
|
xgsub |
character
|
xsub |
character
|
yes.no |
TRUE or FALSE
|
Arguments by function
- as.cat
-
x: character vector that you want to be displayed via cat( x, sep="\n")
- atts
-
x: any object; exclude: a character vector of whatever quotidian attributes you are not interested in knowing about
- clip
-
x: a vector or list
- clip
-
n: integer saying how many elements to clip from the end of x
- cq
-
...: quoted or unquoted character strings, to be substitute
d and then concatenated
- deparse.names.parsably
-
x: any object for deparse
- name
objects treated specially
- eclone
-
env: an environment
- empty.data.frame
-
...: named length-1 vectors of appropriate mode, e.g. "first.col=''"
- env.name.string
-
env: environment
- expanded.call
-
nlocal: frame to retrieve arguments from. Normally, use the default; see mlocal
.
- everyth
-
x: subsettable thing. by: step between values to extract. from: first position.
- find.funs
-
...: extra arguments for objects
. Usually just "pattern" for regexp searches.
- find.funs
-
exclude.mcache: if TRUE (default), don't look at mlazy
objects
- find.funs
-
mode: "function" to look for functions, "environment" to look for environments, etc
- find.lurking.envs
-
delve: whether to recurse into function arguments and function bodies
- find.lurking.envs
-
trace: just a debugging aid– leave as FALSE
- index
-
lvector: vector of TRUE/FALSE/NA
- integ
-
expr: an expression; what: a string, the argument of expr
to be integrated over; lo, hi: limits; ...: other variables to be set in the expression; args.to.integrate: a list of other things to pass to integrate
- is.dir
-
dir: character vector of files to check existence and directoriness of.
- isF, isT
-
x: anything, but meant to be a logical scalar
- legal.filename
-
name: character string to be modified
- find.funs
-
pos: list of environments, or vector of char or numeric positions in search path.
- lsall
-
...: as for ls
, except that all.names
will be coerced to TRUE
- masking, masked
-
pos: position in search path
- clamp
-
x: thing to be clamped (usually numeric, but character should work)— dimensions and other attributes are preserved; min, max: clamping range
- mkdir
-
dirlist: character vector of directories to create
- most.recent
-
lvec: logical vector
- my.all.equal
-
x, y: anything; ...: passed to all.equal
- named
-
x: character vector which will become its own names
attribute
- nscat, nscatn
-
see scatn
- option.or.default
-
opt.name: character(1)
- option.or.default
-
default: value to be returned if there is no option
called "opt.name"
- pos
-
substrs: character vector of patterns (literal not regexpr)
- pos
-
mainstrs: character vector to search for substrs
in.
- pos
-
any.case: logical- ignore case?
- pos
-
names.for.output: character vector to label rows of output matrix; optional
- put.in.session
-
...: a named set of objects, to be assign
ed into the mvb.session.info
search environment
- rename.els
-
...: the first argument is the thing to rename. Subsequent args like X=<some expr giving a string result>
mean that whichever element of the first arg was called "X", will now be called the result of that expression. ignore.missing=TRUE
means that requests to rename non-existent elements will be ignored; otherwise, they will throw an error.
- returnList
-
...: named or un-named arguments, just as for return
before R 1.8.
- safe.rbind
-
df1, df2: data.frame
or list
- scatn, nscat
-
fmt, ...: as per sprintf
; file, sep, append: as per cat
- to.regexpr
-
x: character vector
- undent
-
s: string, presumably a "raw string".
- xgsub
-
x, pattern, replacement, perl=!fixed, fixed= FALSE, ...: as per gsub
- xsub
-
as per xgsub
- yes.no
-
prompt: string to put before asking for input
- yes.no
-
default: value to return if user just presses <ENTER>
Author(s)
Mark Bravington
Examples
# as.cat
ugly.bugly <- c( 'A rose by any other name', 'would annoy taxonomists')
ugly.bugly
#[1] "A rose by any other name" "would annoy taxonomists"
as.cat( ugly.bugly) # calls print.cat--- no clutter
#A rose by any other name
#would annoy taxonomists
x <- structure( matrix( 1:4, 2, 2), baggage='purple suitcase')
atts( x) # will not print "dim" since that is in default 'exclude' list
#[1] "baggage"
1:7 |> clamp( 2, 4)
#[1] 2 2 3 4 4 4 4
1:7 |> clamp( 2:4)
#[1] 2 2 3 4 4 4 4
clip( 1:5, 2) # 1:3
cq( alpha, beta) # c( "alpha", "beta")
x <- matrix( 1:4, 2, 2)
compacto( x)
compacto( x, extra='|', width=3) # similar to gap... yet different
colnames( x) <- c( 'Gogol', 'Turgenev')
compacto( x)
x <- 6
attr( x, 'massive') <- 1:1e5
x
disatt( x)
old_env <- new.env()
evalq( envir=old_env, {
x <- 3
fun <- function() x
})
new_env <- eclone( old_env)
new_env$x <- 5
new_env$fun() # 5
lazy_env <- old_env
lazy_env$x <- 4
old_env$x # 4 ! Take care with environments...
old_env$fun() # 4 of course
new_env$x # 5 phew
empty.data.frame( a=1, b="yes")
# data.frame with 0 rows of columns "a" (numeric) and "b" (character)
empty.data.frame( a=1, b=factor( c( "yes", "no")))$b
# factor with levels c( "no", "yes")
everyth( 1:10, 3, 5) # c( 5, 8)
f <- function( a=9, b) expanded.call(); f( 3, 4) # list( a=3, b=4)
find.funs( "package:base", patt="an") # "transform" etc.
find.lurking.envs( cd)
# what size
#1 attr(obj, "source") 5368
#2 obj 49556
#3 environment(obj) <: namespace:mvbutils> Inf
## Not run:
eapply( .GlobalEnv, find.lurking.envs)
## End(Not run)
integ( sin(x), 0, 1) # [1] 0.4597
integ( sin(x+a), a=5, 0, 1) # [1] -0.6765; 'a' is "passed" to 'expr'
integ( sin(y+a), what='y', 0, 1, a=0) # [1] 0.4597; arg is 'y' not 'x'
is.dir( getwd()) # TRUE
isF( FALSE) # TRUE
isF( NA) # FALSE
isF( c( FALSE, FALSE)) # FALSE, with a warning
sapply( c( FALSE, NA, TRUE), isF)
# [1] TRUE FALSE FALSE
sapply( c( FALSE, NA, TRUE), isT)
# [1] FALSE FALSE TRUE
legal.filename( "a:b\\c/d&f") # "a.b.c.d&f"
most.recent( c( FALSE,TRUE,FALSE,TRUE)) # c( 0, 2, 2, 4)
# mwhere for subsetting: find vowels whose alphabetic position is a multiple of 5
df <- data.frame( x=1:10, y=LETTERS[ 1:10])
# Base-R pipes may not exist for the R version being used here
# So, try to parse the expression first...
pp <- try( parse( text=
'df |> mwhere( x %% 5 == 0) |> mwhere( y %in% cq( A,E,I,O,U))'
))
if( pp %is.not.a% 'try-error') eval( pp[[1]]) # just E-row
sapply( named( cq( alpha, beta)), nchar) # c( alpha=5, beta=4)
pos( cq( quick, lazy), "the quick brown fox jumped over the lazy dog")
# matrix( c( 5, 37), nrow=2)
pos( "quick", c( "first quick", "second quick quick", "third"))
# matrix( c( 7,8,0, 0,14,0), nrow=3)
pos( "quick", "slow") # matrix( 0)
x <- c( Cat='good', Dog='bad')
rename.els( x, Cat='Armadillo')
# Armadillo Dog
# "good" "bad"
try( rename.els( x, Zorilla='Bandicoot'))
# Error in rename.els(x, Zorilla = "Bandicoot") : all(present) is not TRUE
rename.els( x, Zorilla='Bandicoot', ignore.missing=TRUE)
# Cat Dog
# "good" "bad"
f <- function() { a <- 9; return( returnList( a, a*a, a2=a+a)) }
f() # list( a=9, 81, a2=18)
scatn( 'Things %i', 1:3)
nscat( 'Things %i', 1:3)
nscatn( 'Things %i', 1:3)
to.regexpr( "a{{") # "a\\{\\{"
test <- undent( r"--{
I can indent this
howsoever I like.
New paragraph!
}--")
as.cat( test)
longstring <- 'Bollocks, then the good stuff, then more bollocks'
longstring |> xsub( ',[^,]*$', '') |> xsub( '.*, *', '')
# "then the good stuff"
## Not run:
mkdir( "subdirectory.of.getwd")
yes.no( "OK (Y/N)? ")
masking( 1)
masked( 5)
## End(Not run)
ugly.bugly <- c( 'A rose by any other name', 'would annoy taxonomists')
ugly.bugly
as.cat( ugly.bugly)
x <- structure( matrix( 1:4, 2, 2), baggage='purple suitcase')
atts( x)
1:7 |> clamp( 2, 4)
1:7 |> clamp( 2:4)
clip( 1:5, 2)
cq( alpha, beta)
x <- matrix( 1:4, 2, 2)
compacto( x)
compacto( x, extra='|', width=3)
colnames( x) <- c( 'Gogol', 'Turgenev')
compacto( x)
x <- 6
attr( x, 'massive') <- 1:1e5
x
disatt( x)
old_env <- new.env()
evalq( envir=old_env, {
x <- 3
fun <- function() x
})
new_env <- eclone( old_env)
new_env$x <- 5
new_env$fun()
lazy_env <- old_env
lazy_env$x <- 4
old_env$x
old_env$fun()
new_env$x
empty.data.frame( a=1, b="yes")
empty.data.frame( a=1, b=factor( c( "yes", "no")))$b
everyth( 1:10, 3, 5)
f <- function( a=9, b) expanded.call(); f( 3, 4)
find.funs( "package:base", patt="an")
find.lurking.envs( cd)
eapply( .GlobalEnv, find.lurking.envs)
integ( sin(x), 0, 1)
integ( sin(x+a), a=5, 0, 1)
integ( sin(y+a), what='y', 0, 1, a=0)
is.dir( getwd())
isF( FALSE)
isF( NA)
isF( c( FALSE, FALSE))
sapply( c( FALSE, NA, TRUE), isF)
sapply( c( FALSE, NA, TRUE), isT)
legal.filename( "a:b\\c/d&f")
most.recent( c( FALSE,TRUE,FALSE,TRUE))
df <- data.frame( x=1:10, y=LETTERS[ 1:10])
pp <- try( parse( text=
'df |> mwhere( x %% 5 == 0) |> mwhere( y %in% cq( A,E,I,O,U))'
))
if( pp %is.not.a% 'try-error') eval( pp[[1]])
sapply( named( cq( alpha, beta)), nchar)
pos( cq( quick, lazy), "the quick brown fox jumped over the lazy dog")
pos( "quick", c( "first quick", "second quick quick", "third"))
pos( "quick", "slow")
x <- c( Cat='good', Dog='bad')
rename.els( x, Cat='Armadillo')
try( rename.els( x, Zorilla='Bandicoot'))
rename.els( x, Zorilla='Bandicoot', ignore.missing=TRUE)
f <- function() { a <- 9; return( returnList( a, a*a, a2=a+a)) }
f()
scatn( 'Things %i', 1:3)
nscat( 'Things %i', 1:3)
nscatn( 'Things %i', 1:3)
to.regexpr( "a{{")
test <- undent( r"--{
I can indent this
howsoever I like.
New paragraph!
}--")
as.cat( test)
longstring <- 'Bollocks, then the good stuff, then more bollocks'
longstring |> xsub( ',[^,]*$', '') |> xsub( '.*, *', '')
mkdir( "subdirectory.of.getwd")
yes.no( "OK (Y/N)? ")
masking( 1)
masked( 5)